Module: HTML5::CLI

Defined in:
lib/html5/cli.rb

Defined Under Namespace

Classes: PythonicTemplate

Class Method Summary

Class Method Details

.open_input(f) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/html5/cli.rb', line 136

# Turns the input argument given on the command line into something the
# parser can consume.
#
# f - the last command-line argument, or nil when none was supplied.
#
# Behaviour by argument:
# * nil                        - writes a usage hint to $stderr and exits with status 1.
# * "http://…" / "https://…"   - fetched through open-uri; the opened stream is returned.
# * "-"                        - $stdin is returned.
# * anything else              - treated as a filesystem path and opened for reading.
#
# If opening fails for any reason (StandardError), the original argument is
# returned unchanged instead of raising. NOTE(review): this presumably lets a
# caller pass literal markup as the argument — confirm before tightening.
#
# Returns an IO-like object, or the original argument when it could not be opened.
def self.open_input f
  unless f
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  begin
    if f.start_with?('http://', 'https://')
      require 'open-uri'
      URI.parse(f).open
    elsif f == '-'
      $stdin
    else
      # File.open, not Kernel#open: the latter treats a leading "|" as a
      # command to execute, which must never happen for a CLI argument.
      File.open(f)
    end
  rescue StandardError
    # Best-effort: hand the untouched argument back to the caller.
    f
  end
end

.parse(opts, args) ⇒ Object



157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/html5/cli.rb', line 157

# Parses the input named by the last element of +args+ and, except when
# profiling, prints the resulting document.
#
# opts - option object; this method reads treebuilder, output, parsemethod,
#        container, profile and time from it.
# args - argument list; only its last element is used (as the input).
#
# The parser class is chosen from opts.output (:xml selects the XML parser,
# anything else the HTML parser). When opts.parsemethod is not :parse, a
# container name (opts.container, defaulting to 'div') is passed as well.
# The encoding passed to the parser is always nil here.
def self.parse(opts, args)
  encoding = nil
  input = open_input(args.last)

  require 'html5/treebuilders'
  builder = HTML5::TreeBuilders[opts.treebuilder]

  parser =
    if opts.output == :xml
      require 'html5/liberalxmlparser'
      HTML5::XMLParser.new(:tree => builder)
    else
      require 'html5/html5parser'
      HTML5::HTMLParser.new(:tree => builder)
    end

  call_args =
    if opts.parsemethod == :parse
      [input, encoding]
    else
      [input, opts.container || 'div', encoding]
    end

  if opts.profile
    # Profiling run: the profile report goes to $stderr; the document is not printed.
    require 'profiler'
    Profiler__::start_profile
    parser.send(opts.parsemethod, *call_args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    started = Time.new
    document = parser.send(opts.parsemethod, *call_args)
    parsed = Time.new
    print_output(parser, document, opts)
    printed = Time.new
    puts "\n\nRun took: #{parsed - started}s (plus #{printed - parsed}s to print the output)"
  else
    print_output(parser, parser.send(opts.parsemethod, *call_args), opts)
  end
end

.parse_opts(argv) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/html5/cli.rb', line 8

# Builds the option set for the command-line tool from +argv+.
#
# argv - array of command-line arguments. It is parsed destructively
#        (OptionParser#parse!), so recognised switches are removed from it.
#
# "-h"/"--help" prints the usage text and exits.
#
# Returns an OpenStruct holding the defaults below, overridden by whichever
# switches were given.
def self.parse_opts argv
  options = OpenStruct.new(
    :profile     => false,
    :time        => false,
    :output      => :html,
    :treebuilder => 'simpletree',
    :error       => false,
    :encoding    => false,
    :parsemethod => :parse,
    :serializer  => {
      :encoding            => 'utf-8',
      :omit_optional_tags  => false,
      :inject_meta_charset => false
    }
  )

  # Registers a "--[no-]NAME" switch that stores its boolean value in the
  # serializer hash under NAME with dashes turned into underscores.
  serializer_flag = lambda do |cli, name, description|
    key = name.tr('-', '_').to_sym
    cli.on("--[no-]#{name}", description) { |enabled| options.serializer[key] = enabled }
  end

  option_parser = OptionParser.new do |cli|
    cli.separator ""
    cli.separator "Parse Options:"

    cli.on("-b", "--treebuilder NAME") { |name| options.treebuilder = name }

    cli.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
      options.parsemethod = :parse_fragment
      options.container = container if container
    end

    cli.separator ""
    cli.separator "Filter Options:"

    serializer_flag.call(cli, "inject-meta-charset", "inject <meta charset>")
    serializer_flag.call(cli, "strip-whitespace", "strip unnecessary whitespace")
    serializer_flag.call(cli, "sanitize", "escape unsafe tags")

    cli.separator ""
    cli.separator "Output Options:"

    cli.on("--tree", "output as debug tree") { options.output = :tree }

    cli.on("-x", "--xml", "output as xml") do
      # XML output also switches the tree builder to rexml.
      options.output = :xml
      options.treebuilder = "rexml"
    end

    # "--no-html" clears the output format altogether.
    cli.on("--[no-]html", "Output as html") { |html| options.output = html ? :html : nil }

    cli.on("--hilite", "Output as formatted highlighted code.") { options.output = :hilite }

    cli.on("-e", "--error", "Print a list of parse errors") { |error| options.error = error }

    cli.separator ""
    cli.separator "Serialization Options:"

    serializer_flag.call(cli, "omit-optional-tags", "Omit optional tags")
    serializer_flag.call(cli, "quote-attr-values", "Quote attribute values")
    serializer_flag.call(cli, "use-best-quote-char", "Use best quote character")

    cli.on("--quote-char C", "Use specified quote character") do |char|
      options.serializer[:quote_char] = char
    end

    serializer_flag.call(cli, "minimize-boolean-attributes", "Minimize boolean attributes")
    serializer_flag.call(cli, "use-trailing-solidus", "Use trailing solidus")
    serializer_flag.call(cli, "escape-lt-in-attrs", "Escape less than signs in attribute values")
    serializer_flag.call(cli, "escape-rcdata", "Escape rcdata element values")

    cli.separator ""
    cli.separator "Other Options:"

    cli.on("-p", "--[no-]profile", "Profile the run") { |profile| options.profile = profile }
    cli.on("-t", "--[no-]time", "Time the run") { |time| options.time = time }
    cli.on("-c", "--[no-]encoding", "Print character encoding used") do |encoding|
      options.encoding = encoding
    end

    cli.on_tail("-h", "--help", "Show this message") do
      puts cli
      exit
    end
  end

  option_parser.parse!(argv)
  options
end


199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# File 'lib/html5/cli.rb', line 199

# Writes the parsed document to standard output in the format selected by
# opts.output, optionally preceded by the detected encoding and followed by
# the list of parse errors.
#
# parser   - the parser that produced +document+; its tokenizer stream, tree
#            and errors are read here.
# document - the parse result to print.
# opts     - option object; encoding, output, treebuilder, serializer and
#            error are read from it.
#
# An opts.output value other than :xml, :html, :hilite or :tree prints no
# document at all.
def self.print_output(parser, document, opts)
  if opts.encoding
    puts "Encoding: #{parser.tokenizer.stream.char_encoding}"
  end

  case opts.output
  when :xml then print document
  when :hilite then print document.hilite
  when :tree
    # A single node is wrapped so that fragments and documents share one path.
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each { |fragment| puts parser.tree.testSerializer(fragment) }
  when :html
    require 'html5/treewalkers'
    walker = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(walker, opts.serializer)
  end

  return unless opts.error

  report = []
  parser.errors.each do |pos, errorcode, datavars|
    # Fall back to a generic template when the error code has no message.
    template = HTML5::E[errorcode] || 'Unknown error "%(errorcode)"'
    report << "Line #{pos[0]} Col #{pos[1]} " + PythonicTemplate.new(template).to_s(datavars)
  end
  $stdout.write("\nParse errors:\n" + report.join("\n") + "\n")
end

.run ⇒ Object



244
245
246
247
# File 'lib/html5/cli.rb', line 244

# Command-line entry point: extracts the options from ARGV, then parses the
# input using those options together with ARGV itself.
def self.run
  parse(parse_opts(ARGV), ARGV)
end