Class: IMW::Parsers::HtmlParser

Inherits:
Object
  • Object
show all
Includes:
HtmlMatchers
Defined in:
lib/titi/ignore/html_parser.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(arg_spec = nil) ⇒ HtmlParser

Parse Tree



205
206
207
208
# File 'lib/titi/ignore/html_parser.rb', line 205

# Set up the parser by building its parse tree from a spec.
#
# @param arg_spec [Object, nil] spec to build from; when it is nil (or false)
#   the class-level +parser_spec+ is used instead
def initialize(arg_spec = nil)
  self.parse_tree = IMW::Parsers::HtmlMatchers.build_parse_tree(
    arg_spec || self.class.parser_spec
  )
end

Instance Attribute Details

#parse_tree ⇒ Object

Returns the value of attribute parse_tree.



200
201
202
# File 'lib/titi/ignore/html_parser.rb', line 200

# Reader for the parse_tree attribute.
#
# @return [Object, nil] whatever is currently stored in @parse_tree
def parse_tree; @parse_tree; end

Class Method Details

.attr(selector, attr, matcher = nil) ⇒ Object

match the attr attribute of the first element given by selector



232
233
234
# File 'lib/titi/ignore/html_parser.rb', line 232

# Build a matcher for the +attr+ attribute of the element found by +selector+.
#
# @param selector [Object] selector handed to MatchAttribute
# @param attr [Object] attribute name handed to MatchAttribute
# @param matcher [Object, nil] optional nested spec; it is passed through
#   HtmlMatchers.build_parse_tree before being given to the matcher
# @return [MatchAttribute]
def self.attr(selector, attr, matcher = nil)
  subtree = IMW::Parsers::HtmlMatchers.build_parse_tree(matcher)
  MatchAttribute.new(selector, attr, subtree)
end

.href(selector, matcher = nil) ⇒ Object

shorthand for attr(foo, ‘href’)



236
237
238
# File 'lib/titi/ignore/html_parser.rb', line 236

# Shorthand for +attr(selector, 'href', matcher)+.
#
# @param selector [Object] selector forwarded to +attr+
# @param matcher [Object, nil] optional nested spec forwarded to +attr+
# @return [Object] whatever +attr+ returns
def self.href(selector, matcher = nil)
  attr(selector, 'href', matcher)
end

.one(selector, matcher) ⇒ Object

one("hpricot_path") returns the first match to hpricot_path.
one("hpricot_path", /spec/) applies spec to the first match to hpricot_path.



228
229
230
# File 'lib/titi/ignore/html_parser.rb', line 228

# Build a matcher for the first element found by +selector+.
#
# The documented one-argument form, one("hpricot_path"), needs +matcher+ to
# be optional, so it defaults to nil like the sibling builders (attr, href,
# src, ...), which hand a nil matcher to build_parse_tree in the same way.
#
# @param selector [Object] selector handed to MatchFirstElement
# @param matcher [Object, nil] optional nested spec; it is passed through
#   HtmlMatchers.build_parse_tree before being given to the matcher
# @return [MatchFirstElement]
def self.one(selector, matcher = nil)
  MatchFirstElement.new(selector, IMW::Parsers::HtmlMatchers.build_parse_tree(matcher))
end

.parser_spec ⇒ Object

See IMW::HtmlParser for syntax



214
215
216
# File 'lib/titi/ignore/html_parser.rb', line 214

# Placeholder for the class-level parser spec; this version always raises.
#
# @raise [RuntimeError] always, with a message asking for an override
def self.parser_spec
  raise RuntimeError, "Override this to create your own parser spec"
end

.proc(selector, proc, matcher = nil) ⇒ Object



244
245
246
# File 'lib/titi/ignore/html_parser.rb', line 244

# Build a MatchProc for +selector+ with the callable +proc+.
#
# @param selector [Object] selector handed to MatchProc
# @param proc [Object] callable handed to MatchProc
# @param matcher [Object, nil] optional nested spec; it is passed through
#   HtmlMatchers.build_parse_tree before being given to the matcher
# @return [MatchProc]
def self.proc(selector, proc, matcher = nil)
  subtree = IMW::Parsers::HtmlMatchers.build_parse_tree(matcher)
  MatchProc.new(selector, proc, subtree)
end

.re(selector, re) ⇒ Object



264
265
266
# File 'lib/titi/ignore/html_parser.rb', line 264

# Build a MatchRegexp for +selector+ and +re+ with the :capture option set
# to 1 and no nested matcher.
#
# @param selector [Object] selector handed to MatchRegexp
# @param re [Object] pattern handed to MatchRegexp
# @return [MatchRegexp]
def self.re(selector, re)
  MatchRegexp.new(
    selector,
    re,
    nil,
    :capture => 1
  )
end

.re_all(selector, re, matcher = nil) ⇒ Object



267
268
269
# File 'lib/titi/ignore/html_parser.rb', line 267

# Build a MatchRegexpRepeatedly for +selector+ and +re+.
#
# NOTE(review): +matcher+ is accepted but never forwarded to the matcher
# being built. Confirm whether that is intentional before relying on it.
#
# @param selector [Object] selector handed to MatchRegexpRepeatedly
# @param re [Object] pattern handed to MatchRegexpRepeatedly
# @param matcher [Object, nil] currently unused
# @return [MatchRegexpRepeatedly]
def self.re_all(selector, re, matcher = nil)
  repeated = MatchRegexpRepeatedly.new(selector, re)
  repeated
end

.re_group(selector, re) ⇒ Object



261
262
263
# File 'lib/titi/ignore/html_parser.rb', line 261

# Build a MatchRegexp for +selector+ and +re+ with no extra arguments.
#
# @param selector [Object] selector handed to MatchRegexp
# @param re [Object] pattern handed to MatchRegexp
# @return [MatchRegexp]
def self.re_group(selector, re)
  group_matcher = MatchRegexp.new(selector, re)
  group_matcher
end

.src(selector, matcher = nil) ⇒ Object

shorthand for attr(foo, ‘src’)



240
241
242
# File 'lib/titi/ignore/html_parser.rb', line 240

# Shorthand for +attr(selector, 'src', matcher)+.
#
# @param selector [Object] selector forwarded to +attr+
# @param matcher [Object, nil] optional nested spec forwarded to +attr+
# @return [Object] whatever +attr+ returns
def self.src(selector, matcher = nil)
  attr(selector, 'src', matcher)
end

.strip(selector, matcher = nil) ⇒ Object



257
258
259
# File 'lib/titi/ignore/html_parser.rb', line 257

# Build a proc matcher that strips surrounding whitespace from the value.
#
# The lambda returns nil for a nil/false value instead of raising
# NoMethodError, mirroring the guard used by the sibling +to_json+ and
# +to_num+ builders.
#
# @param selector [Object] selector forwarded to +proc+
# @param matcher [Object, nil] optional nested spec forwarded to +proc+
# @return [Object] whatever +proc+ returns
def self.strip selector, matcher=nil
  proc selector, lambda{|v| v.strip if v }, matcher
end

.to_json(selector, matcher = nil) ⇒ Object



253
254
255
# File 'lib/titi/ignore/html_parser.rb', line 253

# Build a proc matcher that calls +to_json+ on the value.
#
# The lambda yields nil when the value is nil or false.
#
# @param selector [Object] selector forwarded to +proc+
# @param matcher [Object, nil] optional nested spec forwarded to +proc+
# @return [Object] whatever +proc+ returns
def self.to_json(selector, matcher = nil)
  jsonify = lambda do |value|
    value ? value.to_json : nil
  end
  proc(selector, jsonify, matcher)
end

.to_num(selector, matcher = nil) ⇒ Object

strip “,”s (!! thus disrespecting locale !!!) and convert to int



250
251
252
# File 'lib/titi/ignore/html_parser.rb', line 250

# Build a proc matcher that removes every "," from the value's string form
# and converts the result with +to_i+ (locale is deliberately ignored).
#
# The lambda yields nil when the value is nil or false.
#
# @param selector [Object] selector forwarded to +proc+
# @param matcher [Object, nil] optional nested spec forwarded to +proc+
# @return [Object] whatever +proc+ returns
def self.to_num(selector, matcher = nil)
  commas_to_int = lambda do |num|
    num ? num.to_s.delete(',').to_i : nil
  end
  proc(selector, commas_to_int, matcher)
end

Instance Method Details

#parse(doc) ⇒ Object

Walk the parse tree over doc and return the result of the match.



221
222
223
# File 'lib/titi/ignore/html_parser.rb', line 221

# Run this parser's parse tree against +doc+.
#
# @param doc [Object] document handed to the parse tree's +match+
# @return [Object] whatever +parse_tree.match(doc)+ returns
def parse(doc)
  tree = self.parse_tree
  tree.match(doc)
end