Class: FeedProcessorUtils::HTMLParser

Inherits:
Object
  • Object
show all
Defined in:
lib/feed_processor_utils/html_parser.rb

Constant Summary collapse

# Path of the YAML configuration that #initialize falls back to when it is
# called without a config_file: "config/html_parser.yml", resolved relative
# to the directory containing this source file.
@@default_config =
File.join(File.dirname(__FILE__), "config/html_parser.yml")

Instance Method Summary collapse

Constructor Details

#initialize(config_file = nil) ⇒ HTMLParser

Returns a new instance of HTMLParser.



9
10
11
12
# File 'lib/feed_processor_utils/html_parser.rb', line 9

# Builds a parser whose configuration is read from a YAML file.
#
# @param config_file [String, nil] path to the YAML configuration; when nil
#   (or false) the class-level @@default_config path is used instead
def initialize(config_file = nil)
  path = config_file || @@default_config
  # NOTE(review): YAML.load is used as-is; on older Psych versions it can
  # instantiate arbitrary objects, so only point this at trusted files.
  @config = YAML.load(File.read(path))
end

Instance Method Details

#parse_data(input) ⇒ Object



14
15
16
17
18
19
20
21
22
23
# File 'lib/feed_processor_utils/html_parser.rb', line 14

# Parses an HTML string and extracts every configured field from it.
#
# Each entry yielded by +fields+ is treated as a (name, parsing data) pair;
# the parsing data is handed to +extract_field+ together with the parsed
# document, and the result is stored under the field name.
#
# @param input [String] HTML markup passed to Nokogiri::HTML
# @return [Hash] extracted values keyed by field name
def parse_data(input)
  document = Nokogiri::HTML(input)

  extracted = fields.each_with_object({}) do |(name, rule), result|
    result[name] = extract_field(document, rule)
  end

  # Lazy images are only post-processed when that field came back truthy.
  # The helper's return value is discarded (presumably it works in place —
  # confirm against its definition).
  lazy_tags = extracted[:lazy_image_tags]
  parse_lazy_images!(lazy_tags) if lazy_tags

  extracted
end

#parse_url(url) ⇒ Object



25
26
27
28
# File 'lib/feed_processor_utils/html_parser.rb', line 25

# Fetches the document at +url+ and runs it through #parse_data.
#
# The source is opened in block form so the underlying handle is closed as
# soon as its content has been read, instead of lingering until GC.
#
# URI.open (open-uri, Ruby 2.5+) is preferred because bare Kernel#open no
# longer fetches URLs on Ruby 3.0+. When URI.open is unavailable the call
# falls back to Kernel.open, matching the previous behaviour on old Rubies.
#
# NOTE(review): strings that do not look like URLs still end up in
# Kernel#open, which may treat a leading "|" as a command to spawn. Do not
# pass untrusted input here without validating it first.
#
# @param url [String] location of the HTML document to fetch
# @return [Object] whatever #parse_data returns for the fetched content
def parse_url(url)
  opener = defined?(URI) && URI.respond_to?(:open) ? URI : Kernel
  input = opener.open(url, &:read)
  parse_data(input)
end