Class: FeedProcessorUtils::HTMLParser
- Inherits: Object
- Inheritance hierarchy:
  - Object
    - FeedProcessorUtils::HTMLParser
- Defined in:
- lib/feed_processor_utils/html_parser.rb
Constant Summary
- @@default_config =
File.join(File.dirname(__FILE__), "config/html_parser.yml")
Instance Method Summary
-
#initialize(config_file = nil) ⇒ HTMLParser
constructor
A new instance of HTMLParser.
- #parse_data(input) ⇒ Object
- #parse_url(url) ⇒ Object
Constructor Details
#initialize(config_file = nil) ⇒ HTMLParser
Returns a new instance of HTMLParser.
9 10 11 12 |
# File 'lib/feed_processor_utils/html_parser.rb', line 9

# Builds a parser whose configuration is read from a YAML file.
#
# @param config_file [String, nil] path to the YAML configuration; when
#   nil (or false) the class-level @@default_config path is used instead
# @return [HTMLParser] a new instance of HTMLParser
def initialize(config_file = nil)
  config_path = config_file || @@default_config
  raw_yaml = File.read(config_path)
  # NOTE(review): YAML.load can instantiate arbitrary objects on older
  # Psych versions; the config path should only ever point at trusted files.
  @config = YAML.load(raw_yaml)
end
Instance Method Details
#parse_data(input) ⇒ Object
14 15 16 17 18 19 20 21 22 23 |
# File 'lib/feed_processor_utils/html_parser.rb', line 14

# Parses an HTML string and extracts one value per configured field.
#
# Each (field_name, parsing_data) pair yielded by +fields+ is handed to
# +extract_field+ together with the parsed document; both helpers are
# defined elsewhere in this class. When the result has a truthy
# :lazy_image_tags entry, that value is passed to +parse_lazy_images!+
# before the hash is returned.
#
# @param input [String] raw HTML markup
# @return [Hash] field name => extracted value
def parse_data(input)
  document = Nokogiri::HTML(input)

  extracted = fields.each_with_object({}) do |(field_name, parsing_data), acc|
    acc[field_name] = extract_field(document, parsing_data)
  end

  lazy_tags = extracted[:lazy_image_tags]
  parse_lazy_images!(lazy_tags) if lazy_tags

  extracted
end
#parse_url(url) ⇒ Object
25 26 27 28 |
# File 'lib/feed_processor_utils/html_parser.rb', line 25

# Fetches the document at +url+ and runs its body through #parse_data.
#
# Uses URI.open instead of a bare Kernel#open: since Ruby 3.0 Kernel#open
# no longer fetches URLs even with open-uri loaded. The block form closes
# the underlying handle as soon as the body has been read.
#
# NOTE: URI.open hands strings that do not look like "scheme://..." to
# Kernel#open, so local file paths keep working; for the same reason this
# method must not be given untrusted input (a leading "|" runs a command).
#
# @param url [String] URL (or local path) of the HTML document
# @return [Hash] whatever #parse_data returns for the fetched body
def parse_url(url)
  require "open-uri" # no-op when already loaded; provides URI.open

  body = URI.open(url, &:read)
  parse_data(body)
end