Class: Parser::NewsKZ
Instance Method Summary
- #initialize(source, options = {}) ⇒ NewsKZ (constructor): Returns a new instance of NewsKZ.
- #parse_all ⇒ Object
Methods inherited from Base
#click, #collect_between, #copyright, #next_page, #parse_page
Constructor Details
#initialize(source, options = {}) ⇒ NewsKZ
Returns a new instance of NewsKZ.
3 4 5 6 |
# File 'lib/fly_parser/sources/news-kz.rb', line 3

# Builds a NewsKZ parser.
#
# Sets @delay to 2 and then calls the inherited initializer with a bare
# `super`, which forwards `source` and `options` unchanged.
# NOTE(review): @delay is presumably a pause between page requests, in
# seconds — confirm against Base, where it is consumed.
#
# @param source the source handed on to the parent initializer
# @param options [Hash] optional settings handed on to the parent initializer
# @return [NewsKZ] a new instance of NewsKZ
def initialize(source, options = {})
  @delay = 2
  super
end
Instance Method Details
#parse_all ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/fly_parser/sources/news-kz.rb', line 8

# Parses every article linked from the listing held in @source.
#
# Each link matched by "li.c__news_item a:first" is opened with #click. From
# the resulting page the method reads the title, the src of the first
# captioned image, and the article body with unwanted nodes removed.
# Articles without a captioned image are skipped. An article whose
# extraction raises a StandardError is reported with `puts` and skipped.
# Errors raised by #click itself are not rescued and propagate to the caller.
#
# @return [Array<Hash>] one hash per parsed article with the keys :title,
#   :content (cleaned article HTML followed by a source attribution
#   paragraph built from @copyright) and :poster_image
def parse_all
  links = @source.search("li.c__news_item a:first")
  links.map do |link|
    page = click(link)
    begin
      title = page.search(".c__article_caption").text
      content_wrapper = page.search('.c__article_text')
      image_wrapper = content_wrapper.search('.wp-caption img').first
      # No captioned image means no poster for the article: skip it.
      next unless image_wrapper

      # Read the image URL before the caption node is removed below.
      poster_image = image_wrapper.attributes['src'].value

      # Remove nodes that must not appear in the stored body. Iframes are
      # removed as nodes rather than with a regex over the serialized HTML,
      # so markup sitting between two iframes is kept.
      [
        '.wp-caption',
        '.c__article_mistake',
        'p[style="display:none"]',
        'a',
        'span:contains(Копирование)',
        'iframe'
      ].each { |selector| content_wrapper.search(selector).remove }

      # Named `attribution` so it does not shadow the inherited #copyright.
      attribution = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
      { title: title, content: content_wrapper.to_html + attribution, poster_image: poster_image }
    rescue StandardError => e
      # Best effort: report the problem and drop this article only.
      puts e.message
      nil
    end
  end.compact
end