Class: Parser::NewsKZ

Inherits:
Base
  • Object
show all
Defined in:
lib/fly_parser/sources/news-kz.rb

Instance Method Summary

Methods inherited from Base

#click, #collect_between, #copyright, #next_page, #parse_page

Constructor Details

#initialize(source, options = {}) ⇒ NewsKZ

Returns a new instance of NewsKZ.



3
4
5
6
# File 'lib/fly_parser/sources/news-kz.rb', line 3

# Creates a NewsKZ parser.
#
# Sets @delay to 2 and then hands both arguments on to Base#initialize.
# NOTE(review): @delay is presumably a pause between page requests that
# Base consumes — confirm against Base.
#
# @param source the value forwarded unchanged to Base#initialize
# @param options [Hash] forwarded unchanged to Base#initialize
def initialize(source, options = {})
  @delay = 2
  super(source, options)
end

Instance Method Details

#parse_all ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/fly_parser/sources/news-kz.rb', line 8

# Parses every article linked from the news listing held in @source.
#
# Each "li.c__news_item a:first" link is opened with #click and the
# resulting page is reduced to a hash. An article is skipped when it has no
# ".wp-caption img" carrying a src attribute, or when extracting it raises a
# StandardError (the error message is printed). Errors raised by #click
# itself are not rescued here and propagate to the caller.
#
# @return [Array<Hash>] one hash per parsed article with the keys :title,
#   :content (cleaned article HTML followed by a source attribution built
#   from @copyright[:url] and @copyright[:title]) and :poster_image (the
#   src of the first captioned image)
def parse_all
  links = @source.search("li.c__news_item a:first")

  links.map do |link|
    page = click(link)
    begin
      title = page.search(".c__article_caption").text

      content_wrapper = page.search('.c__article_text')

      # The first captioned image doubles as the poster; without one (or
      # without a src on it) the article is skipped.
      image = content_wrapper.search('.wp-caption img').first
      poster_image = image && image['src']
      next unless poster_image

      # Strip everything that must not end up in the stored article body.
      content_wrapper.search('.wp-caption').remove
      content_wrapper.search('.c__article_mistake').remove
      content_wrapper.search('p[style="display:none"]').remove
      content_wrapper.search("a").remove
      content_wrapper.search("span:contains(Копирование)").remove

      # Non-greedy and multiline: each iframe is removed on its own, so text
      # sitting between two iframes on the same line is preserved.
      full_desc = content_wrapper.to_html.gsub(%r{<iframe\b.*?</iframe>}m, '')

      # Named "attribution" so the inherited #copyright method is not shadowed.
      attribution = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"

      { title: title, content: full_desc + attribution, poster_image: poster_image }
    rescue StandardError => e
      # Only StandardError is rescued so that Interrupt, SystemExit and
      # similar still stop the run; a broken article is reported and dropped.
      puts e.message
      nil
    end
  end.compact
end