Class: Newly::NewsCrawler

Inherits:
Object
  • Object
show all
Defined in:
lib/newly/news_crawler.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(args) ⇒ NewsCrawler

Returns a new instance of NewsCrawler.



10
11
12
13
14
15
16
# File 'lib/newly/news_crawler.rb', line 10

# Builds a crawler for a single news page.
#
# @param args [Hash] construction options
# @option args [String] :url address of the page to crawl (required)
# @option args [Object] :feed feed settings; stored as given. #fetch later
#   reads +container+ and +limit+ from it.
# @option args [Object] :selector pre-built selector to use instead of
#   downloading the page. When omitted, the page at +:url+ is fetched, parsed
#   with Nokogiri and wrapped in a Newly::Selector.
# @raise [RuntimeError] when +:url+ is missing
def initialize(args)
  @feed = args[:feed]
  @url = args[:url]
  raise "The url is required" unless @url

  @selector = args[:selector] || begin
    # URI.open (from open-uri) rather than a bare Kernel#open: since Ruby 3.0
    # Kernel#open no longer understands URLs. The block form also closes the
    # downloaded stream as soon as Nokogiri has parsed it.
    document = URI.open(@url) { |page| Nokogiri::HTML(page) }
    Newly::Selector.new(document)
  end
end

Instance Attribute Details

#selector ⇒ Object (readonly)

Returns the value of attribute selector.



8
9
10
# File 'lib/newly/news_crawler.rb', line 8

# Read-only accessor for the selector this crawler queries.
#
# @return [Object] the value held in @selector (nil if it was never assigned)
def selector; @selector; end

#title ⇒ Object (readonly)

Returns the value of attribute title.



8
9
10
# File 'lib/newly/news_crawler.rb', line 8

# Read-only accessor for the crawler's title.
#
# NOTE(review): the constructor does not appear to assign @title, so this
# presumably returns nil unless another method sets it — confirm.
#
# @return [Object, nil] the value held in @title
def title; @title; end

#url ⇒ Object (readonly)

Returns the value of attribute url.



8
9
10
# File 'lib/newly/news_crawler.rb', line 8

# Read-only accessor for the address this crawler was built with.
#
# @return [Object] the value held in @url
def url; @url; end

Instance Method Details

#fetch ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/newly/news_crawler.rb', line 18

# Collects the news items found by the selector.
#
# Asks the selector for every entry inside the feed's container (capped at the
# feed's limit), turns each entry into a news object with build_news_by, drops
# entries for which that call returns nil/false, and removes duplicates.
#
# @return [Array] the distinct news objects, in the order first encountered
def fetch
  entries = @selector.all(container: @feed.container, max: @feed.limit)

  # A Set keeps only one copy of news objects that compare equal.
  distinct = entries.each_with_object(Set.new) do |entry, collected|
    built = build_news_by(entry)
    collected << built if built
  end

  distinct.to_a
end