Class: GDNewsScraper::Scrapers::PolygonCOM::News

Inherits:
Object
  • Object
show all
Defined in:
lib/GDNewsScraper/scrapers/polygon_com/news.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(offset = 0) ⇒ News

Returns a new instance of News.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 40

# Fetches one page of the news archive and builds the stream from it.
#
# The stream Hash is laid out as:
#   stream[:stream]   - pagination data (:size, :pages, :prev, :next)
#   stream[:feed]     - static feed description (:url, :source, :label)
#   stream[:articles] - Array filled by #perform
#
# Any StandardError raised while fetching or parsing is swallowed on purpose
# (best-effort scraping). Because the skeleton is built before the network
# call, #stream is still a well-formed Hash with an empty :articles Array
# when that happens.
#
# @param offset [Integer, #to_s] interpolated as the last path segment of
#   the archive URL
def initialize(offset = 0)
  # Build the whole skeleton up front so a failed fetch never leaves
  # #stream as nil or without its :feed / :articles keys.
  @stream = {
    stream:   {},
    feed:     { url: STREAM_URI, source: 'polygon', label: 'Polygon' },
    articles: []
  }

  uri = "#{ STREAM_URI }/news/archives/#{ offset }"

  # open-uri's Kernel#open no longer accepts URLs on Ruby 3.0+; URI.open is
  # the replacement and exists since Ruby 2.5. Older interpreters fall back
  # to the original call.
  # NOTE(review): assumes the file already requires 'open-uri' - confirm.
  source = URI.respond_to?(:open) ? URI.open(uri, HEADERS) : open(uri, HEADERS)
  @page  = Nokogiri::HTML(source)

  # Whitespace-separated tokens of the pagination info text; the first and
  # the seventh token are reduced to their digits below.
  info = @page.css(DOM[:pagination][:info]).text.split

  pagination = stream[:stream]
  pagination[:size]  = info[0].gsub(/\D/, '').to_i
  pagination[:pages] = info[6].gsub(/\D/, '').to_i

  # Last path segment of each pagination link; nil.to_i makes it 0 when the
  # link is absent.
  pagination[:prev] = @page.css(DOM[:pagination][:previous])&.first&.attr('href')&.split('/')&.last.to_i
  pagination[:next] = @page.css(DOM[:pagination][:next])&.first&.attr('href')&.split('/')&.last.to_i

  perform
rescue
  # The value of #initialize is discarded by .new; the message is kept only
  # for callers that invoke the method directly.
  'There was a problem initializing the PolygonCOM::News Service'
end

Instance Attribute Details

#stream ⇒ Object

Returns the value of attribute stream.



38
39
40
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 38

# Reader for the @stream instance variable.
#
# #initialize assigns it a Hash and fills the :stream, :feed and :articles
# keys; the value is nil if @stream was never assigned.
#
# @return [Hash, nil] the current value of @stream
def stream
  @stream
end

Instance Method Details

#parse(article) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 72

# Converts one article node into a Hash describing that article.
#
# Keys, in insertion order: :id, :hash, :cover, :url, :title, :author,
# :date, :content, :tags. :cover is nil whenever the cover lookup raises.
# The article body is fetched through parse_article_body only after every
# other field has been extracted.
#
# @param article [#at, #css, #children] node holding a single article
# @return [Hash, String] the article Hash, or an error message String when
#   any StandardError is raised while extracting it
def parse(article)
  selectors = DOM[:article]

  video_post = !article.at(selectors[:inner_container_video]).nil?
  article_id = article.css(selectors[:inner_container]).first.attr('data-chorus-optimize-id').to_i

  # The first child of the title node carries both the link and the text.
  headline = article.css(selectors[:title]).children.first
  link     = headline.attr('href')
  heading  = headline.text

  # A missing cover must not discard the whole article.
  cover_src =
    begin
      article.children.css(selectors[:cover]).children.children.first.attr('src')
    rescue
      nil
    end

  meta = article.css(selectors[:meta]).first

  {
    id:      article_id,
    hash:    ::Base64.encode64("#{ heading } - #{ article_id }"),
    cover:   cover_src,
    url:     link,
    title:   heading,
    author:  meta.children[1].children[1].text,
    date:    JSON.parse(meta.attr('data-cdata'))['timestamp'].to_i,
    content: parse_article_body(link, video_post),
    tags:    heading.downcase.split
  }
rescue
  'There was a problem creating the article in the PolygonCOM::News Service'
end

#perform ⇒ Object



64
65
66
67
68
69
70
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 64

# Parses every article container found on the fetched page and appends each
# result of #parse to stream[:articles], in document order.
#
# @return [Object, String] whatever #each returns for the matched
#   containers, or an error message String when a StandardError is raised
def perform
  containers = @page.css(DOM[:article][:container])
  containers.each { |node| stream[:articles].push(parse(node)) }
rescue
  'There was a problem performing the initial task in the PolygonCOM::News Service'
end