Class: GDNewsScraper::Scrapers::PolygonCOM::News
- Inherits: Object
- Hierarchy: Object → GDNewsScraper::Scrapers::PolygonCOM::News
- Defined in:
- lib/GDNewsScraper/scrapers/polygon_com/news.rb
Instance Attribute Summary
-
#stream ⇒ Object
Returns the value of attribute stream.
Instance Method Summary
-
#initialize(offset = 0) ⇒ News
constructor
A new instance of News.
- #parse(article) ⇒ Object
- #perform ⇒ Object
Constructor Details
#initialize(offset = 0) ⇒ News
Returns a new instance of News.
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 40
#
# Downloads one page of the news archive, records pagination and feed
# metadata in #stream, then runs #perform to collect the page's articles.
#
# The resulting #stream hash has three top-level keys:
#   :stream   => { size:, pages:, prev:, next: } (all Integers)
#   :feed     => { url:, source:, label: }
#   :articles => Array filled by #perform
#
# Any StandardError raised along the way is rescued. Because `new` discards
# the value returned by #initialize, a failure is not reported to the caller;
# it only shows up as a missing or partially populated #stream.
#
# @param offset [Integer] value interpolated as the last path segment of the
#   archive URL
def initialize(offset = 0)
  uri = "#{ STREAM_URI }/news/archives/#{ offset }"

  # Kernel#open no longer opens URLs as of Ruby 3.0. Use URI.open where it is
  # public (Ruby 2.5+) and fall back to open-uri's Kernel#open on older Rubies.
  opener = ::URI.respond_to?(:open) ? ::URI.method(:open) : method(:open)

  # The block form closes the response handle as soon as it has been parsed.
  @page ||= opener.call(uri, HEADERS) { |io| Nokogiri::HTML(io) }

  @stream = {}

  # Whitespace-separated tokens of the pagination text; tokens 0 and 6 are
  # stripped of non-digits to obtain the size and page count.
  pagination = @page.css(DOM[:pagination][:info]).text.split

  stream[:stream] = {}
  stream[:stream][:size] = pagination[0].gsub(/\D/, '').to_i
  stream[:stream][:pages] = pagination[6].gsub(/\D/, '').to_i

  # Last path segment of the prev/next link; nil.to_i yields 0 when the link
  # is absent.
  stream[:stream][:prev] = @page.css(DOM[:pagination][:previous])&.first&.attr('href')&.split('/')&.last.to_i
  stream[:stream][:next] = @page.css(DOM[:pagination][:next])&.first&.attr('href')&.split('/')&.last.to_i

  stream[:feed] = {}
  stream[:feed][:url] = STREAM_URI
  stream[:feed][:source] = 'polygon'
  stream[:feed][:label] = 'Polygon'

  stream[:articles] = []

  perform
rescue
  return 'There was a problem initializing the PolygonCOM::News Service'
end
Instance Attribute Details
#stream ⇒ Object
Returns the value of attribute stream.
38 39 40 |
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 38
#
# Reader for the @stream instance variable.
#
# @return [Object] the current value of @stream (nil when it was never set)
def stream
  @stream
end
Instance Method Details
#parse(article) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 72
#
# Turns one article node into a hash with the keys :id, :hash, :cover, :url,
# :title, :author, :date, :content and :tags (in that order).
#
# A failure while looking up the cover image only sets :cover to nil. Any
# other StandardError makes the method return an error message String
# instead of a Hash.
#
# @param article [Object] article node; it is queried with #at, #css and
#   #children
# @return [Hash, String] the article hash, or the error message on failure
def parse(article)
  selectors = DOM[:article]

  video = !article.at(selectors[:inner_container_video]).nil?
  id = article.css(selectors[:inner_container]).first.attr('data-chorus-optimize-id').to_i

  # First child of the title element supplies both the link and the headline.
  headline = article.css(selectors[:title]).children.first
  href = headline.attr('href')
  title = headline.text

  # The cover is optional: a failed lookup must not abort the whole article.
  cover = begin
    article.children.css(selectors[:cover]).children.children.first.attr('src')
  rescue
    nil
  end

  meta = article.css(selectors[:meta]).first

  {
    id: id,
    hash: ::Base64.encode64("#{ title } - #{ id }"),
    cover: cover,
    url: href,
    title: title,
    author: meta.children[1].children[1].text,
    date: JSON.parse(meta.attr('data-cdata'))['timestamp'].to_i,
    content: parse_article_body(href, video),
    tags: title.downcase.split
  }
rescue
  'There was a problem creating the article in the PolygonCOM::News Service'
end
#perform ⇒ Object
64 65 66 67 68 69 70 |
# File 'lib/GDNewsScraper/scrapers/polygon_com/news.rb', line 64
#
# Runs #parse on every article container found in the fetched page and
# appends each result to stream[:articles].
#
# NOTE(review): #parse returns an error message String when it fails, so
# stream[:articles] can end up holding Strings next to article Hashes.
#
# @return [Object, String] the collection of article nodes that was iterated,
#   or an error message String when a StandardError is raised
def perform
  article_nodes = @page.css(DOM[:article][:container])
  article_nodes.each { |node| stream[:articles] << parse(node) }
rescue
  'There was a problem performing the initial task in the PolygonCOM::News Service'
end