Class: Hongkong::News::Scrapers::OrientalDailyScraper

Inherits:

Object

Object
Hongkong::News::Scrapers::OrientalDailyScraper

show all

Includes: PhantomScraper

Defined in: lib/hongkong/news/scrapers/oriental_daily_scraper.rb

Constant Summary collapse

LIST_URL = "http://orientaldaily.on.cc/"

Instance Method Summary collapse

#name ⇒ Object
#news(url) ⇒ Object

Extract article from page.
#news_links ⇒ Object

Extract all news links.

Methods included from PhantomScraper

#doc, #html, #screenshot_data

Instance Method Details

#name ⇒ `Object`



12
13
14

# File 'lib/hongkong/news/scrapers/oriental_daily_scraper.rb', line 12

# Short identifier for this scraper; #news stores it as the document source.
#
# @return [String] always "orientaldaily"
def name
  'orientaldaily'
end

#news(url) ⇒ `Object`

Extract article from page

# File 'lib/hongkong/news/scrapers/oriental_daily_scraper.rb', line 29

# Visit an article page and capture it as a Document.
#
# The document receives this scraper's name as its source, the text of the
# page's h1 element(s) as its title, the raw HTML, a screenshot, and the inner
# text of the "#contentCTN-top" and "#contentCTN-right" containers joined by a
# newline. When a photo image is found, its src is resolved against +url+ and
# stored as the image URL.
#
# @param url [String] address of the article page
# @return [Document] the populated document
def news(url)
  visit(url)

  # Look up the right-hand container before reading anything else; this
  # lookup is used as the wait for the content to be loaded.
  first("#contentCTN-right")

  Document.new.tap do |article|
    article.source = name
    article.title = doc.search("h1").text
    article.url = url
    article.html = html

    top_text = page.evaluate_script("HongKongNews.getInnerText('#contentCTN-top')")
    right_text = page.evaluate_script("HongKongNews.getInnerText('#contentCTN-right')")
    article.content = top_text + "\n" + right_text

    article.screenshot_data = screenshot_data

    # The image is optional: image_url stays unset when no photo is found.
    photo = doc.search("#contentCTN .photo img").first
    article.image_url = URI.join(url, photo["src"]).to_s if photo
  end
end

#news_links ⇒ `Object`

Extract all news links

# File 'lib/hongkong/news/scrapers/oriental_daily_scraper.rb', line 17

# Visit the listing page and collect one Link per article option.
#
# Each option under "#articleListSELECT" becomes a Link whose title is the
# option text and whose url is the option value resolved against LIST_URL.
# Links whose resolved url ends with "#" are dropped from the result.
#
# @return [Array<Link>] the remaining article links
def news_links
  visit(LIST_URL)

  candidates = all("#articleListSELECT option").map do |entry|
    Link.new.tap do |item|
      item.title = entry.text
      item.url = URI.join(LIST_URL, entry["value"]).to_s
    end
  end

  candidates.reject { |item| item.url.to_s.end_with?("#") }
end

Class: Hongkong::News::Scrapers::OrientalDailyScraper

Constant Summary collapse

Instance Method Summary collapse

Methods included from PhantomScraper

Instance Method Details

#name ⇒ Object

#news(url) ⇒ Object

#news_links ⇒ Object

#name ⇒ `Object`

#news(url) ⇒ `Object`

#news_links ⇒ `Object`