Class: Hongkong::News::Scrapers::AppleDailyScraper

Inherits:
Object
  • Object
show all
Includes:
PhantomScraper
Defined in:
lib/hongkong/news/scrapers/apple_daily_scraper.rb

Instance Method Summary collapse

Methods included from PhantomScraper

#cleanup, #doc, #html, #screenshot_data

Instance Method Details

#name ⇒ Object



9
10
11
# File 'lib/hongkong/news/scrapers/apple_daily_scraper.rb', line 9

# Short identifier for this scraper; `news` stores it as the document's source.
#
# @return [String] always "appledaily"
def name
  'appledaily'
end

#news(url) ⇒ Object

Extract an article from an Apple Daily page



27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/hongkong/news/scrapers/apple_daily_scraper.rb', line 27

# Builds a Document describing the article found at +url+.
#
# The page is opened with +visit+, then the fields are filled from the parsed
# page (+doc+), the raw markup (+html+) and a script evaluated in the page.
#
# @param url [String] address of the article page
# @return [Document] populated with source, title, url, html, content and
#   image_url; image_url is nil when the page has no og:image meta tag
def news(url)
  visit url

  document = Document.new
  document.source = name
  document.title = doc.search("#articleContent h1").text.strip
  document.url = url
  document.html = html
  # NOTE(review): HongKongNews.getInnerText is a page-side helper defined
  # outside this method; presumably it returns the visible text of the
  # selected element -- confirm.
  document.content = page.evaluate_script("HongKongNews.getInnerText('#masterContent')")

  # Only a missing og:image tag is treated as "no image". The previous
  # `rescue nil` modifier also hid every other StandardError raised here.
  og_image = doc.search("//meta[@property='og:image']/@content").first
  document.image_url = og_image && og_image.text
  document
end

#news_links ⇒ Object

Extract all news links from Apple Daily



14
15
16
17
18
19
20
21
22
23
24
# File 'lib/hongkong/news/scrapers/apple_daily_scraper.rb', line 14

# Collects the article links offered by the "#article_ddl" drop-down on the
# Apple Daily front page.
#
# Each option becomes a Link whose title is the option text and whose url is
# the option's "value" attribute; options without a value are left out.
#
# @return [Array<Link>] links in the order the options appear
def news_links
  visit "http://hk.apple.nextmedia.com/"

  all("#article_ddl option").each_with_object([]) do |choice, found|
    entry = Link.new
    entry.title = choice.text
    entry.url = choice["value"]
    found << entry unless entry.url.nil?
  end
end