Class: TaiwaneseNewsParser::Parser::LibertyTimesBig5

Inherits:
TaiwaneseNewsParser::Parser show all
Defined in:
lib/taiwanese_news_parser/parser/liberty_times_big5.rb

Instance Attribute Summary

Attributes inherited from TaiwaneseNewsParser::Parser

#article, #url

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from TaiwaneseNewsParser::Parser

applicable_parser, #clean_up, #doc, #initialize, #reproduced?, subclasses

Constructor Details

This class inherits a constructor from TaiwaneseNewsParser::Parser

Class Method Details

.applicable?(url) ⇒ Boolean

Returns:

  • (Boolean)


10
11
12
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 10

# Tells whether +url+ looks like a Liberty Times (Big5 site) article URL,
# i.e. contains
# `libertytimes.com.tw/<digits>/<word>/<word>/<digits>/<anything>.htm`.
#
# The match is unanchored, so the scheme and any host prefix are ignored.
#
# @param url [String] the URL to test
# @return [Boolean] true when the pattern is found, false otherwise
def self.applicable?(url)
  # `=~` yields an index or nil; `!!` turns that into a strict true/false
  # so the result matches the documented Boolean return type.
  !!(url =~ %r{libertytimes\.com\.tw/\d+/\w+/\w+/\d+/.+\.htm})
end

.domain ⇒ Object



2
3
4
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 2

# Host name this parser is associated with.
#
# @return [String] the literal domain 'libertytimes.com.tw'
def self.domain
  "libertytimes.com.tw"
end

.names ⇒ Object



6
7
8
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 6

# Publisher name recognised by this parser.
#
# NOTE(review): despite the plural method name this returns a single
# String, not an Array — confirm that callers expect a String.
#
# @return [String] the literal '自由時報'
def self.names
  '自由時報'
end

.parse_url_id(url) ⇒ String?



48
49
50
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 48

# Extracts the article identifier from a Liberty Times URL: everything
# between the host's trailing slash and the last `.htm` in the string.
#
# Both `http` and `https` are accepted and the `www.` prefix is optional,
# so that URLs accepted by the (unanchored) applicability pattern also
# produce an id.
#
# @param url [String] the article URL
# @return [String, nil] the captured path without `.htm`, or nil when the
#   URL does not match
def self.parse_url_id(url)
  url[%r{https?://(?:www\.)?libertytimes\.com\.tw/(.*)\.htm}, 1]
end

Instance Method Details

#clean_url ⇒ Object



43
44
45
46
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 43

# Replaces @article[:url] with the result of passing it through a
# TaiwaneseNewsParser::UrlCleaner constructed with an empty string.
#
# @return [Object] the value stored back into @article[:url]
def clean_url
  @article[:url] = TaiwaneseNewsParser::UrlCleaner.new('').clean(@article[:url])
end

#parse ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 15

# Populates @article from the document returned by #doc and returns it.
#
# Fields written, in order: :title, :company_name, :content,
# :reporter_name, :published_at. The content is stored before
# #parse_reporter_name is called because that helper reads
# @article[:content]. #clean_up runs last, after all fields are set.
#
# @return [Object] @article
def parse
  headline = doc.at_css('#newtitle')
  @article[:title] = headline.text
  @article[:company_name] = parse_company_name

  # Paragraphs of the story body, excluding the title span and picture blocks.
  paragraphs = doc.css('#newsContent>span:not(#newtitle)>p:not(.picture)')
  @article[:content] = paragraphs.text
  @article[:reporter_name] = parse_reporter_name

  timestamp = doc.at_css('#date').text
  @article[:published_at] = Time.parse(timestamp)

  clean_up
  @article
end

#parse_company_name ⇒ Object



39
40
41
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 39

# Company name recorded for every article handled by this parser.
#
# @return [String] the literal '自由時報'
def parse_company_name
  "自由時報"
end

#parse_reporter_name ⇒ String?



28
29
30
31
32
33
34
35
36
37
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 28

# Extracts the reporter's name from @article[:content].
#
# Three byline shapes are tried in order; the first that matches decides
# the result:
#
# 1. A bracketed dateline `〔...<slash>...〕` — the text after `記者` in
#    the part before the slash (nil when that part has no `記者`).
# 2. A bare `記者NAME<slash>` anywhere in the content.
# 3. A parenthesised credit `(文<slash>NAME)`, with ASCII or fullwidth
#    parentheses.
#
# `<slash>` is any of ASCII `/`, fullwidth `／` or box-drawing `╱`.
#
# @return [String, nil] the reporter name, or nil when no pattern yields one
def parse_reporter_name
  content = @article[:content]

  if (match = content.match(%r{〔(.*?)[/／╱](.*?)〕}))
    match[1][%r{記者(.+)}, 1]
  elsif (match = content.match(%r{記者(.+?)[/／╱]}))
    match[1]
  elsif (match = content.match(%r{[（(]文[/／╱](.+?)[）)]}))
    # The parentheses are literal text here (inside character classes),
    # so the capture is the name itself rather than the `文/` prefix.
    match[1]
  end
end