Class: TaiwaneseNewsParser::Parser::LibertyTimesBig5
Instance Attribute Summary
#article, #url
Class Method Summary
collapse
Instance Method Summary
collapse
applicable_parser, #clean_up, #doc, #initialize, #reproduced?, subclasses
Class Method Details
.applicable?(url) ⇒ Boolean
10
11
12
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 10
# Tells whether +url+ looks like an article page this parser handles.
#
# The pattern requires the host fragment "libertytimes.com.tw" followed by
# a path of the form /<digits>/<word>/<word>/<digits>/<anything>.htm.
#
# @param url [String] the URL to test
# @return [Boolean] true when the URL matches the article pattern,
#   false otherwise (a strict boolean, as the `?` suffix promises,
#   rather than a MatchData/nil pair)
def self.applicable?(url)
  !url.match(%r{libertytimes\.com\.tw/\d+/\w+/\w+/\d+/.+\.htm}).nil?
end
|
.domain ⇒ Object
2
3
4
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 2
# Host name associated with this parser.
#
# @return [String] the literal "libertytimes.com.tw"
def self.domain
  "libertytimes.com.tw"
end
|
.names ⇒ Object
6
7
8
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 6
# Display name of the news outlet.
#
# NOTE(review): despite the plural method name this returns a single
# String, not an Array. If callers expect a list of names, confirm
# whether a %w literal was intended before changing the return type.
#
# @return [String] the outlet name
def self.names
  '自由時報'
end
|
.parse_url_id(url) ⇒ Object
48
49
50
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 48
# Extracts the article identifier from a Liberty Times URL.
#
# The identifier is everything between the host and the ".htm" suffix,
# e.g. "2013/new/apr/13/today-t3". Both http and https are accepted and
# the "www." prefix is optional, so that the same article yields the same
# id regardless of which of those URL variants was supplied.
#
# @param url [String] the article URL
# @return [String, nil] the path-based id, or nil when the URL does not
#   match the expected shape
def self.parse_url_id(url)
  url[%r{https?://(?:www\.)?libertytimes\.com\.tw/(.*)\.htm}, 1]
end
|
Instance Method Details
#clean_url ⇒ Object
43
44
45
46
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 43
# Replaces the stored article URL with its cleaned form.
#
# Builds a TaiwaneseNewsParser::UrlCleaner with an empty-string argument
# and passes the current @article[:url] through its #clean method.
# NOTE(review): the meaning of the empty-string constructor argument is
# not visible here — confirm against UrlCleaner.
#
# @return [Object] whatever UrlCleaner#clean returned (also stored in
#   @article[:url])
def clean_url
  @article[:url] = TaiwaneseNewsParser::UrlCleaner.new('').clean(@article[:url])
end
|
#parse ⇒ Object
15
16
17
18
19
20
21
22
23
24
25
26
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 15
# Fills @article from the parsed document and returns it.
#
# Fields written, in order: :title, :company_name, :content,
# :reporter_name, :published_at. The content must be stored before the
# reporter name is parsed, because parse_reporter_name reads
# @article[:content]. clean_up is called once all fields are set.
#
# @return [Object] the populated @article
def parse
  title_node = doc.at_css('#newtitle')
  @article[:title] = title_node.text
  @article[:company_name] = parse_company_name

  # Body paragraphs, excluding the title span and picture captions.
  body_paragraphs = doc.css('#newsContent>span:not(#newtitle)>p:not(.picture)')
  @article[:content] = body_paragraphs.text
  @article[:reporter_name] = parse_reporter_name

  date_text = doc.at_css('#date').text
  @article[:published_at] = Time.parse(date_text)

  clean_up
  @article
end
|
#parse_company_name ⇒ Object
39
40
41
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 39
# Name of the publishing company; fixed for this parser.
#
# @return [String] the outlet name
def parse_company_name
  "自由時報"
end
|
#parse_reporter_name ⇒ Object
28
29
30
31
32
33
34
35
36
37
|
# File 'lib/taiwanese_news_parser/parser/liberty_times_big5.rb', line 28
# Extracts the reporter's name from the byline in @article[:content].
#
# Three byline shapes are tried in order; the first one that matches wins:
#   1. A bracketed byline "〔...<slash>...〕": the text after "記者" in the
#      part before the slash is the name (nil if "記者" is absent there).
#   2. An unbracketed "記者<name><slash>".
#   3. A parenthesised "(文<slash><name>)", with either ASCII or
#      full-width parentheses.
# <slash> is any of "/", "／" (full-width) or "╱".
#
# @return [String, nil] the reporter name, or nil when no byline is found
def parse_reporter_name
  content = @article[:content]

  if (byline = content.match(%r{〔(.*?)[/／╱](.*?)〕}))
    byline[1][%r{記者(.+)}, 1]
  elsif (byline = content.match(%r{記者(.+?)[/／╱]}))
    byline[1]
  elsif (byline = content.match(%r{[（(]文[/／╱](.*?)[）)]}))
    # The parentheses are literal delimiters here, so group 1 is the name
    # itself rather than the "文/" prefix.
    byline[1]
  end
end
|