Class: TaiwaneseNewsParser::Parser::LibertyTimes
Instance Attribute Summary
#article, #url
Class Method Summary
collapse
Instance Method Summary
collapse
applicable_parser, #clean_up, #initialize, #reproduced?, subclasses
Class Method Details
.applicable?(url) ⇒ Boolean
10
11
12
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 10
# Reports whether this parser should handle the given URL.
#
# The check is a plain substring test: the URL qualifies when it contains
# the Liberty Times iservice host name anywhere in the string.
#
# @param url [String] the article URL to test
# @return [Boolean] true when +url+ contains 'iservice.ltn.com.tw'
def self.applicable?(url)
  host_marker = 'iservice.ltn.com.tw'
  url.include?(host_marker)
end
|
.domain ⇒ Object
2
3
4
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 2
# Domain name associated with this news source.
#
# @return [String] always 'libertytimes.com.tw'
def self.domain
  source_domain = 'libertytimes.com.tw'
  source_domain
end
|
.names ⇒ Object
6
7
8
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 6
# Display name of the publisher handled by this parser.
#
# NOTE(review): the original literal used `%{...}`, which builds a single
# String rather than an Array. If callers expect a list of names, `%w{...}`
# was probably intended -- confirm against the callers before changing it.
#
# @return [String] the publisher name
def self.names
  '自由時報'
end
|
.parse_url_id(url) ⇒ Object
62
63
64
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 62
# Pulls the numeric article id out of a `news.php?no=<digits>` URL.
#
# @param url [String] the article URL
# @return [String, nil] the digits following `no=`, or nil when the URL
#   does not contain a `news.php?no=<digits>` segment
def self.parse_url_id(url)
  id_match = url.match(%r{news\.php\?no=(\d+)})
  id_match && id_match[1]
end
|
Instance Method Details
#clean_url ⇒ Object
57
58
59
60
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 57
# Replaces the article's stored URL with the result of running it through a
# TaiwaneseNewsParser::UrlCleaner built with the argument 'no'.
#
# NOTE(review): 'no' is presumably the query parameter the cleaner should
# keep (it is the article id key used by parse_url_id) -- confirm against
# UrlCleaner.
#
# @return [Object] the cleaned URL that was written to @article[:url]
def clean_url
  url_cleaner = TaiwaneseNewsParser::UrlCleaner.new('no')
  cleaned = url_cleaner.clean(@article[:url])
  @article[:url] = cleaned
end
|
#doc ⇒ Object
14
15
16
17
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 14
# Returns the parsed HTML document for the article at +url+.
#
# The page is downloaded and parsed on the first call only; later calls
# return the cached document, so callers that read several fields do not
# trigger one download per field. The raw response body is kept in @raw.
#
# NOTE(review): `open(url)` on a URL string relies on open-uri's patch of
# Kernel#open, presumably loaded elsewhere in the project. That patch was
# removed in Ruby 3.0; consider `URI.open` if newer Rubies must be supported.
#
# @return [Object] the document produced by Nokogiri::HTML
def doc
  @doc ||= begin
    @raw = open(url).read
    Nokogiri::HTML(@raw)
  end
end
|
#parse ⇒ Object
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 19
# Fills @article with the title, company name, body text, publication time
# and reporter name read from the article page, then runs clean_up.
#
# Publication time: when the `.content .date` text contains a full
# `YYYY-M-D HH:MM` stamp it is parsed as-is (with seconds set to 00);
# otherwise the first `HH:MM` found is combined with today's date.
#
# @return [Hash] the populated @article hash
# @raise [NoMethodError] when an expected node is missing from the page or
#   the date text contains no `HH:MM` time
def parse
  # Hold the document in a local so `doc` is invoked once per parse rather
  # than once per field.
  page = doc

  @article[:title] = page.at_css('.content h1 text()').text
  @article[:company_name] = parse_company_name
  @article[:content] = page.css('.content p').text

  # Read the date text once; both the full-stamp and time-only paths use it.
  date_text = page.at_css('.content .date').text
  stamp = date_text[%r{\d{4}-\d{1,2}-\d{1,2} \d{2}:\d{2}}]

  @article[:published_at] =
    if stamp
      Time.parse("#{stamp}:00")
    else
      # Only a clock time is present, so pair it with today's date.
      clock = date_text.match(%r{(\d{2}):(\d{2})})
      today = Date.today
      Time.new(today.year, today.month, today.day, clock[1].to_i, clock[2].to_i)
    end

  @article[:reporter_name] = parse_reporter_name
  clean_up
  @article
end
|
#parse_company_name ⇒ Object
53
54
55
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 53
# Name of the publishing company for articles from this source.
#
# The value is fixed; nothing is read from the page.
#
# @return [String] the publisher name
def parse_company_name
  company = '自由時報'
  company
end
|
#parse_reporter_name ⇒ Object
42
43
44
45
46
47
48
49
50
51
|
# File 'lib/taiwanese_news_parser/parser/liberty_times.rb', line 42
# Extracts the reporter's name from the article body in @article[:content].
#
# Three byline layouts are tried in order:
# 1. a bracketed byline such as "〔記者NAME/PLACE〕" -- the text after
#    "記者" inside the first segment is returned (nil when the segment has
#    no "記者" prefix; later layouts are not tried in that case);
# 2. an unbracketed "記者NAME/" prefix;
# 3. a parenthesised "(文/NAME)" credit.
#
# Both ASCII and full-width slashes and parentheses are accepted. The
# parentheses in layout 3 are matched literally so that the capture is the
# name itself rather than the "文/" marker.
#
# @return [String, nil] the reporter name, or nil when no layout matches
def parse_reporter_name
  content = @article[:content]

  if (byline = content.match(%r{〔(.*?)[/／╱](.*?)〕}))
    byline[1][%r{記者(.+)}, 1]
  elsif (prefixed = content.match(%r{記者(.+?)[/／╱]}))
    prefixed[1]
  elsif (credit = content.match(%r{[（(]文[/／╱](.*?)[）)]}))
    credit[1]
  end
end
|