Class: TaiwaneseNewsParser::Parser::Ettoday
Instance Attribute Summary
#article, #url
Class Method Summary
collapse
Instance Method Summary
collapse
applicable?, applicable_parser, #clean_up, #initialize, subclasses
Class Method Details
.domain ⇒ Object
2
3
4
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 2
# Host name associated with this parser.
#
# @return [String] always 'ettoday.net'
def self.domain
  'ettoday.net'
end
|
.names ⇒ Object
6
7
8
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 6
# Names under which this news outlet is known.
#
# @return [Array<String>] a one-element array containing '東森'
def self.names
  ['東森']
end
|
.parse_url_id(url) ⇒ Object
46
47
48
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 46
# Extracts the "<digits>/<digits>" identifier from an ETtoday article URL.
#
# The URL must look like
# "http(s)://www.ettoday.net/<section>/<digits>/<digits>..."; both the http
# and https schemes are accepted.
#
# @param url [String] article URL
# @return [String, nil] the "<digits>/<digits>" portion, or nil when the URL
#   does not match the expected layout
def self.parse_url_id(url)
  url[%r{https?://www\.ettoday\.net/\w+/(\d+/\d+)}, 1]
end
|
Instance Method Details
#clean_url ⇒ Object
41
42
43
44
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 41
# Replaces @article[:url] with the result of passing it through
# TaiwaneseNewsParser::UrlCleaner#clean.
#
# @return [Object] the cleaned URL that was stored
def clean_url
  @article[:url] = TaiwaneseNewsParser::UrlCleaner.new.clean(@article[:url])
end
|
#doc ⇒ Object
10
11
12
13
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 10
# Downloads the page at #url and parses it with Nokogiri.
#
# The parsed document is memoized in @doc, so repeated calls (#parse and
# #parse_reporter_name both call this) reuse one download instead of hitting
# the network each time. The raw response body is kept in @raw.
#
# URI.open is used because Kernel#open no longer opens URLs on Ruby 3.0+.
# NOTE(review): this assumes 'open-uri' is already required by the library,
# which the previous Kernel#open call on a URL also needed -- confirm.
#
# @return [Object] the value returned by Nokogiri::HTML for the page body
def doc
  @doc ||= begin
    # Block form closes the underlying IO once the body has been read.
    @raw = URI.open(url, &:read)
    Nokogiri::HTML(@raw)
  end
end
|
#parse ⇒ Object
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 16
# Scrapes the article page and fills in @article.
#
# Keys written: :title, :company_name, :content, :reporter_name and
# :published_at. Afterwards #clean_up is invoked and @article is returned.
#
# :published_at is built with Time.new from the "YYYY年M月D日 H:MM" text found
# in the '.news-time' element, so it is expressed in the process's local time
# zone. When that text is missing or malformed, :published_at is set to nil
# instead of raising, so the remaining fields are still returned.
#
# @return [Hash] the populated @article
def parse
  # Look the document up once; every call to #doc may trigger a download.
  page = doc

  @article[:title] = page.css('[itemprop=headline]').text
  @article[:company_name] = '東森'
  @article[:content] = page.css('[itemprop=articleBody]>p').text
  @article[:reporter_name] = parse_reporter_name

  # \d+ (not \d*) so that empty components cannot reach Time.new.
  stamp = page.css('.news-time').text.match(/(\d+)年(\d+)月(\d+)日 (\d+):(\d+)/)
  @article[:published_at] = stamp && Time.new(*stamp.captures.map(&:to_i))

  clean_up
  @article
end
|
#parse_reporter_name ⇒ Object
33
34
35
36
37
38
39
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 33
# Extracts the reporter's name from the article body text.
#
# Looks for the first "記者<name><slash>" occurrence, where <slash> is an ASCII
# solidus "/", a full-width solidus (U+FF0F) or a box-drawing diagonal
# (U+2571), and returns the shortest <name> on a single line.
#
# @return [String, nil] the reporter name, or nil when no byline is found
def parse_reporter_name
  body_text = doc.css('[itemprop=articleBody]').text
  # Slash variants are written as escapes so they survive text normalization.
  body_text[%r{記者(.+?)[/\uFF0F\u2571]}, 1]
end
|
#reproduced? ⇒ Boolean
50
51
52
|
# File 'lib/taiwanese_news_parser/parser/ettoday.rb', line 50
# Always answers false for this parser.
# NOTE(review): presumably flags articles republished from another outlet --
# confirm against the base parser's contract.
#
# @return [Boolean] false
def reproduced?
  false
end
|