Class: TaiwaneseNewsParser::Parser
- Inherits:
-
Object
- Object
- TaiwaneseNewsParser::Parser
show all
- Extended by:
- Memoist
- Defined in:
- lib/taiwanese_news_parser/parser.rb
Direct Known Subclasses
AppleDaily, ChinaTimes, ChinaTimesMoney, Cna, Cts, Ettoday, LibertyTimes, LibertyTimesBig5, LibertyTimesNews, NowNews, Tvbs, Udn
Defined Under Namespace
Classes: AppleDaily, ChinaTimes, ChinaTimesMoney, Cna, Cts, Ettoday, LibertyTimes, LibertyTimesBig5, LibertyTimesNews, NowNews, Tvbs, Udn
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
#initialize(url) ⇒ Parser
Returns a new instance of Parser.
25
26
27
28
29
30
31
|
# File 'lib/taiwanese_news_parser/parser.rb', line 25
# Creates a parser for +url+ and seeds the article hash with the values that
# can be derived from the URL alone, without fetching the page.
#
# @param url [String] address of the news article
def initialize(url)
  @url = url
  parser = self.class
  # Keys are inserted in the order :url, :web_domain, :url_id.
  @article = {
    :url => url,
    :web_domain => parser.domain,
    :url_id => parser.parse_url_id(url)
  }
end
|
Instance Attribute Details
#article ⇒ Object
Returns the value of attribute article.
8
9
10
|
# File 'lib/taiwanese_news_parser/parser.rb', line 8
# Reader for the article hash held in @article. The hash is created in
# #initialize (with :url, :web_domain and :url_id) and #clean_up later
# writes :reproduced into it.
#
# @return [Hash] the same hash object the parser mutates, not a copy
def article
@article
end
|
#url ⇒ Object
Returns the value of attribute url.
7
8
9
|
# File 'lib/taiwanese_news_parser/parser.rb', line 7
# Reader for the URL this parser was constructed with (stored in @url by
# #initialize). #doc fetches the page from this address.
#
# @return [Object] whatever value was passed to #initialize
#   (presumably a String URL; confirm against callers)
def url
@url
end
|
Class Method Details
.applicable?(url) ⇒ Boolean
10
11
12
|
# File 'lib/taiwanese_news_parser/parser.rb', line 10
# Tells whether this parser class handles +url+. The check is a plain
# substring match: the value of .domain must occur somewhere in +url+.
#
# @param url [String] URL to examine
# @return [Boolean] true when +url+ contains the parser's domain
def self.applicable?(url)
  site = domain
  url.include?(site)
end
|
.applicable_parser(url) ⇒ Object
14
15
16
17
18
19
20
21
22
23
|
# File 'lib/taiwanese_news_parser/parser.rb', line 14
# Resolves +url+ through any HTTP redirects and returns a parser for the
# final address.
#
# @param url [String] URL of a news article
# @return [Object, nil] a new instance of the first class in .subclasses whose
#   .applicable? accepts the redirected URL, or nil when no class matches
# @raise [URI::InvalidURIError] when +url+ cannot be parsed as a URI
def self.applicable_parser(url)
  # Use URI#open (from open-uri) rather than Kernel#open:
  #  * Kernel#open runs a string beginning with "|" as a shell command, which
  #    is unsafe for a caller-supplied URL;
  #  * since Ruby 3.0 Kernel#open no longer understands URLs at all.
  # The block form also closes the response handle, which the previous
  # `open(url).base_uri` left open.
  redirected_url = URI.parse(url).open { |response| response.base_uri.to_s }

  # First match wins, so the order of .subclasses matters.
  matching_class = subclasses.find do |candidate|
    candidate.applicable?(redirected_url)
  end
  matching_class.new(redirected_url) if matching_class
end
|
.domain ⇒ Object
56
57
58
|
# File 'lib/taiwanese_news_parser/parser.rb', line 56
# Domain string identifying the news site a parser handles. It is used by
# .applicable? (substring match against a URL) and stored as :web_domain in
# the article hash. This base implementation only signals that the receiver
# has not supplied its own; presumably each concrete parser overrides it.
#
# @raise [NotImplementedError] always; the message names the receiver so the
#   offending class can be identified from the backtrace-less message alone
def self.domain
  raise NotImplementedError, "#{self} must implement .domain"
end
|
.subclasses ⇒ Object
52
53
54
|
# File 'lib/taiwanese_news_parser/parser.rb', line 52
# Hand-maintained list of the concrete parser classes.
# .applicable_parser takes the first entry whose .applicable? returns true,
# so the order of this list is significant.
#
# @return [Array<Class>] parser classes in lookup order
def self.subclasses
  [
    Udn,
    LibertyTimes,
    LibertyTimesBig5,
    LibertyTimesNews,
    ChinaTimes,
    ChinaTimesMoney,
    Cna,
    AppleDaily,
    Ettoday,
    Tvbs,
    Cts,
    NowNews
  ]
end
|
Instance Method Details
#clean_up ⇒ Object
39
40
41
42
43
44
45
|
# File 'lib/taiwanese_news_parser/parser.rb', line 39
# Normalises the parsed article in place:
#  1. strips surrounding whitespace from the text fields that are present,
#  2. calls #clean_url when the parser publicly responds to it,
#  3. records the result of #reproduced? under :reproduced.
#
# @return [Object] the value stored in @article[:reproduced]
def clean_up
  text_fields = [:content, :title, :reporter_name, :company_name]
  text_fields.each do |field|
    value = @article[field]
    # strip! mutates the stored string itself; nil/false fields are skipped.
    value.strip! if value
  end

  clean_url if respond_to?(:clean_url)
  @article[:reproduced] = reproduced?
end
|
#doc ⇒ Object
33
34
35
36
|
# File 'lib/taiwanese_news_parser/parser.rb', line 33
# Downloads the page at #url, transcodes it from Big5 to UTF-8 and parses it
# with Nokogiri. The transcoded text is kept in @raw and the parsed document
# in @doc.
#
# @return [Object] the value returned by Nokogiri::HTML for the page
# @raise [URI::InvalidURIError] when #url cannot be parsed as a URI
def doc
  # URI#open in block form closes the response handle once the body is read
  # (the previous `open(url).read` never closed it) and avoids Kernel#open,
  # which treats a leading "|" as a command and stopped handling URLs in
  # Ruby 3.0.
  body = URI.parse(url).open { |response| response.read }

  # Bytes that are invalid or have no UTF-8 mapping are dropped, not replaced
  # with a placeholder character.
  @raw = body.encode('utf-8', 'big5', :invalid => :replace, :undef => :replace, :replace => '')
  @doc = ::Nokogiri::HTML(@raw, url)
end
|
#reproduced? ⇒ Boolean
47
48
49
|
# File 'lib/taiwanese_news_parser/parser.rb', line 47
# Reports whether the article's company name is missing from the parser
# class's own list of names (self.class.names).
#
# @return [Boolean] true when #parse_company_name is not among those names
def reproduced?
  own_names = self.class.names
  company = parse_company_name
  !own_names.include?(company)
end
|