Class: Worldfootball::Page
- Inherits:
-
Object
- Object
- Worldfootball::Page
show all
- Defined in:
- lib/worldfootball/page.rb,
lib/worldfootball/page_team.rb,
lib/worldfootball/page_report.rb,
lib/worldfootball/page_schedule.rb
Defined Under Namespace
Classes: Report, Schedule, Team
Constant Summary
collapse
- GENERATED_RE =
<!-- [generated 2020-06-30 22:30:19] -->
<!-- [generated 2020-06-30 22:30:19] -->
# Pattern for the "generated" marker comment, e.g.
#   <!-- [generated 2020-06-30 22:30:19] -->
# Exposes two named captures: +date+ (digits-digits-digits) and
# +time+ (digits:digits:digits).
# Compiled with the extended (x) flag, so bare whitespace and line breaks
# in the pattern are ignored; literal spaces are therefore written as [ ]+.
%r{
<!--
[ ]+
\[generated
[ ]+
(?<date>\d+-\d+-\d+)
[ ]+
(?<time>\d+:\d+:\d+)
\]
[ ]+
-->
}x
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
#initialize(html) ⇒ Page
10
11
12
13
14
|
# File 'lib/worldfootball/page.rb', line 10
# Wraps raw HTML text in a page object.
#
# @param html [String] page markup; it is kept in Unicode-normalized form
#   (NFC, which is also the default form of String#unicode_normalize)
def initialize( html )
  normalized = html.unicode_normalize( :nfc )
  @html = normalized
end
|
Class Method Details
.from_file(path) ⇒ Object
5
6
7
8
|
# File 'lib/worldfootball/page.rb', line 5
# Reads the file at +path+ as UTF-8 text and hands its content to +new+.
#
# @param path [String] location of the HTML file on disk
# @return [Object] whatever +new+ builds from the file content
def self.from_file( path )
  new( File.read( path, mode: 'r:utf-8' ) )
end
|
Instance Method Details
#assert(cond, msg) ⇒ Object
102
103
104
105
106
107
108
109
|
# File 'lib/worldfootball/page.rb', line 102
# Hard check used while parsing a page.
#
# Does nothing (and returns nil) when +cond+ is truthy. Otherwise prints a
# failure notice that includes +msg+ to standard output and terminates the
# process with exit status 1.
#
# @param cond [Object] condition that is expected to be truthy
# @param msg [String] detail appended to the failure notice
def assert( cond, msg )
  return if cond

  puts "!!! assert failed (in parse page) - #{msg}"
  exit 1
end
|
#debug? ⇒ Boolean
92
|
# File 'lib/worldfootball/page.rb', line 92
# Tells whether debug mode is on by delegating to the module-level flag.
#
# NOTE(review): the receiver was missing in this listing (".debug?" with
# nothing in front of the dot does not parse). It is presumed to be the
# enclosing Worldfootball namespace - confirm against
# lib/worldfootball/page.rb, line 92.
#
# @return [Boolean]
def debug?
  Worldfootball.debug?
end
|
#doc ⇒ Object
16
17
18
19
|
# File 'lib/worldfootball/page.rb', line 16
# Parsed form of the page markup.
#
# The result of Nokogiri::HTML is kept in @doc, so the markup is parsed on
# first use only and the same object is handed back afterwards.
#
# @return [Object] whatever Nokogiri::HTML returns for the stored markup
def doc
  return @doc if @doc

  @doc = Nokogiri::HTML( @html )
end
|
#generated ⇒ Object
66
67
68
69
70
71
72
73
74
75
76
|
# File 'lib/worldfootball/page.rb', line 66
# Timestamp taken from the "generated" marker comment in the page markup
# (see GENERATED_RE).
#
# The lookup runs once per page. The outcome is cached even when it is nil,
# so a page without a marker is scanned - and warned about on standard
# output - only on the first call rather than on every call.
#
# @return [DateTime, nil] the parsed timestamp, or nil when the markup has
#   no marker comment
def generated
  # defined? (rather than ||=) so that a cached nil counts as "already done"
  return @generated if defined?( @generated )

  m = GENERATED_RE.match( @html )
  @generated =
    if m
      DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S' )
    else
      puts "!! WARN - no generated timestamp found in page"
      nil
    end
end
|
#generated_in_days_ago ⇒ Object
convenience helper / formatter
79
80
81
82
83
84
85
86
|
# File 'lib/worldfootball/page.rb', line 79
# Formats the age of the page for display.
#
# @return [String] the difference between today's Julian day number and
#   that of the generated timestamp, followed by "d" (for example "3d"),
#   or "?" when no generated timestamp is available
def generated_in_days_ago
  return '?' unless generated

  "#{Date.today.jd - generated.jd}d"
end
|
#keywords ⇒ Object
27
28
29
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/worldfootball/page.rb', line 27
# Content of the page's keywords meta tag.
#
# Looks up the first element matching meta[name="keywords"] (the element is
# kept in @keywords) and reads its content attribute.
#
# @return [String, nil] the attribute value, or nil when the document has
#   no keywords meta tag (previously this case raised NoMethodError on nil)
def keywords
  @keywords ||= doc.css( 'meta[name="keywords"]' ).first
  @keywords && @keywords[:content]
end
|
#log(msg) ⇒ Object
112
113
114
115
116
117
|
# File 'lib/worldfootball/page.rb', line 112
# Appends +msg+ followed by a newline to ./logs.txt (relative to the current
# working directory), opening the file in UTF-8 append mode for each call.
#
# @param msg [String] text of the log entry
def log( msg )
  File.open( './logs.txt', 'a:utf-8' ) { |out| out.write( msg ); out.write( "\n" ) }
end
|
#squish(str) ⇒ Object
95
96
97
98
99
100
|
# File 'lib/worldfootball/page.rb', line 95
# Normalizes whitespace in +str+ without modifying the argument.
#
# Non-breaking spaces (U+00A0) become plain spaces, every run of spaces,
# tabs and newlines is collapsed into a single space, and surrounding
# whitespace is removed.
#
# Stripping happens last: String#strip does not treat U+00A0 as whitespace,
# so stripping first (as before) left a stray space wherever the text
# started or ended with a non-breaking space.
#
# @param str [String] text to clean up
# @return [String] a new, cleaned-up string
def squish( str )
  str = str.gsub( "\u{00A0}", ' ' )
  str = str.gsub( /[ \t\n]+/, ' ' )
  str.strip
end
|
#title ⇒ Object
21
22
23
24
25
|
# File 'lib/worldfootball/page.rb', line 21
# Text of the page's title element.
#
# Looks up the first element matching "title" (the element is kept in
# @title) and returns its text.
#
# @return [String, nil] the title text, or nil when the document has no
#   title element (previously this case raised NoMethodError on nil)
def title
  @title ||= doc.css( 'title' ).first
  @title && @title.text
end
|
#url ⇒ Object
<meta property="og:url"
content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
43
44
45
46
|
# File 'lib/worldfootball/page.rb', line 43
# Content of the page's og:url meta tag.
#
# Looks up the first element matching meta[property="og:url"] (the element
# is kept in @url) and reads its content attribute.
#
# @return [String, nil] the attribute value, or nil when the document has
#   no og:url meta tag (previously this case raised NoMethodError on nil)
def url
  @url ||= doc.css( 'meta[property="og:url"]' ).first
  @url && @url[:content]
end
|