Class: FeedYamlizer
- Inherits:
-
Object
show all
- Includes:
- FileUtils::Verbose
- Defined in:
- lib/feed_yamlizer/html_cleaner.rb,
lib/feed_yamlizer.rb,
lib/feed_yamlizer/version.rb,
lib/feed_yamlizer/feed_parser.rb,
lib/feed_yamlizer/feed_listener.rb,
lib/feed_yamlizer/html_listener.rb
Overview
Defined Under Namespace
Classes: FeedListener, FeedParser, HtmlCleaner, HtmlListener
Constant Summary
collapse
- VERSION =
"0.2.0"
- NEWLINE_PLACEHOLDER =
'+---NEWLINE---+'
- SPACE_PLACEHOLDER =
'+---SPACE---+'
- TAB_PLACEHOLDER =
'+---TAB---+'
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
Returns a new instance of FeedYamlizer.
38
39
40
41
|
# File 'lib/feed_yamlizer.rb', line 38
# Stores the parsed feed and prepares the empty output structure.
#
# @param feed [Hash] parsed feed data; read later through keys such as
#   :title, :link, :xml_encoding and :items
def initialize(feed)
  # Output skeleton: feed-level metadata plus one entry per item.
  @result = { meta: {}, items: [] }
  @feed = feed
end
|
Class Method Details
.check_for_tidy ⇒ Object
107
108
109
110
111
|
# File 'lib/feed_yamlizer.rb', line 107
# Aborts the process with an install hint when `which tidy` prints nothing,
# i.e. when no `tidy` executable is found on the PATH.
#
# @return [nil] when tidy is available
def check_for_tidy
  tidy_path = `which tidy`
  abort "Please install tidy" if tidy_path == ''
end
|
25
26
27
28
29
30
31
32
33
34
35
36
|
# File 'lib/feed_yamlizer.rb', line 25
# Re-wraps text with the external `fmt` utility and indents the result.
#
# The wrap width is 75 columns minus the indent, so the indented output
# still fits in 75 columns.
#
# @param x [#to_s] text to wrap (written to fmt's stdin with +puts+)
# @param indent [Integer, nil] number of spaces to prefix to every line that
#   starts with a non-whitespace character; nil is treated as 0
# @return [String] the wrapped (and possibly indented) text
def self.format(x, indent=0)
  # 0 is truthy in Ruby, so the former `if indent ... else` never reached its
  # else branch, and nil blew up in `75 - indent` before getting there.
  # Treat nil as "no indent" so the guard means what it says.
  indent ||= 0
  # argv form: the command never goes through a shell.
  wrapped = IO.popen(["fmt", "-w", (75 - indent).to_s], "r+") do |pipe|
    pipe.puts x
    pipe.close_write # signal EOF so fmt flushes its output
    pipe.read
  end
  return wrapped if indent.zero?
  # Only lines beginning with a non-space character are indented; blank
  # lines stay empty.
  wrapped.gsub(/^(?=\S)/, ' ' * indent)
end
|
.process_url(url) ⇒ Object
126
127
128
129
130
131
132
133
|
# File 'lib/feed_yamlizer.rb', line 126
# Downloads the feed at +url+ and converts it with +run+.
#
# The source encoding is chosen in this order: the charset reported by the
# response, the encoding found by +xml_encoding+, then "UTF-8".
#
# @param url [String, URI] location of the feed
# @return [Object] whatever +run+ returns
def process_url(url)
  # Go through URI#open (open-uri) rather than Kernel#open: Kernel#open runs
  # a subprocess for strings starting with "|" and no longer fetches URLs on
  # Ruby 3. The block form also closes the response when done.
  URI(url).open do |response|
    charset = response.charset
    xml = response.read
    run xml, charset || xml_encoding(xml) || "UTF-8"
  end
end
|
.process_xml(xml) ⇒ Object
122
123
124
|
# File 'lib/feed_yamlizer.rb', line 122
# Converts an in-memory feed document with +run+.
#
# @param xml [String] raw feed XML
# @return [Object] whatever +run+ returns
def process_xml(xml)
  # xml_encoding returns nil when no encoding attribute is found; passing that
  # nil explicitly would bypass run's default, so fall back to UTF-8 here the
  # same way process_url does.
  run(xml, xml_encoding(xml) || "UTF-8")
end
|
.run(feed_xml, encoding = 'UTF-8') ⇒ Object
114
115
116
117
118
119
120
|
# File 'lib/feed_yamlizer.rb', line 114
# Converts raw feed XML into the FeedYamlizer result structure.
#
# Steps: verify tidy is installed, transcode the document to UTF-8 with
# Iconv, parse it with FeedParser, then build the result with FeedYamlizer.
#
# @param feed_xml [String] raw feed document
# @param encoding [String, nil] source encoding of +feed_xml+; nil is treated
#   as "UTF-8"
# @return [Object] the value of FeedYamlizer#result
def run(feed_xml, encoding='UTF-8')
  check_for_tidy
  # An explicitly passed nil (no encoding detected by the caller) skips the
  # default argument, so apply the fallback here too.
  encoding ||= 'UTF-8'
  utf8_xml = Iconv.conv("UTF-8//TRANSLIT//IGNORE", encoding, feed_xml)
  parsed_data = FeedYamlizer::FeedParser.new(utf8_xml).result
  FeedYamlizer.new(parsed_data).result
end
|
.xml_encoding(rawxml) ⇒ Object
99
100
101
102
103
104
105
|
# File 'lib/feed_yamlizer.rb', line 99
def xml_encoding(rawxml)
encoding = rawxml.encode("ascii", invalid: :replace, undef: :replace)[/encoding=["']([^"']+)["']/,1]
if $debug
STDERR.puts "xml encoding: #{encoding.inspect}"
end
encoding
end
|
Instance Method Details
53
54
55
56
57
58
59
|
# File 'lib/feed_yamlizer.rb', line 53
# Replaces @result[:meta] with the feed-level metadata: the title passed
# through inner_text, plus the link and xml_encoding copied from @feed as-is.
#
# @return [Hash] the metadata hash that was stored
def add_feed_metaresult
  meta = { title: inner_text(@feed[:title]) }
  meta[:link] = @feed[:link]
  meta[:xml_encoding] = @feed[:xml_encoding]
  @result[:meta] = meta
end
|
68
69
70
71
72
73
74
75
76
|
# File 'lib/feed_yamlizer.rb', line 68
# Appends the metadata for one feed item to @result[:items].
#
# The title is reduced to its text via inner_text; the other fields are
# copied from +item+ unchanged (nil when absent).
#
# @param item [Hash] one parsed feed item
# @param index [Integer] position of the item in the feed (currently unused)
# @return [Array<Hash>] @result[:items] after the append
def add_item_metaresult(item, index)
  # :title is deliberately not in this list. It used to be, which made the
  # copy loop overwrite the cleaned title below with the raw markup.
  copied_fields = [:author, :guid, :pub_date, :link, :enclosure, :podcast_image]
  metaresult = {:title => inner_text(item[:title])}
  copied_fields.each {|field| metaresult[field] = item[field] }
  @result[:items] << metaresult
end
|
#add_items ⇒ Object
61
62
63
64
65
66
|
# File 'lib/feed_yamlizer.rb', line 61
# Walks @feed[:items] in order and, for each item, records its metadata and
# then its content.
#
# @return [Array] @feed[:items]
def add_items
  @feed[:items].each_with_index do |entry, position|
    # Metadata first: add_raw_content writes into the entry appended last.
    add_item_metaresult(entry, position)
    add_raw_content(entry, position)
  end
end
|
#add_raw_content(item, index) ⇒ Object
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
# File 'lib/feed_yamlizer.rb', line 78
# Stores the HTML and plain-text content of +item+ on the most recently
# appended entry of @result[:items].
#
# The source is item[:content], else item[:summary], else an empty string.
#
# @param item [Hash] one parsed feed item
# @param index [Integer] position of the item in the feed (currently unused)
# @return [String] the plain-text version that was stored
def add_raw_content(item, index)
  source = item[:content] || item[:summary] || ""
  # Drop leading whitespace on every line, then trim both ends.
  html = source.gsub(/^\s*/, '').strip
  entry = @result[:items][-1]
  entry[:content] = {:html => html}

  # Applied in order to the cleaner's output: placeholders become real
  # whitespace, whitespace-only lines are emptied, and a run of three line
  # breaks at a line end shrinks to two.
  substitutions = [
    [FeedYamlizer::NEWLINE_PLACEHOLDER, "\n"],
    [SPACE_PLACEHOLDER, " "],
    [TAB_PLACEHOLDER, " "],
    [/^\s+$/, ""],
    [/\n *\n *\n *$/ , "\n\n"]
  ]
  cleaned = HtmlCleaner.new(html).output
  text = substitutions.reduce(cleaned) {|acc, (pattern, replacement)|
    acc.gsub(pattern, replacement)
  }
  entry[:content][:text] = text
end
|
#inner_text(string) ⇒ Object
49
50
51
|
# File 'lib/feed_yamlizer.rb', line 49
# Parses +string+ as HTML with Nokogiri and returns the document's text.
#
# @param string [String] markup to parse
# @return [String] the parsed document's inner_text
def inner_text(string)
  document = Nokogiri::HTML.parse(string)
  document.inner_text
end
|
#result ⇒ Object
43
44
45
46
47
|
# File 'lib/feed_yamlizer.rb', line 43
# Builds (once) and returns the converted feed.
#
# @return [Hash] @result, with :meta and :items filled in
def result
  # add_items appends to @result[:items], so rebuilding on every call would
  # duplicate the items; build only on the first call.
  return @result if @result_built
  add_feed_metaresult
  add_items
  @result_built = true
  @result
end
|