13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
# File 'lib/feed2gram/parses_entries.rb', line 13
# Downloads the feed at +feed_url+ and turns each of its entry elements
# into a Post.
#
# For every entry, the text of its first content element is parsed as HTML.
# Each <img> matched by the "//figure[1]/img" XPath becomes a Media, and the
# text of the matching <figcaption> becomes the post caption. The post's
# media type is decided by determine_post_media_type (defined elsewhere).
#
# Posts that cannot be published are dropped with a warning on stderr:
# those without any <img>, and stories/reels carrying more than one <img>.
#
# @param feed_url [String] URL of the feed to fetch
# @return [Array<Post>] the posts that passed the checks above, in feed order
def parse(feed_url)
  # Block form of open closes the downloaded stream once the document has
  # been parsed, instead of leaving it open until garbage collection.
  feed = URI.parse(feed_url).open { |io| Nokogiri::XML(io) }

  posts = feed.xpath("//*:entry").map { |entry|
    html = Nokogiri::HTML(entry.xpath("*:content[1]").text)
    medias = html.xpath("//figure[1]/img").map { |img|
      Media.new(
        # Images are the default when the tag does not declare a media type.
        media_type: (img["data-media-type"] || "image").upcase,
        url: img["src"],
        cover_url: img["data-cover-url"]
      )
    }

    Post.new(
      media_type: determine_post_media_type(html, medias),
      url: entry.xpath("*:id[1]").text,
      medias: medias,
      caption: html.xpath("//figure[1]/figcaption").text.strip
    )
  }

  posts.select { |post|
    if post.medias.empty?
      warn "Skipping post with no <img> tag: #{post.url}"
      false
    elsif ["STORIES", "REELS"].include?(post.media_type) && post.medias.size > 1
      warn "Skipping #{post.media_type.downcase} with more than one <img> tag (only one allowed): #{post.url}"
      false
    else
      true
    end
  }
end
|