Class: TD2Planet::Fetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/td2planet/fetcher.rb

Instance Method Summary collapse

Constructor Details

#initialize(cache_dir, dry_run = false, user_agent = "td2planet") ⇒ Fetcher

Returns a new instance of Fetcher.



13
14
15
16
17
18
19
20
# File 'lib/td2planet/fetcher.rb', line 13

# Builds a fetcher that keeps its feed cache under +cache_dir+.
#
# cache_dir::  path of the cache directory (anything Pathname.new accepts);
#              it is created, together with any missing parent
#              directories, when nothing exists at that path yet
# dry_run::    stored in @dry_run; fetch_all_rss reads feeds from the cache
#              instead of fetching them when this is true
# user_agent:: stored in @user_agent; fetch_all_rss sends it as the
#              User-Agent request header
def initialize(cache_dir, dry_run=false, user_agent="td2planet")
  @cache_dir = Pathname.new(cache_dir)
  # mkpath instead of mkdir: mkdir raises Errno::ENOENT when a parent
  # directory of the cache path is missing.
  @cache_dir.mkpath unless @cache_dir.exist?
  @dry_run = dry_run
  @user_agent = user_agent
end

Instance Method Details

#fetch_all_rss(uris) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/td2planet/fetcher.rb', line 22

# Fetches every feed in +uris+ and returns the parsed results as an Array,
# in the order of +uris+ (feeds that could not be obtained are left out).
#
# For each URI the cache file is @cache_dir + the URL-escaped URI.
#
# * When @dry_run is set, nothing is fetched and the cache file is used.
# * Otherwise the URI is read with @user_agent as the User-Agent header.
#   A response with status '200' whose body matches /rss/ is written to the
#   cache file and used; any other response, or a timeout, falls back to
#   the cache file. Any other StandardError skips the feed.
# * Whenever the cache file is needed but does not exist, the feed is
#   reported and skipped instead of aborting the remaining feeds.
#
# The text is passed through fixup_rss and then RSS::Parser.parse with
# validation disabled (second argument false).
def fetch_all_rss(uris)
  rss_list = []
  uris.each do |uri|
    cache_file = @cache_dir + ERB::Util.u(uri)
    text = nil
    use_cache = false
    if @dry_run
      puts "use cache: #{cache_file}"
      use_cache = true
    else
      begin
        puts "fetch: #{uri}"
        text = URI.parse(uri).read("User-Agent" => @user_agent)
      rescue Timeout::Error
        # fallback
        puts "ERROR: timeout #{uri}"
        use_cache = true
      rescue StandardError => e
        # StandardError, not Exception: Interrupt and SystemExit must keep
        # propagating so the run can still be stopped from outside.
        puts "ERROR: #{e} (#{e.class}) on #{uri}"
        next
      else
        if text.status[0] == '200' && /rss/ =~ text
          cache_file.open('wb'){|f| f.write(text) }
        else
          # fallback
          puts "ERROR: fetch failed #{uri} #{text.status}"
          use_cache = true
        end
      end
    end
    if use_cache
      # A feed that was never cached has nothing to fall back to; skip it
      # rather than letting Errno::ENOENT abort the remaining feeds.
      unless cache_file.file?
        puts "ERROR: no cache for #{uri}"
        next
      end
      text = cache_file.read
    end
    text = fixup_rss(text)
    rss_list << RSS::Parser.parse(text, false)
  end
  rss_list
end

#fixup_rss(text) ⇒ Object

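Rewrites the first encoding="euc-jp" declaration in the text to encoding="euc-jp-ms", because feeds declared as euc-jp may fail to parse.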



58
59
60
# File 'lib/td2planet/fetcher.rb', line 58

def fixup_rss(text)
  text.sub(/\bencoding="euc-jp"/ni, 'encoding="euc-jp-ms"')
end