Class: Sinew::Curler

Inherits:
Object
  • Object
show all
Defined in:
lib/sinew/curler.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

# Baseline settings; #initialize merges caller-supplied options over these.
DEFAULT_OPTIONS =
{
  # when curl fails, #curl writes a CURLER_ERROR marker into the cache
  :cache_errors => true,
  # forwarded to curl as --max-time
  :max_time => 30,
  # forwarded to curl as --retry
  :retry => 3,
  # when true, #verbose prints its message to $stderr
  :verbose => true,
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Curler

Returns a new instance of Curler.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/sinew/curler.rb', line 16

# Builds a Curler, merging +options+ over DEFAULT_OPTIONS.
#
# Options read here:
#   :user_agent, :max_time, :retry - forwarded to curl as --user-agent,
#                                    --max-time and --retry
#   :dir                           - cache root directory
#
# When :dir is not given, the cache root is "$HOME/.sinew" if HOME is set,
# exists and is writable; otherwise it is "/tmp/sinew".
def initialize(options = {})
  @options = DEFAULT_OPTIONS.merge(options)
  @curl_args = ["--silent", "--fail", "--user-agent", @options[:user_agent], "--max-time", @options[:max_time], "--retry", @options[:retry], "--location", "--max-redirs", "3"]
  # Start at the epoch so the first call to rate_limit never has to wait.
  @last_request = Time.at(0)

  @root = @options[:dir]
  return if @root

  home = ENV["HOME"]
  # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
  # The nil guard keeps an unset HOME from raising TypeError, so the
  # "/tmp/sinew" fallback is actually reachable.
  if home && File.exist?(home) && File.stat(home).writable?
    @root = "#{home}/.sinew"
  else
    @root = "/tmp/sinew"
  end
end

Instance Attribute Details

#root ⇒ Object (readonly)

Returns the value of attribute root.



14
15
16
# File 'lib/sinew/curler.rb', line 14

# Cache root directory chosen in #initialize: options[:dir] when given,
# otherwise "$HOME/.sinew" or "/tmp/sinew".
def root
  @root
end

#uri ⇒ Object (readonly)

Returns the value of attribute uri.



14
15
16
# File 'lib/sinew/curler.rb', line 14

# URI set by the most recent #curl call; #curl re-points it when the saved
# response headers contain a 3xx status with a Location header.
def uri
  @uri
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



14
15
16
# File 'lib/sinew/curler.rb', line 14

# String form (@uri.to_s) of the URI set by the most recent #curl call.
def url
  @url
end

Class Method Details

.uri_to_path(uri) ⇒ Object



161
162
163
164
165
# File 'lib/sinew/curler.rb', line 161

# Maps a URI onto a relative cache path of the form
# "<host>/<path[?query]>", with each side passed through Util.pathify.
def self.uri_to_path(uri)
  host_part = Util.pathify(uri.host)
  # keep the query string so different queries do not share a cache entry
  resource = uri.query ? "#{uri.path}?#{uri.query}" : uri.path
  "#{host_part}/#{Util.pathify(resource)}"
end

.url_to_path(url) ⇒ Object



157
158
159
# File 'lib/sinew/curler.rb', line 157

# Convenience wrapper: converts the url string with url_to_uri, then maps
# the resulting URI to its relative cache path with uri_to_path.
def self.url_to_path(url)
  uri_to_path(url_to_uri(url))
end

.url_to_uri(url) ⇒ Object



151
152
153
154
155
# File 'lib/sinew/curler.rb', line 151

# Parses a url string into a URI, first percent-encoding spaces (%20) and
# single quotes (%27). The caller's string is not modified.
def self.url_to_uri(url)
  escaped = url.gsub(" ", "%20").gsub("'", "%27")
  URI.parse(escaped)
end

Instance Method Details

#curl(url, body) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/sinew/curler.rb', line 39

# Fetches +url+ through the curl command line tool, caching the response
# body on disk, and returns the path of the cached file.
#
# url  - a URI, or anything whose to_s is a url string
# body - request body; when non-nil the request is sent with --data-binary
#        (logged as POST) and the body becomes part of the cache key
#
# A request is only issued when no cached file exists at the computed path.
# Response headers are saved under a sibling "head" directory and are used
# to re-point @uri/@url at the redirect target.
#
# Raises Error when curl fails (Util::RunError) or when a previously cached
# CURLER_ERROR marker is found for this request.
def curl(url, body)
  #
  # prepare url/uri and calculate paths
  #

  @uri = url.is_a?(URI) ? url : Curler.url_to_uri(url.to_s)
  @url = @uri.to_s

  path = fullpath(@uri)
  # responses to requests with a body are cached per body
  path = "#{path},#{Util.pathify(body)}" if body

  # shorten long paths
  if path.length > 250
    dir, base = File.dirname(path), File.basename(path)
    path = "#{dir}/#{Util.md5(base)}"
  end

  head = "#{File.dirname(path)}/head/#{File.basename(path)}"

  # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+
  if !File.exist?(path)
    verbose(body ? "curl #{@url} (POST)" : "curl #{@url}")
    # download into scratch files first; they are moved into the cache only
    # after curl succeeds, and always cleaned up by the ensure below
    tmp = "/tmp/curler_#{Util.random_text(6)}"
    tmph = "#{tmp}.head"
    begin
      rate_limit
      Util.mkdir_if_necessary(File.dirname(path))
      Util.mkdir_if_necessary(File.dirname(head))
      begin
        command = []
        command += @curl_args
        if body
          command += ["--data-binary", body]
          command += ["--header", "Content-Type: application/x-www-form-urlencoded"]
        end
        command += ["--output", tmp]
        command += ["--dump-header", tmph]
        command << @url

        Util.run("curl", command)

        # empty response?
        if !File.exist?(tmp)
          Util.touch(tmp)
          Util.touch(tmph)
        end
      rescue Util::RunError => e
        message = "curl error"
        # append the trailing number of the run error message, if any
        if e.message =~ /(\d+)$/
          message = "#{message} (#{$1})"
        end

        # cache the error?
        if @options[:cache_errors]
          File.open(path, "w") { |f| f.puts "" }
          File.open(head, "w") { |f| f.puts "CURLER_ERROR\t#{message}" }
        end

        raise Error, message
      end
      Util.mv(tmp, path)
      Util.mv(tmph, head)
    ensure
      Util.rm_if_necessary(tmp)
      Util.rm_if_necessary(tmph)
    end
  end

  #
  # handle redirects (recalculate @uri/@url)
  #

  if File.exist?(head)
    head_contents = File.read(head)
    # handle cached errors
    if head_contents =~ /^CURLER_ERROR\t(.*)/
      raise Error, $1
    end
    original = @uri
    # \G anchors each match where the previous one ended, so every
    # consecutive header block is visited. With \A only the first block
    # could ever match and later hops of a redirect chain were ignored.
    head_contents.scan(/\G(HTTP\/\d\.\d (\d+).*?\r\n\r\n)/m) do |headers, code|
      next unless code.start_with?("3")
      if redir = headers[/^Location: ([^\r\n]+)/, 1]
        # URI#+ resolves a relative Location against the current uri
        @uri += redir
        @url = @uri.to_s
      end
    end
    # kill unnecessary head files
    if original == @uri
      Util.rm(head)
    end
  end

  path
end

#fullpath(uri) ⇒ Object

helpers



143
144
145
# File 'lib/sinew/curler.rb', line 143

# Cache file location for +uri+: the cache root (@root) followed by the
# relative path computed by Curler.uri_to_path.
def fullpath(uri)
  relative = Curler.uri_to_path(uri)
  "#{@root}/#{relative}"
end

#get(url) ⇒ Object



31
32
33
# File 'lib/sinew/curler.rb', line 31

# Fetches +url+ without a request body by delegating to #curl with a nil
# body; returns whatever #curl returns (the cached file path).
def get(url)
  curl(url, nil)
end

#post(url, body) ⇒ Object



35
36
37
# File 'lib/sinew/curler.rb', line 35

# Sends +body+ to +url+ by delegating to #curl; returns whatever #curl
# returns (the cached file path).
def post(url, body)
  curl(url, body)
end

#rate_limit ⇒ Object



167
168
169
170
171
# File 'lib/sinew/curler.rb', line 167

# Keeps requests at least one second apart: waits out whatever remains of
# the second following @last_request, then stamps @last_request with the
# current time (which is also the return value).
def rate_limit
  earliest_allowed = @last_request + 1
  remaining = earliest_allowed - Time.now
  sleep(remaining) if remaining > 0
  @last_request = Time.now
end

#uncache!(url) ⇒ Object



147
148
149
# File 'lib/sinew/curler.rb', line 147

# Drops the cached response for +url+ by handing its cache path
# (@root plus Curler.url_to_path(url)) to Util.rm_if_necessary.
def uncache!(url)
  cached_file = "#{@root}/#{Curler.url_to_path(url)}"
  Util.rm_if_necessary(cached_file)
end

#verbose(s) ⇒ Object



135
136
137
# File 'lib/sinew/curler.rb', line 135

# Writes +s+ to $stderr, but only when the :verbose option is truthy.
# Always returns nil.
def verbose(s)
  return unless @options[:verbose]

  $stderr.puts s
end