Module: SQ

Defined in:
lib/sq.rb,
lib/version.rb

Class Method Summary collapse

Class Method Details

.format(doc, fmt = '%s.pdf', opts = {}) ⇒ Object

Output a formatted filename.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/sq.rb', line 44

# Output a formatted filename.
#
# Every two-character "%x" sequence in fmt is replaced as follows:
#
#   %n  opts[:number] (0 when absent)
#   %N  opts[:number] + 1
#   %c  opts[:count] (0 when absent)
#   %s  doc[:name] without a trailing ".pdf" (case-insensitive)
#   %S  doc[:text]
#   %_  doc[:text] with each run of whitespace replaced by "_"
#   %-  doc[:text] with each run of whitespace replaced by "-"
#   %%  a literal "%"
#
# Any other "%x" sequence is replaced by an empty string.
#
# doc  - hash providing the :name and :text entries
# fmt  - format string
# opts - hash that may provide :number and :count. It is only read, never
#        modified, so shared or frozen hashes can be passed.
#
# Returns the formatted string.
def format(doc, fmt='%s.pdf', opts={})
  # Keep the defaults in locals rather than writing them back into opts.
  number = opts[:number] || 0
  count  = opts[:count]  || 0

  # gsub converts the block's value with to_s, so integers are fine and an
  # unmatched sequence (nil) becomes an empty string.
  fmt.gsub(/%./) do |f|
    case f
    when '%n' then number
    when '%N' then number + 1
    when '%c' then count
    when '%s' then doc[:name].sub(/\.pdf$/i, '')
    when '%S' then doc[:text]
    when '%_' then doc[:text].gsub(/\s+/, '_')
    when '%-' then doc[:text].gsub(/\s+/, '-')
    when '%%' then '%'
    end
  end
end

.process(uri, regex = /./, opts = {}) ⇒ Object

Query a URI and download all PDFs that match the regex. Returns the number of downloaded PDFs.



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/sq.rb', line 71

# Query a URI and download every PDF whose URI matches the regex.
#
# uri   - page to scan for PDF links (handed to query)
# regex - filter handed to query
# opts  - hash of options:
#         :directory - output directory (default: '.'), created if missing
#         :format    - filename format handed to format (default: '%s.pdf')
#         :verbose   - when truthy, print what is being done
#
# Returns the number of PDFs found by query (0 when there are none); each of
# them has been downloaded when the method returns normally.
def process(uri, regex=/./, opts={})
  uris = query(uri, regex)
  count = uris.count

  puts "Found #{count} PDFs:" if opts[:verbose]

  return 0 if uris.empty?

  out = File.expand_path(opts[:directory] || '.')
  fmt = opts[:format] || '%s.pdf'

  # Dir.exists? was removed in Ruby 3.2; Dir.exist? is the supported name.
  unless Dir.exist?(out)
    puts "-> mkdir #{out}" if opts[:verbose]
    FileUtils.mkdir_p(out)
  end

  progress = ProgressBar.create(:title => "PDFs", :total => count)

  uris.each_with_index do |u, i|
    name = format(u, fmt, {:number => i, :count => count})

    # URI.open (open-uri) is used because Kernel#open no longer opens URLs
    # since Ruby 3.0. The remote resource is opened first so that a failed
    # request does not leave an empty file behind.
    URI.open(u[:uri], 'rb') do |resp|
      File.open("#{out}/#{name}", 'wb') do |f|
        f.write(resp.read)
      end
      progress.log name if opts[:verbose]
      progress.increment
    end
  end

  count
end

.query(uri, regex = /./) ⇒ Object

Query a URI and return a list of PDFs. Each PDF is a hash with three keys: :uri is its absolute URI, :name is its name (the last part of its URI), and :text is the text of the link pointing to it.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/sq.rb', line 22

# Query a URI and return the list of PDFs it links to.
#
# uri   - address of the page; "http://" is prepended when it does not
#         start with "http://" or "https://"
# regex - only links whose absolute URI matches it are kept
#
# Returns an array of hashes with three keys: :uri is the absolute URI of
# the PDF, :name is the last segment of its path and :text is the text of
# the link.
def query(uri, regex=/./)
  # \A anchors at the very start of the string (^ would match after any
  # newline).
  uri = 'http://' + uri unless uri =~ /\Ahttps?:\/\//

  # URI.open (open-uri) is used because Kernel#open no longer opens URLs
  # since Ruby 3.0; the block form closes the handle once parsing is done.
  doc = URI.open(uri, 'User-Agent' => user_agent) { |io| Nokogiri::HTML(io) }
  links = doc.css('a[href]')

  uris = links.map do |a|
    begin
      [a.text, URI.join(uri, a.attr('href'))]
    rescue URI::Error
      # A malformed href cannot point to a downloadable PDF; skip it
      # instead of aborting the whole query.
      nil
    end
  end.compact

  uris.select! { |_,u| u.path =~ /\.pdf$/i && u.to_s =~ regex }

  uris.map do |text,u|
    {
      :uri => u.to_s,
      :name => u.path.split('/').last,
      :text => text
    }
  end
end

.user_agent ⇒ Object

Return the user-agent used by SQ.



13
14
15
# File 'lib/sq.rb', line 13

# Build the user-agent string used by SQ: the "SQ/" prefix, the value
# returned by version, then the project address.
def user_agent
  release = version
  "SQ/#{release} +github.com/bfontaine/sq"
end

.version ⇒ Object



5
6
7
# File 'lib/version.rb', line 5

# Return the version of SQ as a dot-separated string.
def version
  segments = [0, 1, 1]
  segments.join('.')
end