Class: Browser::Cache

Inherits:
Object show all
Includes:
Enumerable
Defined in:
lib/epitools/browser/cache.rb

Overview

An SQLite3-backed browser cache (page bodies are stored zlib/deflate-compressed)

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Enumerable

#*, #**, #average, #blank?, #combination, #counts, #cross_product, #foldl, #group_neighbours_by, #grouped_to_h, #groups, #map_recursively, #parallel_map, #permutation, #powerset, #reverse, #reverse_each, #rle, #rzip, #select_recursively, #skip, #sort_numerically, #split_after, #split_at, #split_before, #split_between, #sum, #to_iter, #uniq, #unzip

Methods included from Array::ToCSV

#to_csv, #to_tsv

Constructor Details

#initialize(filename = "browsercache.db", agent = nil) ⇒ Cache

Returns a new instance of Cache.



15
16
17
18
19
20
21
22
23
# File 'lib/epitools/browser/cache.rb', line 15

# Opens the SQLite3 database at +filename+ and makes sure the cache tables exist.
#
# @param filename [String] path handed to SQLite3::Database.new
# @param agent [Object, nil] kept in @agent and exposed through #agent
def initialize(filename="browsercache.db", agent=nil)
  @filename = filename
  @agent    = agent

  # busy_timeout(50): presumably milliseconds to wait on a locked database --
  # confirm against the sqlite3 gem documentation.
  @db = SQLite3::Database.new(filename).tap { |handle| handle.busy_timeout(50) }

  create_tables
end

Instance Attribute Details

#agent ⇒ Object (readonly)

Returns the value of attribute agent.



13
14
15
# File 'lib/epitools/browser/cache.rb', line 13

# Reader for the @agent instance variable.
#
# @return [Object, nil] the current value of @agent
def agent
  @agent
end

#db ⇒ Object (readonly)

Returns the value of attribute db.



13
14
15
# File 'lib/epitools/browser/cache.rb', line 13

# Reader for the @db instance variable (the database handle the other
# methods call #execute on).
#
# @return [Object] the current value of @db
def db
  @db
end

Instance Method Details

#clear(pattern = nil) ⇒ Object



152
153
154
155
156
157
158
# File 'lib/epitools/browser/cache.rb', line 152

# Deletes cached rows.
#
# With a +pattern+, only rows whose url contains it (SQL LIKE substring match)
# are removed; without one, every row in the cache table is deleted.
#
# The pattern is passed as a bind parameter rather than interpolated into the
# SQL text, so quotes in it can neither break the statement nor inject SQL.
# LIKE wildcards (% and _) inside the pattern keep their LIKE meaning.
#
# @param pattern [#to_s, nil] substring to match against the url column
# @return [Object] whatever db.execute returns
def clear(pattern=nil)
  if pattern
    db.execute("DELETE FROM cache WHERE url LIKE ?", "%#{pattern}%")
  else
    db.execute("DELETE FROM cache")
  end
end

#count ⇒ Object Also known as: size



29
30
31
# File 'lib/epitools/browser/cache.rb', line 29

# Number of rows in the cache table.
#
# @return [Integer] the first column of the first row of the COUNT query,
#   converted with #to_i
def count
  rows  = db.execute("SELECT COUNT(1) FROM cache")
  total = rows.first.first
  total.to_i
end

#delete! ⇒ Object



179
180
181
182
# File 'lib/epitools/browser/cache.rb', line 179

# Closes the database handle and removes the database file (@filename)
# from disk.
#
# @return [Integer] the result of File.delete (number of files removed)
def delete!
  db.close
  File.delete(@filename)
end

#each(&block) ⇒ Object



160
161
162
# File 'lib/epitools/browser/cache.rb', line 160

# Runs a query over every row of the cache table via pages_via_sql,
# forwarding the given block to it.
#
# @return [Object] whatever pages_via_sql returns
def each(&block)
  everything = "SELECT * FROM cache"
  pages_via_sql(everything, &block)
end

#each_url ⇒ Object



164
165
166
167
168
# File 'lib/epitools/browser/cache.rb', line 164

# Yields the url column of every row in the cache table.
#
# When called without a block an Enumerator is returned instead of failing
# with LocalJumpError on the first row.
#
# @yieldparam url [Object] first column of each row
# @return [Enumerator] when no block is given
# @return [Object] whatever db.execute returns when a block is given
def each_url
  return enum_for(:each_url) unless block_given?

  db.execute("SELECT url FROM cache") do |row|
    yield row.first
  end
end

#expire(url) ⇒ Object



170
171
172
# File 'lib/epitools/browser/cache.rb', line 170

# Deletes the cache row(s) whose url equals +url+.
#
# The argument is converted with #to_s before binding, matching #get and
# #includes?, so a URI object can be passed as well as a String.
#
# @param url [#to_s] url to remove
# @return [Object] whatever db.execute returns
def expire(url)
  db.execute("DELETE FROM cache WHERE url = ?", url.to_s)
end

#get(url) ⇒ Object



127
128
129
130
131
132
133
134
135
# File 'lib/epitools/browser/cache.rb', line 127

# Looks up the cached page stored under +url+.
#
# @param url [#to_s] url to look up (converted with #to_s)
# @return [Object, nil] the first page produced by the query, or nil when
#   the query yields nothing
def get(url)
  pages_via_sql("SELECT * FROM cache WHERE url = ?", url.to_s).first
end

#grep(pattern, &block) ⇒ Object



123
124
125
# File 'lib/epitools/browser/cache.rb', line 123

# Finds cached pages whose url contains +pattern+ (SQL LIKE substring match).
#
# The pattern is passed as a bind parameter rather than interpolated into the
# SQL text, so quotes in it can neither break the statement nor inject SQL.
# LIKE wildcards (% and _) inside the pattern keep their LIKE meaning.
#
# @param pattern [#to_s] substring to match against the url column
# @return [Object] whatever pages_via_sql returns
def grep(pattern, &block)
  pages_via_sql("SELECT * FROM cache WHERE url LIKE ?", "%#{pattern}%", &block)
end

#includes?(url) ⇒ Boolean Also known as: include?

Returns:

  • (Boolean)


137
138
139
# File 'lib/epitools/browser/cache.rb', line 137

# Tells whether a row with the given url exists in the cache table.
#
# @param url [#to_s] url to look up (converted with #to_s)
# @return [Boolean]
def includes?(url)
  matches = db.execute("SELECT url FROM cache WHERE url = ?", url.to_s)
  matches.any?
end

#inspect ⇒ Object



25
26
27
# File 'lib/epitools/browser/cache.rb', line 25

# Short description of the cache: database filename, number of cached rows
# (via #count) and the size of the database file on disk.
#
# @return [String]
def inspect
  details = [
    "filename=#{@filename.inspect}",
    "count=#{count}",
    "size=#{File.size(@filename)} bytes"
  ]
  "#<Browser::Cache #{details.join(', ')}>"
end

#pages_via_sql(*args, &block) ⇒ Object



112
113
114
115
116
117
118
119
120
121
# File 'lib/epitools/browser/cache.rb', line 112

# Runs db.execute with the given arguments and converts each resulting row
# with row_to_page.
#
# With a block, every converted page is yielded as the rows come in and the
# return value is whatever db.execute returns; without one, an Array of the
# converted pages is returned.
#
# @param args [Array] passed straight to db.execute
def pages_via_sql(*args, &block)
  dmsg [:pages_via_sql, args]

  return db.execute(*args).map { |record| row_to_page(record) } unless block_given?

  db.execute(*args) { |record| yield row_to_page(record) }
end

#put(page, original_url = nil, **options) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/epitools/browser/cache.rb', line 40

# Stores +page+ in the cache under its own uri, with the body deflated via Zlib.
#
# If +original_url+ is given and differs from the page's uri, an extra row is
# inserted that maps +original_url+ to the page's uri (nil body, redirect
# column set). No such row is written when +original_url+ is nil, and
# +original_url+ is converted with #to_s so a URI object compares and binds
# like a String.
#
# Values are inserted in the order ( url, content_type, body, redirect ),
# which is the order row_to_page unpacks them in.
#
# @param page [#uri, #body, #content_type] page to store
# @param original_url [#to_s, nil] url that was originally requested
# @param options [Hash] :overwrite => expire existing rows before inserting
# @return [Boolean] true on success, false when SQLite3::SQLException is raised
# @raise [RuntimeError] "Invalid page" when valid_page?(page) is false
def put(page, original_url=nil, **options)
  dmsg [:put, original_url]

  raise "Invalid page" unless valid_page?(page)

  url           = page.uri.to_s
  requested_url = original_url && original_url.to_s

  dmsg [:page_uri, url]
  dmsg [:original_url, requested_url]

  if requested_url && requested_url != url
    # redirect requested_url to url
    expire(requested_url) if options[:overwrite]
    db.execute(
      "INSERT INTO cache VALUES ( ?, ?, ?, ? )",
      requested_url,
      page.content_type,
      nil,
      url
    )
  end

  compressed_body = Zlib::Deflate.deflate(page.body)

  expire(url) if options[:overwrite]
  db.execute(
    "INSERT INTO cache VALUES ( ?, ?, ?, ? )",
    url,
    page.content_type,
    SQLite3::Blob.new(compressed_body),
    nil
  )

  true

rescue SQLite3::SQLException => e
  # Best-effort: report the failure and signal it through the return value.
  p [:exception, e]
  false
end

#recreate_tables ⇒ Object



174
175
176
177
# File 'lib/epitools/browser/cache.rb', line 174

# Drops the tables (ignoring any StandardError raised while doing so) and
# then creates them again.
#
# @return [Object] whatever create_tables returns
def recreate_tables
  begin
    drop_tables
  rescue StandardError
    # best-effort: a failed drop is ignored and the tables are still created
    nil
  end

  create_tables
end

#row_to_page(row) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/epitools/browser/cache.rb', line 81

# Turns one cache row into a page object.
#
# The row is unpacked as ( url, content_type, compressed_body, redirect ).
# A row with a redirect is resolved by looking the redirect target up with
# #get. Otherwise the body is inflated with Zlib and wrapped in a
# Mechanize::Page (for text/html, text/xml and application/xhtml+xml content
# types) or a Mechanize::File (everything else).
#
# @param row [Array] a row from the cache table
# @return [Object, nil] Mechanize::Page, Mechanize::File, or the result of #get
def row_to_page(row)
  url, content_type, compressed_body, redirect = row

  return get(redirect) if redirect

  body      = Zlib::Inflate.inflate(compressed_body)
  is_markup = content_type =~ %r{^(text/html|text/xml|application/xhtml\+xml)}i

  unless is_markup
    # Mechanize::File#initialize(uri=nil, response=nil, body=nil, code=nil)
    return Mechanize::File.new(URI.parse(url), {'content-type'=>content_type}, body, nil)
  end

  # Mechanize::Page#initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  Mechanize::Page.new(URI.parse(url), {'content-type'=>content_type}, body, nil, agent)
end

#urls(pattern = nil) ⇒ Object



143
144
145
146
147
148
149
150
# File 'lib/epitools/browser/cache.rb', line 143

# Lists cached urls.
#
# With a +pattern+, only urls containing it (SQL LIKE substring match) are
# returned; without one, every url in the cache table is returned.
#
# The pattern is passed as a bind parameter rather than interpolated into the
# SQL text, so quotes in it can neither break the statement nor inject SQL.
# LIKE wildcards (% and _) inside the pattern keep their LIKE meaning.
#
# @param pattern [#to_s, nil] substring to match against the url column
# @return [Array] first column of every matching row
def urls(pattern=nil)
  rows =
    if pattern
      db.execute("SELECT url FROM cache WHERE url LIKE ?", "%#{pattern}%")
    else
      db.execute("SELECT url FROM cache")
    end

  rows.map { |row| row.first }
end

#valid_page?(page) ⇒ Boolean

Returns:

  • (Boolean)


35
36
37
# File 'lib/epitools/browser/cache.rb', line 35

# Duck-type check: a page is valid when it responds to #body, #content_type
# and #uri.
#
# @param page [Object] candidate page
# @return [Boolean]
def valid_page?(page)
  required = %i[body content_type uri]
  required.all? { |name| page.respond_to?(name) }
end