Class: SimpleOpencBot
- Inherits:
-
Object
show all
- Includes:
- OpencBot
- Defined in:
- lib/simple_openc_bot.rb
Defined Under Namespace
Classes: BaseLicenceRecord
Constant Summary
Constants included
from OpencBot
OpencBot::VERSION
Class Method Summary
collapse
Instance Method Summary
collapse
Methods included from OpencBot
#db_location, #db_name, #export, extended, #insert_or_update, #root_directory, #save_data, #save_run_report, #spotcheck, #sqlite_busy_timeout, #table_summary, #unlock_database, #verbose?
#normalise_utf8_spaces, #strip_all_spaces
Class Method Details
.inherited(obj) ⇒ Object
17
18
19
20
21
|
# File 'lib/simple_openc_bot.rb', line 17
# Hook run when a class subclasses this one. Stores the application root --
# the parent of the directory containing the file that defines the
# subclass -- in @@simple_app_directory.
#
# obj - the newly created subclass (forwarded to super).
#
# Returns the expanded application root path.
def self.inherited(obj)
  super
  # Take the caller's path from the backtrace location rather than splitting
  # the backtrace string on ":", which truncates any path that itself
  # contains a colon (e.g. a Windows drive letter such as "C:/bots/lib/x.rb").
  subclass_file = caller_locations(1, 1).first.path
  @@simple_app_directory = File.expand_path(File.join(File.dirname(subclass_file), ".."))
end
|
.yields(*fields) ⇒ Object
12
13
14
15
|
# File 'lib/simple_openc_bot.rb', line 12
# Declares the record type this bot produces.
#
# fields - the record classes; at most one may be given.
#
# Raises RuntimeError when more than one is supplied.
# Returns the array that was stored in _yields.
def self.yields(*fields)
  unless fields.size <= 1
    raise "We currently only support one Record type per bot"
  end

  self._yields = fields
end
|
Instance Method Details
#all_stored_records(opts = {}) ⇒ Object
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
# File 'lib/simple_openc_bot.rb', line 99
# Builds and runs a query against the bot's data table.
#
# Recognised keys in +opts+ (all optional):
#   :select          - column list (default "ocdata.*")
#   :table           - table name (default "ocdata")
#   :where           - SQL condition (default "1")
#   :order           - SQL ORDER BY expression
#   :limit           - maximum number of rows
#   :batch           - used as :limit when :only_unexported is set and no
#                      :limit was given
#   :only_unexported - restrict to rows whose _last_exported_at is NULL or
#                      older than _last_updated_at
#   :specific_ids    - with :only_unexported, restrict to rows whose unique
#                      field (taken from _yields[0]) is one of these values
#   :count           - run a COUNT(*) query through +select+ instead of
#                      building records
#   :debug           - print the generated SQL
#
# :select, :table, :where, :order and :limit are interpolated into the SQL
# verbatim, so they must come from trusted code.
#
# The caller's hash is left untouched.
#
# Returns the result of +select+ when :count is set, otherwise the result of
# +select_records+.
def all_stored_records(opts={})
  # Work on a copy so the :limit default never leaks into the caller's hash.
  opts = opts.dup
  opts[:limit] ||= opts[:batch] if opts[:only_unexported]

  columns = opts[:select] || "ocdata.*"
  table = opts[:table] || "ocdata"
  where = (opts[:where] ? "\nWHERE #{opts[:where]}\n" : "\nWHERE 1 \n")
  order = (opts[:order] ? "\nORDER BY #{opts[:order]}\n" : "")
  limit = (opts[:limit] ? "\nLIMIT #{opts[:limit]}\n" : "")

  if opts[:only_unexported]
    where += " AND (_last_exported_at IS NULL "\
      "OR _last_exported_at < _last_updated_at)"
    unless opts[:specific_ids].blank?
      quoted_ids = opts[:specific_ids].map do |id|
        # Double embedded single quotes so an id such as O'Neil cannot
        # terminate the SQL string literal early.
        escaped = id.to_s.gsub("'", "''")
        "'#{escaped}'"
      end
      where += " AND #{_yields[0].unique_field} IN (#{quoted_ids.join(",")})"
    end
  end

  if opts[:count]
    sql = "COUNT(*) AS count from #{table} #{where}"
    puts sql if opts[:debug]
    select(sql)
  else
    sql = "#{columns} from #{table} #{where} #{order} #{limit}"
    puts sql if opts[:debug]
    select_records(sql)
  end
end
|
#check_unique_index(record_class) ⇒ Object
Should this be a method in the sqlite_magic gem?
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# File 'lib/simple_openc_bot.rb', line 73
# Verifies that the columns covered by the automatically created unique
# index(es) on `ocdata` are exactly record_class.unique_fields.
#
# record_class - must respond to +unique_fields+ and +name+.
#
# On a mismatch, issues ROLLBACK on the connection and raises RuntimeError
# describing both field lists. Returns nil when they match.
def check_unique_index(record_class)
  index_rows = sqlite_magic_connection.execute("PRAGMA INDEX_LIST('ocdata')")

  # Only unique indexes whose name contains "autoindex" are considered.
  auto_unique = index_rows.select do |index|
    index["unique"] == 1 && index["name"] =~ /autoindex/
  end

  db_unique_fields = auto_unique.flat_map do |index|
    columns = sqlite_magic_connection.execute("PRAGMA INDEX_INFO('#{index["name"]}')")
    columns.map { |column| column["name"] }
  end

  record_unique_fields = record_class.unique_fields.map(&:to_s)
  return if db_unique_fields == record_unique_fields

  sqlite_magic_connection.execute("ROLLBACK")
  raise [
    "Unique fields #{record_unique_fields} do not match the unique index(es) in `ocdata` table!",
    "\nThis is usually because the value of unique_field has changed since the table was automatically created.",
    "\nUnique fields in `ocdata`: #{db_unique_fields.flatten}; in record #{record_class.name}: #{record_unique_fields}"
  ].join
end
|
#count_stored_records ⇒ Object
91
92
93
94
95
96
97
|
# File 'lib/simple_openc_bot.rb', line 91
# Number of stored records, taken from the "count" column of the first row
# of the COUNT query. Returns 0 when SqliteMagic::NoSuchTable is raised.
def count_stored_records
  count_rows = all_stored_records(:count => true)
  count_rows.first["count"]
rescue SqliteMagic::NoSuchTable
  0
end
|
#export_data(opts = {}) ⇒ Object
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
# File 'lib/simple_openc_bot.rb', line 143
# Exports stored records as pipeline data.
#
# The bookkeeping columns are added to `ocdata` immediately when this method
# is called; everything else runs lazily, as the returned Enumerator is
# iterated. For each batch, the non-nil +to_pipeline+ value of every record is
# yielded, then the batch is saved back inside a transaction.
#
# opts[:all]          - when truthy, one single batch is loaded with
#                       all_stored_records(opts) and records are saved back
#                       without a new _last_exported_at value.
# opts[:specific_ids] - forwarded to unexported_stored_records when
#                       opts[:all] is not set.
#
# Returns an Enumerator yielding each record's pipeline data.
def export_data(opts={})
begin
# Make sure the export bookkeeping columns exist before they are queried.
sqlite_magic_connection.add_columns(
'ocdata', [:_last_exported_at, :_last_updated_at])
rescue SQLite3::SQLException
# NOTE(review): the error is swallowed on purpose -- presumably it is raised
# when the columns already exist; confirm no other failure is hidden here.
end
Enumerator.new do |yielder|
# 1-based batch counter.
b = 1
loop do
if opts[:all]
# With :all there is exactly one pass over the data.
break if b > 1
batch = all_stored_records(opts)
else
# Otherwise keep asking for unexported records, 100 per request, until none come back.
batch = unexported_stored_records(:batch => 100, :specific_ids => opts[:specific_ids])
end
break if batch.empty?
# Rows to save back, grouped by record class name.
updates = {}
batch.map do |record|
pipeline_data = record.to_pipeline
# NOTE(review): skipped records are never added to `updates`, so this method
# does not stamp _last_exported_at on them -- presumably they match the
# unexported query again on the next pass; confirm this cannot loop forever.
next if pipeline_data.nil?
updates[record.class.name] ||= []
if !opts[:all]
# Stamp the export time so the record is no longer selected as unexported.
updates[record.class.name] << record.to_hash.merge(
:_last_exported_at => Time.now.iso8601(2))
else
updates[record.class.name] << record.to_hash
end
yielder << pipeline_data
end
sqlite_magic_connection.execute("BEGIN TRANSACTION")
# The unique-index check runs once, on the first batch only.
if b == 1
check_unique_index(_yields[0])
end
updates.each do |k, v|
# k is a class name string; resolve it to get that class's unique fields.
save_data(k.constantize.unique_fields, v)
end
sqlite_magic_connection.execute("COMMIT")
b += 1
end
end
end
|
#select_records(sql) ⇒ Object
139
140
141
|
# File 'lib/simple_openc_bot.rb', line 139
# Runs +sql+ through +select+ and wraps each returned row in the class named
# by the row's '_type' value.
#
# Returns an array of record instances.
def select_records(sql)
  rows = select(sql)
  rows.map do |row|
    record_class = row['_type'].constantize
    record_class.new(row)
  end
end
|
#spotcheck_data ⇒ Object
188
189
190
191
192
193
|
# File 'lib/simple_openc_bot.rb', line 188
# Pipeline representation of each record returned by spotcheck_records.
#
# Returns an array with one +to_pipeline+ result per record.
def spotcheck_data
  spotcheck_records.map(&:to_pipeline)
end
|
#spotcheck_records(limit = 5) ⇒ Object
135
136
137
|
# File 'lib/simple_openc_bot.rb', line 135
# Fetches stored records ordered by RANDOM().
#
# limit - maximum number of records to return (default 5).
def spotcheck_records(limit = 5)
  sampling = { :order => "RANDOM()", :limit => limit }
  all_stored_records(sampling)
end
|
#sqlite_magic_connection ⇒ Object
Overrides the default in the ScraperWiki gem.
24
25
26
27
|
# File 'lib/simple_openc_bot.rb', line 24
# Memoized SqliteMagic connection for this bot.
#
# The database location is @config[:db] when @config is set; otherwise it is
# <@@simple_app_directory>/db/<db_name>. The location is resolved only once,
# when the connection is first created, so later calls do no path work.
#
# Returns the SqliteMagic::Connection.
def sqlite_magic_connection
  @sqlite_magic_connection ||= begin
    db = if @config
           @config[:db]
         else
           File.expand_path(File.join(@@simple_app_directory, 'db', db_name))
         end
    SqliteMagic::Connection.new(db)
  end
end
|
#unexported_stored_records(opts = {}) ⇒ Object
131
132
133
|
# File 'lib/simple_openc_bot.rb', line 131
# Same as all_stored_records, with :only_unexported forced to true.
#
# opts - query options forwarded to all_stored_records. The hash passed in
#        is not modified; a merged copy is forwarded instead.
def unexported_stored_records(opts={})
  all_stored_records(opts.merge(:only_unexported => true))
end
|
#update_data(opts = {}) ⇒ Object
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
# File 'lib/simple_openc_bot.rb', line 29
# Fetches records and writes them to the database in transactional batches.
#
# opts[:specific_ids] - when nil or empty, records come from
#                       fetch_all_records(opts); otherwise from
#                       fetch_specific_records(opts).
# opts[:test_mode]    - when truthy, the batch size is 1 instead of 500.
#
# After the first record is saved, raises RuntimeError if _yields is nil and
# otherwise runs check_unique_index on _yields[0]. A "." is printed to STDOUT
# per saved record. Each batch's open transaction is committed even when an
# error escapes the batch. A success run report is saved at the end.
#
# Returns the number of records saved.
def update_data(opts={})
  ids = opts[:specific_ids]
  fetcher = (ids.nil? || ids.empty?) ? :fetch_all_records : :fetch_specific_records

  # Wrap the block-style fetcher so its results can be consumed in slices.
  record_enumerator = Enumerator.new do |yielder|
    send(fetcher, opts) { |result| yielder.yield(result) }
  end

  saves_count = 0
  batch_size = 500
  batch_size = 1 if opts[:test_mode]

  record_enumerator.each_slice(batch_size) do |records|
    begin
      sqlite_magic_connection.execute("BEGIN TRANSACTION")
      records.each do |record|
        insert_or_update(record.class.unique_fields, record.to_hash)
        saves_count += 1

        # The sanity checks only run once, right after the very first save.
        unless saves_count > 1
          raise "Bot must specify what record type it will yield" if _yields.nil?
          check_unique_index(_yields[0])
        end

        STDOUT.print(".")
        STDOUT.flush
      end
    ensure
      if sqlite_magic_connection.database.transaction_active?
        sqlite_magic_connection.execute("COMMIT")
      end
    end
  end

  save_run_report(:status => 'success', :completed_at => Time.now)
  saves_count
end
|
#validate_data(opts = {}) ⇒ Object
195
196
197
198
199
200
201
202
203
204
|
# File 'lib/simple_openc_bot.rb', line 195
# Collects validation errors from stored records.
#
# opts - query options forwarded to all_stored_records; :limit defaults to
#        1000. Passing :limit => nil disables the limit.
#
# A NOTICE is printed only when a limit is in force and the table holds more
# records than that limit, i.e. when some records really were left unchecked.
#
# Returns an array holding the non-nil +errors+ value of each checked record.
def validate_data(opts={})
  opts = {:limit => 1000}.merge(opts)
  errors = all_stored_records(opts).map(&:errors).compact

  limit = opts[:limit]
  total = count_stored_records
  # Guarding on `limit` avoids comparing nil with an Integer when the caller
  # explicitly disabled the limit.
  if limit && total > limit
    puts "NOTICE: only validated first #{limit} of #{total} records"
  end

  errors
end
|