Module: OpencBot::Helpers::RegisterMethods

Included in:
AlphaSearch, IncrementalSearch
Defined in:
lib/openc_bot/helpers/register_methods.rb

Constant Summary collapse

MAX_BUSY_RETRIES =
3

Instance Method Summary collapse

Instance Method Details

#computed_registry_url(uid) ⇒ Object

stub method. Override in including module if this can be computed from uid



70
71
# File 'lib/openc_bot/helpers/register_methods.rb', line 70

# Stub hook: an including module overrides this when the registry URL can be
# computed from +uid+. This default computes nothing and returns nil.
def computed_registry_url(uid)
  nil
end

#datum_exists?(uid) ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
# File 'lib/openc_bot/helpers/register_methods.rb', line 15

# Reports whether the ocdata table holds a row whose primary-key column
# equals +uid+.
#
# Returns true when the lookup yields a first row, false otherwise.
def datum_exists?(uid)
  key_column = primary_key_name
  matches = select("ocdata.#{key_column} FROM ocdata WHERE #{key_column} = ? LIMIT 1", uid)
  matches.first ? true : false
end

#export_data ⇒ Object



30
31
32
33
34
35
# File 'lib/openc_bot/helpers/register_methods.rb', line 30

# Selects every row of ocdata and yields each one to the caller's block after
# passing it through post_process with skip_nulls set to true.
def export_data
  select("ocdata.* from ocdata").each { |row| yield post_process(row, true) }
end

#fetch_data ⇒ Object

Fetches and saves data. By default assumes an incremental search, or an alpha search if USE_ALPHA_SEARCH is set. This method should be overridden if you are going to do a different type of data import, e.g. from a CSV file.



22
23
24
25
26
27
28
# File 'lib/openc_bot/helpers/register_methods.rb', line 22

# Fetches data with the alpha search when use_alpha_search is truthy and with
# the incremental search otherwise. Override for other kinds of import.
def fetch_data
  return fetch_data_via_alpha_search if use_alpha_search

  fetch_data_via_incremental_search
end

#fetch_registry_page(company_number) ⇒ Object



37
38
39
# File 'lib/openc_bot/helpers/register_methods.rb', line 37

# Retrieves the content found at the registry URL for +company_number+,
# using the object returned by _client to perform the request.
def fetch_registry_page(company_number)
  http_client = _client
  page_url = registry_url(company_number)
  http_client.get_content(page_url)
end

#post_process(row_hash, skip_nulls = false) ⇒ Object



172
173
174
175
# File 'lib/openc_bot/helpers/register_methods.rb', line 172

# Converts a stored row back into Ruby objects. The :data entry is dropped
# first; the remaining fields (many of which hold serialized JSON) are handed
# to convert_json_to_ruby together with the +skip_nulls+ flag.
def post_process(row_hash, skip_nulls=false)
  row_without_raw_data = row_hash.except(:data)
  convert_json_to_ruby(row_without_raw_data, skip_nulls)
end

#prepare_and_save_data(all_data, options = {}) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/openc_bot/helpers/register_methods.rb', line 41

# Prepares +all_data+ for saving and inserts or updates it, keyed on the
# primary key column.
#
# When the database reports SQLite3::BusyException the save is retried up to
# MAX_BUSY_RETRIES times, sleeping 5 seconds before the first retry and
# doubling the wait before each subsequent one. Once the retries are used up
# the exception is re-raised. +options+ is accepted but not used here.
def prepare_and_save_data(all_data,options={})
  data_to_be_saved = prepare_for_saving(all_data)
  attempts = 0
  wait_seconds = 5
  begin
    insert_or_update([primary_key_name], data_to_be_saved)
  rescue SQLite3::BusyException => busy_error
    attempts += 1
    raise busy_error if attempts > MAX_BUSY_RETRIES

    puts "#{busy_error.inspect} raised #{attempts} times saving:\n#{all_data}\n\nNow retrying in #{wait_seconds} seconds" if verbose?
    sleep wait_seconds
    wait_seconds *= 2
    retry
  end
end

#primary_key_name ⇒ Object



60
61
62
# File 'lib/openc_bot/helpers/register_methods.rb', line 60

# Name of the primary key column: the receiver's PRIMARY_KEY_NAME constant
# when one is defined, :uid otherwise.
def primary_key_name
  return :uid unless const_defined?('PRIMARY_KEY_NAME')

  const_get('PRIMARY_KEY_NAME')
end

#registry_url(uid) ⇒ Object

sensible default. Either uses computed version or registry_url in db



65
66
67
# File 'lib/openc_bot/helpers/register_methods.rb', line 65

# Registry URL for +uid+: the computed URL when computed_registry_url returns
# one, otherwise whatever registry_url_from_db returns.
def registry_url(uid)
  computed_url = computed_registry_url(uid)
  return computed_url if computed_url

  registry_url_from_db(uid)
end

#registry_url_from_db(uid) ⇒ Object

stub method. Override in including module if this can be pulled from db (i.e. it is stored there)



74
75
# File 'lib/openc_bot/helpers/register_methods.rb', line 74

# Stub hook: an including module overrides this when the registry URL is
# stored in the database. This default looks nothing up and returns nil.
def registry_url_from_db(uid)
  nil
end

#save_entity(entity_datum) ⇒ Object



77
78
79
80
81
# File 'lib/openc_bot/helpers/register_methods.rb', line 77

# Validates +entity_datum+ (with its :data entry left out of the validation)
# and saves the complete datum when no validation errors are reported.
#
# Returns nil without saving when validation reports errors.
def save_entity(entity_datum)
  datum_to_validate = entity_datum.except(:data)
  problems = validate_datum(datum_to_validate)
  prepare_and_save_data(entity_datum) if problems.blank?
end

#save_entity!(entity_datum) ⇒ Object

Behaves like save_entity but raises a RecordInvalid exception if the record is not valid (validation errors are available in the exception’s validation_errors method)



86
87
88
89
90
# File 'lib/openc_bot/helpers/register_methods.rb', line 86

# Strict counterpart of save_entity: validates +entity_datum+ (with its :data
# entry left out of the validation) and saves the complete datum when valid.
#
# Raises OpencBot::RecordInvalid, built from the validation errors, when
# validation reports any.
def save_entity!(entity_datum)
  problems = validate_datum(entity_datum.except(:data))
  if problems.blank?
    prepare_and_save_data(entity_datum)
  else
    raise OpencBot::RecordInvalid.new(problems)
  end
end

#schema_name ⇒ Object



92
93
94
# File 'lib/openc_bot/helpers/register_methods.rb', line 92

# The receiver's SCHEMA_NAME constant when one is defined, nil otherwise.
def schema_name
  const_get('SCHEMA_NAME') if const_defined?('SCHEMA_NAME')
end

#stale_entry_uids(stale_count = nil) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/openc_bot/helpers/register_methods.rb', line 96

# Yields the primary-key value of each stale row in ocdata: rows whose
# retrieved_at is NULL or earlier than 30 days before today. At most
# +stale_count+ rows are selected (1000 when no count is given).
#
# If the query fails because the retrieved_at column does not exist, the
# column is added to ocdata and the method is run again; any other
# SQLite3::SQLException is re-raised.
def stale_entry_uids(stale_count=nil)
  limit = (stale_count || 1000).to_i
  cutoff = Date.today - 30
  sql_query = "ocdata.* from ocdata WHERE retrieved_at IS NULL OR strftime('%s', retrieved_at) < strftime('%s',  '#{cutoff}') LIMIT #{limit}"
  select(sql_query).each { |row| yield row[primary_key_name.to_s] }
rescue SQLite3::SQLException => e
  raise unless e.message[/no such column: retrieved_at/]

  sqlite_magic_connection.add_columns('ocdata', ['retrieved_at'])
  retry
end

#update_data(options = {}) ⇒ Object



111
112
113
114
115
# File 'lib/openc_bot/helpers/register_methods.rb', line 111

# Runs a full update: fetches new data, refreshes stale entries, then saves a
# run report with a status of 'success'. +options+ is accepted but not used
# here.
def update_data(options={})
  fetch_data
  update_stale
  save_run_report(status: 'success')
end

#update_datum(uid, output_as_json = false, replace_existing_data = false) ⇒ Object

If true is passed as the second argument, the method will output the updated result as json to STDOUT, which can then be consumed by, say, something which triggered this method, for example if it was called by a rake task, which in turn might have been called by the main OpenCorporates application

If the data to be saved is invalid then either the exception is raised, or, if output_as_json is requested then the validation error is included in the JSON error message



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/openc_bot/helpers/register_methods.rb', line 132

# Fetches, processes and saves the entry identified by +uid+.
#
# uid                   - primary-key value of the entry to update.
# output_as_json        - when true, the processed data is written to STDOUT
#                         as JSON (for a calling process to consume) and
#                         errors are reported via output_json_error_message
#                         instead of being raised.
# replace_existing_data - accepted for interface compatibility; not used here.
#
# Returns the processed data hash (or the result of puts in JSON mode), or
# nil when fetch_datum or process_datum returns nothing for this uid.
#
# Raises the original error, with the uid appended to its message, when
# anything fails and output_as_json is false. This includes the error raised
# by save_entity! when the data is invalid.
def update_datum(uid, output_as_json=false,replace_existing_data=false)
  raw_data = fetch_datum(uid)
  return unless raw_data

  default_options = {primary_key_name => uid, :retrieved_at => Time.now}
  base_processed_data = process_datum(raw_data)
  return unless base_processed_data

  processed_data = default_options.merge(base_processed_data)
  # Save the original raw data alongside the processed fields, as we may not
  # be extracting everything from it yet. The raising variant is used so that
  # invalid data is reported (raised, or included in the JSON error) rather
  # than silently dropped while the method reports success.
  save_entity!(processed_data.merge(:data => raw_data))
  if output_as_json
    puts processed_data.to_json
  else
    processed_data
  end
rescue SystemExit, SignalException
  # Process exit and signals (e.g. Ctrl-C) are not update failures: let them
  # propagate untouched instead of converting them to a JSON error message.
  raise
rescue Exception => e
  if output_as_json
    output_json_error_message(e)
  else
    rich_message = "#{e.message} updating entry with uid: #{uid}"
    puts rich_message if verbose?
    raise e, rich_message, e.backtrace
  end
end

#update_stale(stale_count = nil) ⇒ Object



155
156
157
158
159
160
# File 'lib/openc_bot/helpers/register_methods.rb', line 155

# Calls update_datum for every uid yielded by stale_entry_uids, passing
# +stale_count+ through to limit how many stale entries are considered.
def update_stale(stale_count=nil)
  stale_entry_uids(stale_count) { |uid| update_datum(uid) }
end

#use_alpha_search ⇒ Object



11
12
13
# File 'lib/openc_bot/helpers/register_methods.rb', line 11

# Value of the receiver's USE_ALPHA_SEARCH constant, or false when no such
# constant is defined.
def use_alpha_search
  return false unless const_defined?('USE_ALPHA_SEARCH')

  const_get('USE_ALPHA_SEARCH')
end

#validate_datum(record) ⇒ Object



162
163
164
165
166
167
168
169
170
# File 'lib/openc_bot/helpers/register_methods.rb', line 162

# Validates +record+ (serialized to JSON) against the schema file
# "<schema_name>.json" and returns the result of
# JSON::Validator.fully_validate, which is asked for errors as objects.
#
# The validation runs with the working directory switched to
# 'schemas/schemas' (relative to the current directory) for the duration of
# the block.
def validate_datum(record)
  Dir.chdir('schemas/schemas') do
    schema_file = "#{schema_name}.json"
    record_json = record.to_json
    validator_options = {:errors_as_objects => true}
    JSON::Validator.fully_validate(schema_file, record_json, validator_options)
  end
end