Module: OpencBot::Helpers::AlphaSearch
Constant Summary
RegisterMethods::MAX_BUSY_RETRIES, RegisterMethods::MAX_STALE_COUNT
Instance Method Summary
collapse
#computed_registry_url, #datum_exists?, #default_stale_count, #export_data, #fetch_data, #fetch_registry_page, #get_raw_data, #post_process, #prepare_and_save_data, #primary_key_name, #raise_when_saving_invalid_record, #raw_data_file_location, #registry_url, #registry_url_from_db, #save_entity, #save_entity!, #save_raw_data, #schema_name, #stale_entry_uids, #update_data, #update_datum, #update_stale, #use_alpha_search, #validate_datum
Instance Method Details
#alpha_terms(starting_term = nil) ⇒ Object
10
11
12
13
14
15
16
17
|
# File 'lib/openc_bot/helpers/alpha_search.rb', line 10
# Builds the ordered list of search terms: every repeated permutation of
# +letters_and_numbers+ of length +numbers_of_chars_in_search+, joined into
# strings.
#
# starting_term - optional term to resume from. When it is present in the
#                 list, the terms before it are left out. When it is nil or
#                 not found, the complete list is returned.
#
# Returns an Array of Strings.
def alpha_terms(starting_term=nil)
  terms = letters_and_numbers.repeated_permutation(numbers_of_chars_in_search).map(&:join)
  # Array#index yields nil for an unknown term; nil.to_i is 0, i.e. start over.
  resume_index = (starting_term && terms.index(starting_term)).to_i
  terms.drop(resume_index)
end
|
#each_search_term(starting_term = nil) ⇒ Object
Iterates through each search term, yielding each term to the block if one is given. Returns the array of search terms whether or not a block is given.
33
34
35
|
# File 'lib/openc_bot/helpers/alpha_search.rb', line 33
# Yields each search term produced by +alpha_terms(starting_term)+ to the
# given block, in order. Without a block nothing is yielded.
#
# Returns the Array of search terms in both cases.
def each_search_term(starting_term=nil)
  terms = alpha_terms(starting_term)
  return terms unless block_given?

  terms.each { |term| yield term }
end
|
#fetch_data_via_alpha_search(options = {}) ⇒ Object
19
20
21
22
23
24
25
26
27
28
29
|
# File 'lib/openc_bot/helpers/alpha_search.rb', line 19
# Runs a search for every alpha search term and saves each entity found.
#
# The term currently being processed is stored under the 'starting_term'
# variable before its search runs, and that variable is set back to nil once
# every term has been processed. The run starts from
# options[:starting_term] when given, otherwise from the stored
# 'starting_term' variable.
#
# options - Hash, also passed through to +search_for_entities_for_term+.
#           :starting_term - term to begin from (optional).
#
# Returns whatever the final +save_var+ call returns.
def fetch_data_via_alpha_search(options={})
  resume_from = options[:starting_term] || get_var('starting_term')

  each_search_term(resume_from) do |current_term|
    # Record progress first, so the stored term is the one being worked on.
    save_var('starting_term', current_term)
    search_for_entities_for_term(current_term, options) { |entity_datum| save_entity(entity_datum) }
  end

  # All terms processed: clear the stored position.
  save_var('starting_term', nil)
end
|
#get_results_and_extract_data_for(prefix, search_offset) ⇒ Object
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# File 'lib/openc_bot/helpers/alpha_search.rb', line 49
# Walks the paginated firm-search results for +prefix+, starting at
# +search_offset+ and advancing 10 results at a time.
#
# Each page whose body matches /webservices\/HRG/ is handed to
# +scrape_search_results_page+, its offset is stored under the
# 'search_offset' variable, and the next page is requested. The first page
# that does not match ends the walk.
#
# prefix        - search prefix interpolated into the query URL as-is
#                 (NOTE(review): not URL-escaped; presumably only ever an
#                 alphanumeric search term - confirm against callers).
# search_offset - Integer offset of the first result to request; nil/false
#                 means there is nothing to do.
# max_attempts  - consecutive failed fetches of one page tolerated before
#                 giving up (default 3). Previously a page that kept failing
#                 was retried forever.
#
# Returns nil.
# Raises RuntimeError when one page fails +max_attempts+ times in a row.
def get_results_and_extract_data_for(prefix, search_offset, max_attempts = 3)
  failed_attempts = 0
  while search_offset
    url = "http://www.oera.li/WebServices/ZefixFL/ZefixFL.asmx/SearchFirm?name=#{prefix}%20&suche_nach=-&rf=&sitz=&id=&language=&phonetisch=no&posMin=#{search_offset}"
    response = fetch_search_results_page(url)

    unless response
      failed_attempts += 1
      # Fail loudly instead of looping forever or silently skipping the page.
      raise "Giving up on #{url} after #{failed_attempts} failed attempts" if failed_attempts >= max_attempts
      next # retry the same offset
    end
    failed_attempts = 0

    if response.match(/webservices\/HRG/)
      puts "****Scraping page #{(search_offset+10)/10}"
      scrape_search_results_page(response, url)
      save_var('search_offset', search_offset)
      search_offset += 10
    else
      # No result markers on this page: we are past the last page.
      search_offset = false
    end
  end
end

# Fetches +url+ and returns its body transcoded from ISO-8859-1 to UTF-8, or
# nil (after printing the problem) when fetching or transcoding fails.
# Assumes open-uri has been loaded by the surrounding project, as the
# previous bare open(url) call already did.
def fetch_search_results_page(url)
  # Kernel#open stopped opening URLs in Ruby 3.0; URI.open exists from 2.5.
  # The block form closes the handle once the body has been read.
  body = URI.respond_to?(:open) ? URI.open(url, &:read) : open(url, &:read)
  body.encode!('utf-8', 'iso-8859-1')
rescue StandardError => e
  # StandardError covers Timeout::Error and network/HTTP errors without
  # swallowing signals or SystemExit the way rescuing Exception did.
  puts "Problem getting/parsing data from #{url}: #{e.inspect}"
  nil
end
private :fetch_search_results_page
|
#letters_and_numbers ⇒ Object
37
38
39
|
# File 'lib/openc_bot/helpers/alpha_search.rb', line 37
# The characters search terms are built from: 'A' through 'Z' followed by
# '0' through '9'.
#
# Returns a new Array of 36 single-character Strings.
def letters_and_numbers
  [*('A'..'Z'), *('0'..'9')]
end
|
#numbers_of_chars_in_search ⇒ Object
41
42
43
|
# File 'lib/openc_bot/helpers/alpha_search.rb', line 41
# Length of each generated search term.
#
# Returns the receiver's NUMBER_OF_CHARS_IN_SEARCH constant when it is
# defined, otherwise 1. The receiver must respond to const_defined? and
# const_get (i.e. be a Module or Class).
def numbers_of_chars_in_search
  return 1 unless const_defined?('NUMBER_OF_CHARS_IN_SEARCH')

  const_get('NUMBER_OF_CHARS_IN_SEARCH')
end
|
#search_for_entities_for_term(term, options = {}) ⇒ Object
45
46
47
|
# File 'lib/openc_bot/helpers/alpha_search.rb', line 45
# Placeholder to be overridden: this version always raises.
#
# term    - the search term (unused here).
# options - Hash of options (unused here).
#
# Per the error message, an implementation should yield a company data Hash.
#
# Raises RuntimeError unconditionally.
def search_for_entities_for_term(term, options={})
  message = "The #search_for_entities_for_term method has not been implemented for this case.\n" \
            "It needs to be, and should yield a company data Hash"
  raise RuntimeError, message
end
|