Module: OpencBot::Helpers::AlphaSearch

Includes:
RegisterMethods
Included in:
CompanyFetcherBot
Defined in:
lib/openc_bot/helpers/alpha_search.rb

Constant Summary

Constants included from RegisterMethods

RegisterMethods::MAX_BUSY_RETRIES, RegisterMethods::MAX_STALE_COUNT

Instance Method Summary

Methods included from RegisterMethods

#computed_registry_url, #datum_exists?, #default_stale_count, #export_data, #fetch_data, #fetch_registry_page, #get_raw_data, #post_process, #prepare_and_save_data, #primary_key_name, #raise_when_saving_invalid_record, #raw_data_file_location, #registry_url, #registry_url_from_db, #save_entity, #save_entity!, #save_raw_data, #schema_name, #stale_entry_uids, #update_data, #update_datum, #update_stale, #use_alpha_search, #validate_datum

Instance Method Details

#alpha_terms(starting_term = nil) ⇒ Object



10
11
12
13
14
15
16
17
# File 'lib/openc_bot/helpers/alpha_search.rb', line 10

# Builds every search term of numbers_of_chars_in_search characters drawn
# from letters_and_numbers (repeats allowed) and returns the list from
# +starting_term+ onwards.
#
# When +starting_term+ is nil, or is not one of the generated terms, the
# complete list is returned.
def alpha_terms(starting_term=nil)
  alphabet = letters_and_numbers
  term_length = numbers_of_chars_in_search
  candidates = alphabet.repeated_permutation(term_length).map(&:join)
  # nil.to_i is 0, so a missing or unknown term means "start at the beginning"
  offset = (starting_term && candidates.index(starting_term)).to_i
  candidates.drop(offset)
end

#each_search_term(starting_term = nil) ⇒ Object

Iterates through each search term, yielding each term to the block if one is given. The array of search terms is returned whether or not a block is given.



33
34
35
# File 'lib/openc_bot/helpers/alpha_search.rb', line 33

# Walks the search terms produced by alpha_terms(starting_term), passing each
# one to the block when a block is supplied. The array of terms is returned
# in either case.
def each_search_term(starting_term=nil)
  terms = alpha_terms(starting_term)
  return terms unless block_given?

  terms.each do |term|
    yield term
  end
end

#fetch_data_via_alpha_search(options = {}) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
# File 'lib/openc_bot/helpers/alpha_search.rb', line 19

# Runs a search for every search term and saves each entity the search yields.
#
# The first term is options[:starting_term] when given, otherwise the
# 'starting_term' value read with get_var. The term being processed is stored
# with save_var before its search starts, and the stored value is set back to
# nil once every term has been processed.
def fetch_data_via_alpha_search(options={})
  resume_from = options[:starting_term] || get_var('starting_term')

  each_search_term(resume_from) do |search_term|
    # record the current term before searching it
    save_var('starting_term', search_term)
    search_for_entities_for_term(search_term, options) { |datum| save_entity(datum) }
  end

  # all terms done: clear the stored pointer
  save_var('starting_term', nil)
end

#get_results_and_extract_data_for(prefix, search_offset) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/openc_bot/helpers/alpha_search.rb', line 49

# Pages through the search results for +prefix+, starting at +search_offset+
# and advancing the offset by 10 after each page, until a page comes back
# without any company links.
#
# Each page that does contain company links is handed to
# scrape_search_results_page, and its offset is stored with
# save_var('search_offset', ...).
#
# A failed fetch is reported on stdout and the same page is retried, but only
# +max_retries+ times in a row; after that the error is re-raised instead of
# retrying forever. Only Timeout::Error and StandardError are treated as fetch
# failures, so Interrupt, SystemExit and similar still stop the run.
#
# @param prefix [String] search prefix interpolated into the query URL
# @param search_offset [Integer, nil, false] offset (posMin) of the first
#   result to request; nothing is fetched when it is nil or false
# @param max_retries [Integer] consecutive retries allowed for a single page
# @return [nil]
# @raise [StandardError] the last fetch error once the retries are used up
def get_results_and_extract_data_for(prefix, search_offset, max_retries = 3)
  consecutive_failures = 0
  while search_offset
    url = "http://www.oera.li/WebServices/ZefixFL/ZefixFL.asmx/SearchFirm?name=#{prefix}%20&suche_nach=-&rf=&sitz=&id=&language=&phonetisch=no&posMin=#{search_offset}"
    begin
      # the response body is treated as ISO-8859-1 and converted to UTF-8
      response = open(url).read.encode!('utf-8','iso-8859-1')
    rescue Timeout::Error, StandardError => e
      puts "Problem getting/parsing data from #{url}: #{e.inspect}"
      consecutive_failures += 1
      # give up rather than hammering the same URL indefinitely
      raise if consecutive_failures > max_retries
      next
    end
    consecutive_failures = 0

    if response.match(/webservices\/HRG/) # check has links to companies
      puts "****Scraping page #{(search_offset+10)/10}"
      scrape_search_results_page(response, url)
      save_var('search_offset', search_offset)
      search_offset += 10
    else
      # no company links on this page: we have run out of results
      search_offset = false
    end
  end
end

#letters_and_numbers ⇒ Object



37
38
39
# File 'lib/openc_bot/helpers/alpha_search.rb', line 37

# Characters used to build search terms: the uppercase letters 'A'..'Z'
# followed by the digits '0'..'9', as an array of one-character strings.
def letters_and_numbers
  [*'A'..'Z', *'0'..'9']
end

#numbers_of_chars_in_search ⇒ Object



41
42
43
# File 'lib/openc_bot/helpers/alpha_search.rb', line 41

# Length of each search term: the receiver's NUMBER_OF_CHARS_IN_SEARCH
# constant when it defines one, otherwise 1.
#
# NOTE(review): relies on the receiver responding to const_defined?/const_get,
# i.e. being a Module or Class -- confirm against how this helper is mixed in.
def numbers_of_chars_in_search
  return 1 unless const_defined?('NUMBER_OF_CHARS_IN_SEARCH')

  const_get('NUMBER_OF_CHARS_IN_SEARCH')
end

#search_for_entities_for_term(term, options = {}) ⇒ Object



45
46
47
# File 'lib/openc_bot/helpers/alpha_search.rb', line 45

# Placeholder that always raises a RuntimeError: whatever uses the alpha
# search has to supply its own implementation, which should yield a company
# data Hash (per the error message below).
def search_for_entities_for_term(term, options={})
  message = "The #search_for_entities_for_term method has not been implemented for this case.\nIt needs to be, and should yield a company data Hash"
  raise RuntimeError, message
end