Top Level Namespace

Defined Under Namespace

Classes: Array, HArray, Hash, Name, NamePartsParser

Instance Method Summary

Instance Method Details

#aggressive_deep_symbolize_keys(maybe) ⇒ Object



22
23
24
25
26
27
# File 'lib/trials/utils/various.rb', line 22

# Symbolizes keys as deeply as possible.
#
# Objects that respond to +deep_symbolize_keys+ are converted with it; other
# objects that respond to +each+ are mapped element by element through this
# same method; anything else is returned unchanged.
def aggressive_deep_symbolize_keys(maybe)
  if maybe.respond_to?(:deep_symbolize_keys)
    maybe.deep_symbolize_keys
  elsif maybe.respond_to?(:each)
    maybe.map { |element| aggressive_deep_symbolize_keys(element) }
  else
    maybe
  end
end

#append(file, content) ⇒ Object Also known as: append_result, append_results



73
74
75
# File 'lib/trials/utils/files.rb', line 73

# Appends +content+ to the file located at +results_path(file)+, opening it
# in append mode.
def append(file, content)
  target = results_path(file)
  File.open(target, 'a') do |handle|
    handle << content
  end
end

#array_to_count_hash(list) ⇒ Object



73
74
75
76
77
78
# File 'lib/trials/utils/hashes.rb', line 73

# Builds a hash mapping each distinct element of +list+ to the number of
# times it occurs. Keys appear in order of first occurrence.
#
# Counting is done in a single pass instead of re-scanning the list once per
# distinct element. Elements are grouped by hash-key equality, so values that
# are == but not eql? (such as 1 and 1.0) are counted separately.
def array_to_count_hash(list)
  list.each_with_object({}) do |item, counts|
    counts[item] = counts.fetch(item, 0) + 1
  end
end

#cached_ddb_scan(query) ⇒ Object



9
10
11
# File 'lib/trials/utils/aws.rb', line 9

# Runs +ddb_scan(query)+ through +json_cache+, using the query's
# +:table_name+ entry as the cache key.
def cached_ddb_scan(query)
  cache_key = query.dig(:table_name)
  json_cache(cache_key) do
    ddb_scan(query)
  end
end

#clean_xml(str) ⇒ Object



9
10
11
# File 'lib/trials/utils/xmls.rb', line 9

# Re-encodes +str+ from binary to UTF-8, dropping bytes that cannot be
# converted, then parses it with Nokogiri and serializes it back to XML.
def clean_xml(str)
  scrubbed = str.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
  document = Nokogiri::XML(scrubbed)
  document.to_xml
end

#count_for_group_by(batch, &block) ⇒ Object



66
67
68
69
70
71
# File 'lib/trials/utils/hashes.rb', line 66

# Groups +batch+ by the value the block returns and reports how many
# elements fell into each group, as a hash of group key => count.
def count_for_group_by(batch, &block)
  grouped = batch.group_by(&block)
  Hash[grouped.map { |group_key, members| [group_key, members.length] }]
end

#create_db(name) ⇒ Object



22
23
24
25
26
27
28
29
30
# File 'lib/trials/utils/sqls.rb', line 22

# Creates a new SQLite database at results_path("<name>.db") and returns the
# handle with +results_as_hash+ enabled.
#
# Raises a RuntimeError ('db already exists') when that path already exists.
def create_db(name)
  path = results_path("#{name}.db")
  raise 'db already exists' if File.exist?(path)

  SQLite3::Database.new(path).tap do |database|
    database.results_as_hash = true
  end
end

#create_table(db, table, **attrs) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/trials/utils/sqls.rb', line 42

# Builds and executes a "create table" statement on +db+.
#
# Each keyword in +attrs+ becomes one "<name> <definition>" column line. The
# special +:log+ keyword is removed from the columns and, when truthy, makes
# the statement get passed to +log+ before it is executed.
# Returns whatever +db.execute+ returns.
def create_table(db, table, **attrs)
  should_log = attrs.delete(:log)
  column_lines = attrs.map { |column, definition| "  #{column} #{definition}" }
  sql = "create table #{table} (\n#{column_lines.join(",\n")}\n);\n"

  log(sql) if should_log
  db.execute(sql)
end

#ddb_connection ⇒ Object



1
2
3
4
5
6
7
# File 'lib/trials/utils/aws.rb', line 1

# Returns a memoized Aws::DynamoDB::Client built from the key, secret and
# region found under +secrets.aws+. The client is stored in @connection and
# reused on later calls.
def ddb_connection
  @connection ||= begin
    credentials = {
      access_key_id: secrets.aws.key,
      secret_access_key: secrets.aws.secret,
      region: secrets.aws.region,
    }
    Aws::DynamoDB::Client.new(**credentials)
  end
end

#ddb_scan(query) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/trials/utils/aws.rb', line 13

# Scans in parallel: starts one thread per segment, each calling
# +ddb_scan_without_segmentation+ with +total_segments+ and +segment+ merged
# into the query, then returns all results flattened, in segment order.
#
# The optional +:segmentation+ entry of +query+ sets the number of segments
# (default 4) and is not forwarded to the per-segment calls. The caller's
# hash is left untouched: the entry is removed from a copy.
#
# An exception raised inside a worker thread is re-raised here when that
# thread's value is collected.
def ddb_scan(query)
  query = query.dup
  segmentation = query.delete(:segmentation) || 4

  workers = segmentation.times.map do |segment|
    Thread.new do
      ddb_scan_without_segmentation(
        query.merge(
          total_segments: segmentation,
          segment: segment,
        ),
      )
    end
  end

  # Thread#value joins the thread and returns the block's result.
  workers.map(&:value).flatten
end

#ddb_scan_without_segmentation(query) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/trials/utils/aws.rb', line 32

# Runs +query+ against +ddb_connection.scan+ repeatedly, following
# +last_evaluated_key+ until a response comes back without one, and returns
# every non-nil item with its keys symbolized.
#
# The first request is sent with +exclusive_start_key: nil+; each later
# request passes the previous response's +last_evaluated_key+.
def ddb_scan_without_segmentation(query)
  items = []
  start_key = nil

  loop do
    result = ddb_connection.scan(query.merge(exclusive_start_key: start_key))
    # concat appends in place instead of building a new array for every page.
    items.concat(result.items.compact.map(&:symbolize_keys))

    start_key = result.last_evaluated_key
    break if start_key.blank?
  end

  items
end

#ddb_upload_items(table, all_items) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/trials/utils/aws.rb', line 47

# Writes +all_items+ to +table+ in slices of 25 via
# +ddb_connection.batch_write_item+, wrapping each non-nil item in a put
# request. Slices with no truthy element are skipped.
#
# When a block is given it is called after each written slice with the slice
# (including any nil entries) and the offset of its first element.
#
# NOTE(review): the return value of batch_write_item is not inspected here;
# confirm whether unprocessed items need to be retried.
def ddb_upload_items(table, all_items)
  all_items.each_slice(25).each_with_index do |batch, batch_index|
    next if batch.none?

    put_requests = batch.compact.map do |item|
      { put_request: { item: item } }
    end

    ddb_connection.batch_write_item(request_items: { table => put_requests })

    yield(batch, batch_index * 25) if block_given?
  end
end

#delete(file) ⇒ Object Also known as: delete_result, delete_results

deleting



82
83
84
85
# File 'lib/trials/utils/files.rb', line 82

# Recursively removes +results_path(file)+ when +result_exists?(file)+ is
# true; otherwise does nothing and returns nil.
def delete(file)
  FileUtils.rm_r(results_path(file)) if result_exists?(file)
end

#delete_seeds(file) ⇒ Object Also known as: delete_seed



95
96
97
98
# File 'lib/trials/utils/files.rb', line 95

# Recursively removes +seeds_path(file)+ when +seed_exists?(file)+ is true;
# otherwise does nothing and returns nil.
def delete_seeds(file)
  FileUtils.rm_r(seeds_path(file)) if seed_exists?(file)
end

#delete_tmp(file) ⇒ Object



90
91
92
93
# File 'lib/trials/utils/files.rb', line 90

# Recursively removes +tmp_path(file)+ when +tmp_exists?(file)+ is true;
# otherwise does nothing and returns nil.
def delete_tmp(file)
  FileUtils.rm_r(tmp_path(file)) if tmp_exists?(file)
end

#float?(string) ⇒ Boolean

Returns:

  • (Boolean)


12
13
14
# File 'lib/trials/utils/various.rb', line 12

# Returns true when +Float(string)+ succeeds and false when it raises any
# StandardError (for example on nil or non-numeric text).
def float?(string)
  Float(string)
  true
rescue StandardError
  false
end

#float_or_nil(thing) ⇒ Object



16
17
18
19
20
# File 'lib/trials/utils/various.rb', line 16

# Converts +thing+ with +Float()+ and returns the result, or nil when the
# conversion raises a StandardError.
def float_or_nil(thing)
  begin
    Float(thing)
  rescue StandardError
    nil
  end
end

#full_name_from_parts(name) ⇒ Object



78
79
80
81
82
83
84
# File 'lib/trials/data_handling/names.rb', line 78

# Joins the normalized first, middle and last name of +name+ with single
# spaces.
#
# Parts that normalize to nil or an empty string are left out, so a missing
# middle name no longer produces a double space (or a leading/trailing one
# for a missing first/last name).
def full_name_from_parts(name)
  parts = [name.first_name, name.middle_name, name.last_name]
    .map { |part| normalize_name_part(part) }

  parts.reject { |part| part.to_s.empty? }.join(' ')
end

#gd_session ⇒ Object



1
2
3
4
5
6
7
8
# File 'lib/trials/utils/google_drive.rb', line 1

# Returns a memoized GoogleDrive session.
#
# On first use the value of +secrets.google.drive_config_json+ is written to
# 'config.json' in the results area, the session is built from that file,
# and the file is deleted again. The delete runs in an +ensure+ so the file
# is removed even when building the session raises.
def gd_session
  @gd_session ||= begin
    write('config.json', secrets.google.drive_config_json)
    GoogleDrive::Session.from_config(results_path('config.json'))
  ensure
    delete('config.json')
  end
end

#get_rollbar_items(token:, status: 'active') ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/trials/utils/rollbar.rb', line 10

# Collects items page by page through +get_single_rollbar_items_page+,
# starting at page 0, and returns them with nils and duplicates removed.
#
# Paging stops when a page comes back empty. An optional block is called
# after each non-empty page with the page number and the items gathered so
# far; returning a truthy value from it stops paging early. The block is
# optional: without one, paging simply continues until an empty page.
def get_rollbar_items(token:, status: 'active')
  current_page = 0
  items = []

  loop do
    page_items = get_single_rollbar_items_page(token: token, page: current_page, status: status)

    items += page_items

    break if page_items.empty?
    break if block_given? && yield(current_page, items)

    current_page += 1
  end

  items.compact.uniq
end

#get_single_rollbar_items_page(token:, page:, status: 'active') ⇒ Object



1
2
3
4
5
6
7
8
# File 'lib/trials/utils/rollbar.rb', line 1

# Fetches one page from the Rollbar "instances" endpoint over HTTPS and
# returns the value found at result -> instances in the JSON response (nil
# when that path is absent).
#
# The server certificate is verified (VERIFY_PEER), so the access token is
# not sent to an unauthenticated peer, and the query values are URL-encoded.
def get_single_rollbar_items_page(token:, page:, status: 'active')
  query = URI.encode_www_form(access_token: token, page: page, status: status)
  url = URI("https://api.rollbar.com/api/1/instances/?#{query}")

  http = Net::HTTP.new(url.host, url.port)
  http.use_ssl = true
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER

  response = http.request(Net::HTTP::Get.new(url))
  JSON.parse(response.body).deep_symbolize_keys.dig(:result, :instances)
end

#hash_from_xml(str) ⇒ Object



1
2
3
# File 'lib/trials/utils/xmls.rb', line 1

# Passes +str+ through +clean_xml+, converts the result with
# +Hash.from_xml+ and returns it with keys deeply symbolized.
def hash_from_xml(str)
  parsed = Hash.from_xml(clean_xml(str))
  parsed.deep_symbolize_keys
end

#hashes_to_sql_temp_table(hashes) ⇒ Object



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/trials/utils/sqls.rb', line 1

# Renders SQL that creates a temp table named seed_data and inserts one row
# per hash in +hashes+.
#
# Columns are the keys returned by +uniq_hash_keys(hashes)+, each declared
# as varchar. Every value is rendered as a quoted string literal (nil
# becomes ''). Single quotes inside values are doubled so they cannot
# terminate the literal.
def hashes_to_sql_temp_table(hashes)
  attrs = uniq_hash_keys(hashes)

  column_defs = attrs.map { |attr| "#{attr} varchar" }.join(', ')

  value_tuples = hashes.map do |row|
    literals = attrs.map do |attr|
      escaped = row.dig(attr).to_s.gsub("'", "''")
      "'#{escaped}'"
    end
    "(#{literals.join(', ')})"
  end

  <<~SQL
    create temp table seed_data (
      #{column_defs}
    );

    insert into seed_data values
    #{value_tuples.join(",\n")};
  SQL
end

#insert_into_db(db, table, **attrs) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/trials/utils/sqls.rb', line 55

# Inserts one row into +table+ using a parameterized statement: the keyword
# names become the column list and the values are passed to +db.execute+ as
# bind parameters.
#
# The special +:log+ keyword is removed from the columns and, when truthy,
# makes the statement get passed to +log+ before it is executed.
def insert_into_db(db, table, **attrs)
  should_log = attrs.delete(:log)
  columns = attrs.keys.join(', ')
  placeholders = Array.new(attrs.length, '?').join(', ')
  sql = "insert into #{table} (#{columns})\nvalues (#{placeholders})\n"

  log(sql) if should_log
  db.execute(sql, attrs.values)
end

#invalidate_json_cache ⇒ Object



20
21
22
# File 'lib/trials/utils/jsons.rb', line 20

# Clears the JSON cache by deleting the "json" entry from the tmp area.
def invalidate_json_cache
  cache_dir = 'json'
  delete_tmp(cache_dir)
end

#json_cache(key) ⇒ Object



11
12
13
14
15
16
17
18
# File 'lib/trials/utils/jsons.rb', line 11

# Caches the block's result as JSON in the tmp file "json/<key>.json".
#
# On a miss the block is called and its result written as JSON. In both
# cases the file is then read back, parsed and returned with symbolized
# keys, so hits and misses return the same shape. The block is not called
# when the file already exists.
#
# The read-back is a plain read, not a recursive call, so a file that never
# appears after the write cannot cause unbounded recursion.
def json_cache(key)
  name = "json/#{key}.json"

  write_tmp(name, yield.to_json) unless tmp_exists?(name)

  aggressive_deep_symbolize_keys(JSON.parse(read_tmp(name)))
end

#l(item, nl: true, quiet: false) ⇒ Object



40
41
42
# File 'lib/trials/utils/logging.rb', line 40

# Shorthand for +log+ that always passes +each: false+, so an array is
# logged as one item rather than element by element.
def l(item, nl: true, quiet: false)
  options = { nl: nl, quiet: quiet, each: false }
  log(item, **options)
end

#list_dir(dir) ⇒ Object



35
36
37
# File 'lib/trials/utils/files.rb', line 35

# Returns every path found recursively beneath +seeds_path(dir)+.
def list_dir(dir)
  pattern = "#{seeds_path(dir)}/**/*"
  Dir.glob(pattern)
end

#log(item, nl: true, quiet: false, each: true) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/trials/utils/logging.rb', line 16

# Appends +item+ to 'log.txt' in the results area and echoes it to stdout.
#
# item  - Strings and Numerics are written via +to_s+; anything else is
#         pretty-printed into the file. nil/false are treated as ''.
# nl    - when true, a newline follows the item (in the file and on stdout).
# quiet - when true, nothing is written to stdout; the file is still
#         written. This includes the trailing newline, which was previously
#         printed even in quiet mode.
# each  - when true and +item+ is an Array, every element is logged
#         separately.
#
# Returns nil.
def log(item, nl: true, quiet: false, each: true)
  item ||= ''

  if each && item.is_a?(Array)
    item.each { |entry| log(entry, nl: nl, quiet: quiet, each: false) }
    return
  end

  rendered =
    if item.is_a?(String) || item.is_a?(Numeric)
      item.to_s
    else
      PP.pp(item, +'').chomp
    end

  File.open(results_path('log.txt'), 'a') do |f|
    f << rendered
    f << "\n" if nl
  end

  return if quiet

  # The console gets the raw item (via print), not the pretty-printed text.
  print item
  puts '' if nl
  nil
end

#make_seed(file) ⇒ Object

other



104
105
106
# File 'lib/trials/utils/files.rb', line 104

# Copies the file at +results_path(file)+ to +seeds_path(file)+.
def make_seed(file)
  source = results_path(file)
  destination = seeds_path(file)
  FileUtils.cp(source, destination)
end

#make_tmp(file) ⇒ Object



108
109
110
# File 'lib/trials/utils/files.rb', line 108

# Copies the file at +results_path(file)+ to +tmp_path(file)+.
def make_tmp(file)
  source = results_path(file)
  destination = tmp_path(file)
  FileUtils.cp(source, destination)
end

#merge_hash_groups(*groups, key:, join_type: :inner) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/trials/utils/hashes.rb', line 49

# Joins several lists of hashes on the value each hash holds under +key+.
#
# groups    - any number of lists of hashes. Within a list, a later hash
#             with the same key value replaces an earlier one.
# key       - the hash key whose value identifies matching records.
# join_type - :inner (default) keeps key values present in every group,
#             :all keeps key values present in any group,
#             :first keeps the key values of the first group.
#
# Returns one merged hash per kept key value; on conflicting entries, later
# groups win. Returns [] when no groups are given.
# Raises ArgumentError for an unknown +join_type+.
def merge_hash_groups(*groups, key:, join_type: :inner)
  indexed = groups.map { |group| group.map { |record| [record.dig(key), record] }.to_h }

  join_keys =
    case join_type
    when :inner
      indexed.map(&:keys).reduce(&:&) || []
    when :all
      indexed.flat_map(&:keys).uniq
    when :first
      indexed.empty? ? [] : indexed.first.keys
    else
      raise ArgumentError, "unknown join_type: #{join_type.inspect}"
    end

  join_keys.map do |join_key|
    indexed.map { |index| index[join_key] }.compact.reduce(&:merge)
  end
end

#name_from_parts(name) ⇒ Object

private



71
72
73
74
75
76
# File 'lib/trials/data_handling/names.rb', line 71

# Joins the normalized first and last name of +name+ with a single space.
# The middle name is not included.
def name_from_parts(name)
  [name.first_name, name.last_name]
    .map { |part| normalize_name_part(part) }
    .join(' ')
end

#normalize_address(address) ⇒ Object

normal format <number> <STREET NAME>, <CITY>, <STATE> <postal>



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/trials/data_handling/addresses.rb', line 3

# Normalizes an address to the upper-cased string form of the parsed
# address, with prefix, suffix, unit prefix, unit and postal-code extension
# removed.
#
# +address+ is either a string or an object responding to +address1+ (in
# which case address1, city, state and postal_code are combined into a
# string first).
#
# Returns nil when the input is blank, cannot be parsed, or the parsed
# result lacks a number, street, city, state or postal code.
def normalize_address(address)
  return if address.blank?

  raw =
    if address.respond_to?(:address1)
      "#{address.address1}, #{address.city}, #{address.state} #{address.postal_code}"
    else
      address
    end

  # Put a space before '#' (then collapse whitespace) before parsing.
  parsed = StreetAddress::US.parse(raw.gsub('#', ' #').squish)

  return if parsed.blank?
  return if %i[number street city state postal_code].any? { |field| parsed.public_send(field).blank? }

  %i[prefix suffix unit_prefix unit postal_code_ext].each do |field|
    parsed.public_send("#{field}=", nil)
  end

  parsed.to_s.upcase
end

#normalize_and_parse_address(address_string) ⇒ Object



35
36
37
# File 'lib/trials/data_handling/addresses.rb', line 35

# Normalizes +address_string+ with +normalize_address+ and parses the
# result with StreetAddress::US.
def normalize_and_parse_address(address_string)
  normalized = normalize_address(address_string)
  StreetAddress::US.parse(normalized)
end

#normalize_and_parse_name(name) ⇒ Object



65
66
67
# File 'lib/trials/data_handling/names.rb', line 65

# Normalizes +name+ with +normalize_name+ and wraps the result in a
# NamePartsParser.
def normalize_and_parse_name(name)
  normalized = normalize_name(name)
  NamePartsParser.new(normalized)
end

#normalize_full_name(name) ⇒ Object



55
56
57
58
59
# File 'lib/trials/data_handling/names.rb', line 55

# Upper-cases +name+, removes every character other than A-Z, space and
# hyphen, and collapses runs of spaces. Returns nil for blank input.
#
# Stripping happens last: removing characters can expose new leading or
# trailing spaces (e.g. "1 John" or "Bob ."), which would otherwise remain.
def normalize_full_name(name)
  return if name.blank?

  name.upcase.delete('^A-Z\ \-').squeeze(' ').strip
end

#normalize_full_names(names) ⇒ Object



46
47
48
49
50
51
52
53
# File 'lib/trials/data_handling/names.rb', line 46

# Normalizes each of +names+ (a single value is wrapped into a list) with
# +normalize_full_name+ and returns the distinct non-blank results.
def normalize_full_names(names)
  normalized = Array.wrap(names).map do |name|
    normalize_full_name(name).to_s.presence
  end

  normalized.compact.uniq
end

#normalize_name(name) ⇒ Object



38
39
40
41
42
43
44
# File 'lib/trials/data_handling/names.rb', line 38

# Normalizes a name to upper-case A-Z, spaces and hyphens with runs of
# spaces collapsed. Returns nil for blank input.
#
# +name+ may be a string, or an object responding to +first_name+, in which
# case it is first converted with +name_from_parts+.
#
# Stripping happens last: removing characters can expose new leading or
# trailing spaces (e.g. "#1 Jane"), which would otherwise remain.
def normalize_name(name)
  return if name.blank?

  name = name_from_parts(name) if name.respond_to?(:first_name)

  name.upcase.delete('^A-Z\ \-').squeeze(' ').strip
end

#normalize_string(string) ⇒ Object



1
2
3
# File 'lib/trials/utils/strings.rb', line 1

# Chomps and strips +string+, then applies +squish+ to the result.
def normalize_string(string)
  trimmed = string.chomp.strip
  trimmed.squish
end

#or_nil ⇒ Object



5
6
7
8
9
10
# File 'lib/trials/utils/various.rb', line 5

# Yields and returns the block's value, or nil when that value is blank or
# when the block (or the blank check) raises a StandardError.
def or_nil
  value = yield
  value.blank? ? nil : value
rescue StandardError
  nil
end

#parse_address(address_string) ⇒ Object



31
32
33
# File 'lib/trials/data_handling/addresses.rb', line 31

# Parses +address_string+ with StreetAddress::US and returns the parser's
# result unchanged.
def parse_address(address_string)
  parser = StreetAddress::US
  parser.parse(address_string)
end

#parse_name(name) ⇒ Object



61
62
63
# File 'lib/trials/data_handling/names.rb', line 61

# Wraps +name+ in a new NamePartsParser without normalizing it first.
def parse_name(name)
  parser_class = NamePartsParser
  parser_class.new(name)
end

#pdf_to_text(path) ⇒ Object



1
2
3
4
# File 'lib/trials/utils/pdfs.rb', line 1

# Converts the PDF at +seeds_path(path)+ to text with the external
# +pdftotext+ command and returns the text.
#
# The text is written to 'tmp_pdf.txt' in the tmp area and read back from
# there. The command is run with an argument list rather than a shell
# string, so paths containing spaces, quotes or shell metacharacters are
# passed through intact.
#
# Returns '' when the output file does not exist after the conversion.
def pdf_to_text(path)
  text_path = tmp_path('tmp_pdf.txt')

  system('pdftotext', seeds_path(path), text_path)

  File.exist?(text_path) ? File.read(text_path) : ''
end

#prettify_xml(str) ⇒ Object



5
6
7
# File 'lib/trials/utils/xmls.rb', line 5

# Parses +str+ with Nokogiri using the default XML options plus noblanks,
# and serializes the document with an indent of 2.
def prettify_xml(str)
  document = Nokogiri::XML(str) do |config|
    config.default_xml.noblanks
  end

  document.to_xml(indent: 2)
end

#query_db(db, query) ⇒ Object



68
69
70
# File 'lib/trials/utils/sqls.rb', line 68

# Executes +query+ on +db+ and returns whatever +db.execute+ returns.
def query_db(db, query)
  statement = query
  db.execute(statement)
end

#read(file) ⇒ Object Also known as: read_seed, read_seeds

reading



41
42
43
44
# File 'lib/trials/utils/files.rb', line 41

# Returns the contents of +seeds_path(file)+, or nil when
# +seed_exists?(file)+ is false.
def read(file)
  File.read(seeds_path(file)) if seed_exists?(file)
end

#read_csv(filename) ⇒ Object



1
2
3
4
5
6
7
# File 'lib/trials/utils/csvs.rb', line 1

# Reads the CSV at +seeds_path(filename)+ using its first row as headers and
# returns one symbol-keyed hash per row, skipping rows in which no value is
# present.
def read_csv(filename)
  rows = CSV.foreach(seeds_path(filename), headers: true).map do |row|
    row.to_h.symbolize_keys
  end

  rows.select { |row| row.values.any?(&:present?) }
end

#read_json(filename) ⇒ Object



1
2
3
4
5
# File 'lib/trials/utils/jsons.rb', line 1

# Parses the JSON returned by +read(filename)+ and passes the result through
# +aggressive_deep_symbolize_keys+.
def read_json(filename)
  aggressive_deep_symbolize_keys(JSON.parse(read(filename)))
end

#read_tmp(file) ⇒ Object



49
50
51
52
# File 'lib/trials/utils/files.rb', line 49

# Returns the contents of +tmp_path(file)+, or nil when +tmp_exists?(file)+
# is false.
def read_tmp(file)
  File.read(tmp_path(file)) if tmp_exists?(file)
end

#readlines(file) ⇒ Object



54
55
56
# File 'lib/trials/utils/files.rb', line 54

# Reads +seeds_path(file)+ and returns its content split on "\n".
def readlines(file)
  content = File.read(seeds_path(file))
  content.split("\n")
end

#rename_hash_key(hash, from:, to:) ⇒ Object



36
37
38
39
# File 'lib/trials/utils/hashes.rb', line 36

# Moves the value stored under +from+ to +to+, modifying +hash+ in place,
# and returns +hash+.
#
# When +from+ is absent, an existing +to+ entry is left untouched instead of
# being overwritten with nil. When both are absent, +to+ is set to nil, so
# the +to+ key always exists afterwards.
def rename_hash_key(hash, from:, to:)
  hash[to] = hash.delete(from) if hash.key?(from) || !hash.key?(to)
  hash
end

#rename_hash_keys(hash, scheme = {}) ⇒ Object



41
42
43
44
45
46
47
# File 'lib/trials/utils/hashes.rb', line 41

# Applies +rename_hash_key+ once per old-name => new-name pair in +scheme+,
# in order, and returns the resulting hash.
def rename_hash_keys(hash, scheme = {})
  scheme.reduce(hash) do |renamed, (old_key, new_key)|
    rename_hash_key(renamed, from: old_key, to: new_key)
  end
end

#render_table_from_hashes(hash_set, sort: true, headers: nil) ⇒ Object



1
2
3
4
5
6
7
8
9
10
11
12
13
14
# File 'lib/trials/utils/logging.rb', line 1

# Renders +hash_set+ as a unicode TTY::Table string.
#
# hash_set - list of hashes, one per row. Returns 'no data' when blank.
# sort     - when true (default) the header columns are sorted.
# headers  - explicit column list; defaults to +uniq_hash_keys(hash_set)+.
#            The array passed in is not modified: sorting works on a copy.
#
# Values that are not present are rendered as empty cells.
def render_table_from_hashes(hash_set, sort: true, headers: nil)
  return 'no data' if hash_set.blank?

  headers ||= uniq_hash_keys(hash_set)
  headers = headers.sort if sort

  content = hash_set
    .map { |hash| hash.select { |_key, value| value.present? }.to_h }
    .map { |hash| headers.map { |header| hash.dig(header) } }
    .map { |row| row.map(&:to_s) }

  TTY::Table.new(header: headers, rows: content).render(:unicode).to_s
end

#result_exists?(name) ⇒ Boolean Also known as: results_exist?

Returns:

  • (Boolean)


21
22
23
# File 'lib/trials/utils/files.rb', line 21

# Reports whether anything exists at +result_path(name)+.
def result_exists?(name)
  path = result_path(name)
  File.exist?(path)
end

#results_path(name) ⇒ Object Also known as: result_path



15
16
17
# File 'lib/trials/utils/files.rb', line 15

# Builds the path of +name+ inside the results directory of the current
# run: ROOT/results/RUN/name.
def results_path(name)
  format('%s/results/%s/%s', ROOT, RUN, name)
end

#sanitize_hash_value(hash, key:, type:, date_format: '%Y-%m-%d') ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/trials/utils/hashes.rb', line 5

# Returns a copy of +hash+ in which the value under +key+ has been converted
# according to +type+ (:date, :datetime, :integer/:int, :float, :string,
# :alphanum or :present?).
#
# The conversion runs inside +or_nil+, and an unrecognized +type+ yields nil
# from the case expression. +date_format+ is only used for :date.
# The original hash is not modified.
def sanitize_hash_value(hash, key:, type:, date_format: '%Y-%m-%d')
  sanitized = or_nil do
    raw = hash.dig(key)

    case type
    when :date then Date.strptime(raw, date_format)
    when :datetime then DateTime.parse(raw)
    when :integer, :int then raw.to_i
    when :float then raw.to_f
    when :string then raw.to_s
    when :alphanum then string_to_alphanum(raw)
    when :present? then raw.present?
    end
  end

  hash.merge(key => sanitized)
end

#sanitize_hash_values(hash, scheme = {}) ⇒ Object



28
29
30
31
32
33
34
# File 'lib/trials/utils/hashes.rb', line 28

# Applies +sanitize_hash_value+ once per key => type pair in +scheme+, in
# order, and returns the resulting hash.
def sanitize_hash_values(hash, scheme = {})
  scheme.reduce(hash) do |sanitized, (field, type)|
    sanitize_hash_value(sanitized, key: field, type: type)
  end
end

#secrets ⇒ Object



1
2
3
# File 'lib/trials/utils/various.rb', line 1

# Returns the SECRETS constant, which is defined outside this method.
def secrets
  SECRETS
end

#seed_exists?(name) ⇒ Boolean Also known as: seeds_exist?

Returns:

  • (Boolean)


9
10
11
# File 'lib/trials/utils/files.rb', line 9

# Reports whether anything exists at +seed_path(name)+.
def seed_exists?(name)
  path = seed_path(name)
  File.exist?(path)
end

#seeds_path(name) ⇒ Object Also known as: seed_path

paths



3
4
5
# File 'lib/trials/utils/files.rb', line 3

# Builds the path of +name+ inside the seeds directory: ROOT/seeds/name.
def seeds_path(name)
  format('%s/seeds/%s', ROOT, name)
end

#string_encode_utf_8(string) ⇒ Object



9
10
11
# File 'lib/trials/utils/strings.rb', line 9

# Transcodes +string+ from binary to UTF-8, dropping every byte that is
# invalid or has no UTF-8 equivalent.
def string_encode_utf_8(string)
  replacement_options = { invalid: :replace, undef: :replace, replace: '' }
  string.encode('UTF-8', 'binary', **replacement_options)
end

#string_to_alphanum(string) ⇒ Object



5
6
7
# File 'lib/trials/utils/strings.rb', line 5

# Returns +string+ with every character other than A-Z, a-z and 0-9
# removed.
def string_to_alphanum(string)
  non_alphanumeric = /[^A-Za-z0-9]/
  string.gsub(non_alphanumeric, '')
end

#timeit ⇒ Object



1
2
3
# File 'lib/trials/utils/benchmarking.rb', line 1

# Runs the given block under Benchmark.measure and passes the string form of
# the measurement to +log+.
def timeit
  timing = Benchmark.measure { yield }
  log(timing.to_s)
end

#tmp_exists?(name) ⇒ Boolean

Returns:

  • (Boolean)


31
32
33
# File 'lib/trials/utils/files.rb', line 31

# Reports whether anything exists at +tmp_path(name)+.
def tmp_exists?(name)
  path = tmp_path(name)
  File.exist?(path)
end

#tmp_path(name) ⇒ Object



27
28
29
# File 'lib/trials/utils/files.rb', line 27

# Builds the path of +name+ inside the tmp directory: ROOT/tmp/name.
def tmp_path(name)
  format('%s/tmp/%s', ROOT, name)
end

#uniq_hash_keys(hashes) ⇒ Object



1
2
3
# File 'lib/trials/utils/hashes.rb', line 1

# Returns the distinct non-nil keys used across all of +hashes+, in order of
# first appearance.
def uniq_hash_keys(hashes)
  all_keys = hashes.map(&:keys).flatten(1)
  all_keys.uniq.compact
end

#update_counts_hash(counts, update) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
# File 'lib/trials/utils/hashes.rb', line 80

# Adds every count in +update+ onto +counts+, modifying +counts+ in place:
# existing keys are incremented, new keys are inserted. Returns +counts+.
def update_counts_hash(counts, update)
  update.each do |key, increment|
    counts[key] = counts.key?(key) ? counts[key] + increment : increment
  end

  counts
end

#use_db(name) ⇒ Object



32
33
34
35
36
37
38
39
40
# File 'lib/trials/utils/sqls.rb', line 32

# Opens the existing SQLite database at seeds_path("<name>.db") and returns
# the handle with +results_as_hash+ enabled.
#
# Raises a RuntimeError ('no db exists') when that path does not exist.
def use_db(name)
  path = seeds_path("#{name}.db")
  raise 'no db exists' unless File.exist?(path)

  SQLite3::Database.new(path).tap do |database|
    database.results_as_hash = true
  end
end

#write(file, content) ⇒ Object Also known as: write_result, write_results

writing



60
61
62
63
# File 'lib/trials/utils/files.rb', line 60

# Writes +content+ to +results_path(file)+, replacing any existing file and
# creating missing parent directories first.
def write(file, content)
  path = results_path(file)
  FileUtils.mkdir_p(File.dirname(path))
  File.open(path, 'w') { |handle| handle << content }
end

#write_csv_from_hashes(file, hash_set, attrs: nil) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
# File 'lib/trials/utils/csvs.rb', line 9

# Writes +hash_set+ as a CSV file at +results_path(file)+.
#
# The header row is +attrs+, which defaults to +uniq_hash_keys(hash_set)+;
# each following row holds the record's values for those attributes, in the
# same order.
def write_csv_from_hashes(file, hash_set, attrs: nil)
  attrs ||= uniq_hash_keys(hash_set)

  CSV.open(results_path(file), 'w') do |csv|
    csv << attrs
    hash_set.each { |record| csv << attrs.map { |attr| record.dig(attr) } }
  end
end

#write_hashes_to_json(file, hashes) ⇒ Object



7
8
9
# File 'lib/trials/utils/jsons.rb', line 7

# Serializes +hashes+ with +to_json+ and hands the result to +write+ for
# +file+.
def write_hashes_to_json(file, hashes)
  payload = hashes.to_json
  write(file, payload)
end

#write_tmp(file, content) ⇒ Object



68
69
70
71
# File 'lib/trials/utils/files.rb', line 68

# Writes +content+ to +tmp_path(file)+, replacing any existing file and
# creating missing parent directories first.
def write_tmp(file, content)
  path = tmp_path(file)
  FileUtils.mkdir_p(File.dirname(path))
  File.open(path, 'w') { |handle| handle << content }
end