Class: NamesDataset

Inherits:
Object
  • Object
show all
Defined in:
lib/names_dataset.rb,
lib/names_dataset/version.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

FIRST_NAMES_ZIP_PATH =
File.expand_path("../../data/first_names.zip", __FILE__)
LAST_NAMES_ZIP_PATH =
File.expand_path("../../data/last_names.zip", __FILE__)
VERSION =
"1.0.0"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(first_names_path: FIRST_NAMES_ZIP_PATH, last_names_path: LAST_NAMES_ZIP_PATH) ⇒ NamesDataset

Returns a new instance of NamesDataset.



16
17
18
19
# File 'lib/names_dataset.rb', line 16

# Builds a dataset by loading both name tables from zipped JSON archives.
#
# @param first_names_path [String] path to the first-names zip archive
# @param last_names_path [String] path to the last-names zip archive
def initialize(first_names_path: FIRST_NAMES_ZIP_PATH, last_names_path: LAST_NAMES_ZIP_PATH)
  # One loader call per archive, first names before last names.
  @first_names, @last_names = [first_names_path, last_names_path].map do |archive_path|
    load_zipped_json(archive_path)
  end
end

Instance Attribute Details

#first_names ⇒ Object (readonly)

Returns the value of attribute first_names.



11
12
13
# File 'lib/names_dataset.rb', line 11

# Reader for the first-names table that #initialize stores in @first_names.
#
# @return [Object] the current value of @first_names
def first_names
  @first_names
end

#last_names ⇒ Object (readonly)

Returns the value of attribute last_names.



11
12
13
# File 'lib/names_dataset.rb', line 11

# Reader for the last-names table that #initialize stores in @last_names.
#
# @return [Object] the current value of @last_names
def last_names
  @last_names
end

Instance Method Details

#determine_gender(gender_data) ⇒ Object



116
117
118
119
120
121
122
123
124
# File 'lib/names_dataset.rb', line 116

# Picks a single gender label for a name from its gender breakdown.
#
# @param gender_data [Hash, nil] gender keys ("M"/"F") mapped to comparable
#   numeric weights
# @return [String] "N/A" when there is no data, the only key when exactly one
#   key is present, otherwise "M" if its weight is strictly greater than the
#   weight of "F", and "F" otherwise (ties resolve to "F")
def determine_gender(gender_data)
  # A missing breakdown is treated like an empty one instead of raising.
  return "N/A" if gender_data.nil? || gender_data.empty?
  return gender_data.keys.first if gender_data.size == 1

  # An absent or nil weight counts as zero so the comparison never hits nil.
  male_weight = gender_data["M"] || 0
  female_weight = gender_data["F"] || 0
  male_weight > female_weight ? "M" : "F"
end

#empty_name_metadata ⇒ Object



130
131
132
# File 'lib/names_dataset.rb', line 130

# Placeholder metadata for a name that has no entry in a dataset.
#
# @return [Hash] a fresh hash with empty "country", "gender" and "rank" sections
def empty_name_metadata
  {"country" => {}, "gender" => {}, "rank" => {}}
end

#empty_result ⇒ Object



126
127
128
# File 'lib/names_dataset.rb', line 126

# Result returned by a search that has nothing to look up.
#
# @return [Hash] :first_name and :last_name, each mapped to its own
#   placeholder metadata hash from #empty_name_metadata
def empty_result
  {first_name: empty_name_metadata, last_name: empty_name_metadata}
end

#get_country_codes(alpha_2: true) ⇒ Object



34
35
36
37
38
# File 'lib/names_dataset.rb', line 34

# Lists the distinct country codes that appear in the loaded name data.
#
# The first-names table is used when it has entries; otherwise the
# last-names table is used. With neither available the result is empty.
#
# @param alpha_2 [Boolean] when true, return the codes as stored; when false,
#   translate each code to a country name via IsoCountryCodes
# @return [Array<String>] unique codes, or country names with codes that
#   IsoCountryCodes does not recognise left out
def get_country_codes(alpha_2: true)
  # An empty hash is truthy, so a plain `||` would never reach the last names.
  dataset = [@first_names, @last_names].find { |names| names && !names.empty? } || {}

  # Entries without a "country" section contribute no codes.
  country_codes = dataset.values.flat_map { |entry| (entry["country"] || {}).keys }.uniq
  return country_codes if alpha_2

  # The lookup raises for unknown codes; turn that into nil so compact drops it.
  country_codes.map do |code|
    IsoCountryCodes.find(code).name
  rescue IsoCountryCodes::UnknownCodeError
    nil
  end.compact
end

#get_top_names(n: 10, gender: nil, country_alpha2: nil) ⇒ Object

Raises:

  • (ArgumentError)


40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/names_dataset.rb', line 40

# Collects the best-ranked first names, grouped by country and gender label.
#
# @param n [Integer] maximum number of names kept per country/gender bucket
# @param gender [String, nil] optional filter passed to #matches_gender?
# @param country_alpha2 [String, nil] when given, only ranks for this
#   country key are considered
# @return [Hash] country => gender label => names ordered by ascending rank
# @raise [ArgumentError] if n is zero or negative
# @raise [Error] if @first_names is nil
def get_top_names(n: 10, gender: nil, country_alpha2: nil)
  raise ArgumentError, "n must be positive" if n <= 0

  names = @first_names
  raise Error, "No dataset loaded" if names.nil?

  # country => gender label => list of [name, rank] pairs, created on demand
  grouped = Hash.new do |by_country, code|
    by_country[code] = Hash.new { |by_gender, label| by_gender[label] = [] }
  end

  names.each_pair do |name, data|
    next unless matches_gender?(data, gender)

    data["rank"].each_pair do |country, rank|
      next unless !country_alpha2 || country == country_alpha2

      label = determine_gender(data["gender"])
      grouped[country][label] << [name, rank]
    end
  end

  # Shrink every bucket to the n names with the smallest rank values.
  grouped.each_value do |by_gender|
    by_gender.transform_values! { |pairs| pairs.sort_by(&:last).take(n).map(&:first) }
  end

  grouped
end

#load_zipped_json(zip_path) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/names_dataset.rb', line 68

# Reads the first entry of a zip archive and parses it as JSON.
#
# Any StandardError raised while opening, reading or parsing is reported
# with `warn` and turned into an empty hash.
#
# @param zip_path [String] path to the zip archive
# @return [Object] the parsed JSON, or {} when the file does not exist, the
#   archive yields no first entry or content, or an error was rescued
def load_zipped_json(zip_path)
  return {} unless File.exist?(zip_path)

  raw_json = nil
  Zip::File.open(zip_path) do |archive|
    first_entry = archive.first
    next unless first_entry

    raw_json = first_entry.get_input_stream.read
  end

  return {} unless raw_json

  JSON.parse(raw_json)
rescue => error
  warn "Failed to load or parse #{zip_path}: #{error.message}"
  {}
end

#map_country_codes(data) ⇒ Object



96
97
98
99
100
101
102
# File 'lib/names_dataset.rb', line 96

# Re-keys a per-country hash from country codes to country names.
#
# Codes that IsoCountryCodes does not recognise are left out. Hash#compact
# only removes nil values, so mapping such a code to a nil key would keep
# the entry under nil. Entries whose value is nil are dropped as well.
#
# @param data [Hash, nil] country code => value
# @return [Hash] country name => value; empty when data is nil
def map_country_codes(data)
  return {} if data.nil?

  data.each_with_object({}) do |(alpha2, value), by_name|
    next if value.nil?

    by_name[IsoCountryCodes.find(alpha2).name] = value
  rescue IsoCountryCodes::UnknownCodeError
    # Unknown code: skip this entry rather than storing it under a nil key.
    next
  end
end

#map_gender(data) ⇒ Object



104
105
106
107
# File 'lib/names_dataset.rb', line 104

# Re-keys a gender breakdown from "M"/"F" to "Male"/"Female".
#
# @param data [Hash] gender code => value
# @return [Hash] new hash with translated keys; any key other than "M" or
#   "F" becomes nil
def map_gender(data)
  data.transform_keys do |code|
    case code
    when "M" then "Male"
    when "F" then "Female"
    end
  end
end

#matches_gender?(data, gender) ⇒ Boolean

Returns:

  • (Boolean)


109
110
111
112
113
114
# File 'lib/names_dataset.rb', line 109

# Tells whether a name entry carries data for the requested gender.
#
# @param data [Hash] name entry whose "gender" section is a hash
# @param gender [String, nil] requested gender; a value starting with "m"
#   (case-insensitive) selects "M", anything else selects "F"
# @return [Boolean] true when no gender is requested, otherwise whether the
#   entry's "gender" section has the selected key
def matches_gender?(data, gender)
  if gender
    wants_male = gender.downcase.start_with?("m")
    data["gender"].key?(wants_male ? "M" : "F")
  else
    true
  end
end

#normalize(str) ⇒ Object



82
83
84
# File 'lib/names_dataset.rb', line 82

# Canonicalises a name for lookup: surrounding whitespace removed, first
# character upcased and the rest downcased.
#
# @param str [String] raw name
# @return [String] the normalised name
def normalize(str)
  trimmed = str.strip
  trimmed.capitalize
end

#post_process(data) ⇒ Object



86
87
88
89
90
91
92
93
94
# File 'lib/names_dataset.rb', line 86

# Converts a raw name entry into its presentation form.
#
# @param data [Hash, nil] raw entry with "country", "gender" and "rank" sections
# @return [Hash, nil] nil when data is nil or false; otherwise the three
#   sections with "country" and "rank" passed through #map_country_codes and
#   "gender" passed through #map_gender
def post_process(data)
  return unless data

  countries, genders, ranks = data.values_at("country", "gender", "rank")

  {
    "country" => map_country_codes(countries),
    "gender" => map_gender(genders),
    "rank" => map_country_codes(ranks)
  }
end

#search(name) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/names_dataset.rb', line 21

# Looks a name up in both the first-names and the last-names tables.
#
# @param name [String, nil] name to search for; it is normalised with
#   #normalize before the lookup
# @return [Hash] :first_name and :last_name, each mapped to the
#   post-processed entry, or to placeholder metadata from
#   #empty_name_metadata when the table has no usable entry for the name;
#   a nil or blank name yields #empty_result
def search(name)
  return empty_result if name.nil? || name.strip.empty?

  key = normalize(name)
  first_name_data = post_process(@first_names[key]) if @first_names.key?(key)
  last_name_data = post_process(@last_names[key]) if @last_names.key?(key)

  {
    first_name: first_name_data || empty_name_metadata,
    last_name: last_name_data || empty_name_metadata
  }
end