Class: NamesDataset
- Inherits:
-
Object
- Object
- NamesDataset
- Defined in:
- lib/names_dataset.rb,
lib/names_dataset/version.rb
Defined Under Namespace
Classes: Error
Constant Summary collapse
- FIRST_NAMES_ZIP_PATH =
File.expand_path("../../data/first_names.zip", __FILE__)
- LAST_NAMES_ZIP_PATH =
File.expand_path("../../data/last_names.zip", __FILE__)
- VERSION =
"1.0.0"
Instance Attribute Summary collapse
-
#first_names ⇒ Object
readonly
Returns the value of attribute first_names.
-
#last_names ⇒ Object
readonly
Returns the value of attribute last_names.
Instance Method Summary collapse
- #determine_gender(gender_data) ⇒ Object
- #empty_name_metadata ⇒ Object
- #empty_result ⇒ Object
- #get_country_codes(alpha_2: true) ⇒ Object
- #get_top_names(n: 10, gender: nil, country_alpha2: nil) ⇒ Object
-
#initialize(first_names_path: FIRST_NAMES_ZIP_PATH, last_names_path: LAST_NAMES_ZIP_PATH) ⇒ NamesDataset
constructor
A new instance of NamesDataset.
- #load_zipped_json(zip_path) ⇒ Object
- #map_country_codes(data) ⇒ Object
- #map_gender(data) ⇒ Object
- #matches_gender?(data, gender) ⇒ Boolean
- #normalize(str) ⇒ Object
- #post_process(data) ⇒ Object
- #search(name) ⇒ Object
Constructor Details
#initialize(first_names_path: FIRST_NAMES_ZIP_PATH, last_names_path: LAST_NAMES_ZIP_PATH) ⇒ NamesDataset
Returns a new instance of NamesDataset.
16 17 18 19 |
# File 'lib/names_dataset.rb', line 16
#
# Builds a dataset by loading the first-name and last-name archives.
#
# @param first_names_path [String] path passed to #load_zipped_json for first names
# @param last_names_path [String] path passed to #load_zipped_json for last names
def initialize(first_names_path: FIRST_NAMES_ZIP_PATH, last_names_path: LAST_NAMES_ZIP_PATH)
  @first_names = load_zipped_json(first_names_path)
  @last_names = load_zipped_json(last_names_path)
end
Instance Attribute Details
#first_names ⇒ Object (readonly)
Returns the value of attribute first_names.
11 12 13 |
# File 'lib/names_dataset.rb', line 11
#
# @return [Object] the value of attribute first_names
def first_names
  @first_names
end
#last_names ⇒ Object (readonly)
Returns the value of attribute last_names.
11 12 13 |
# File 'lib/names_dataset.rb', line 11
#
# @return [Object] the value of attribute last_names
def last_names
  @last_names
end
Instance Method Details
#determine_gender(gender_data) ⇒ Object
116 117 118 119 120 121 122 123 124 |
# File 'lib/names_dataset.rb', line 116
#
# Picks a single gender label for a name from its gender weights.
#
# @param gender_data [Hash] gender key => weight
# @return [String] "N/A" for an empty hash, the only key when there is exactly
#   one entry, otherwise "M" when the "M" weight is strictly greater than the
#   "F" weight and "F" in every other case (ties included)
def determine_gender(gender_data)
  return "N/A" if gender_data.empty?
  return gender_data.keys.first if gender_data.size == 1

  # A missing or nil weight counts as 0 so the comparison cannot raise
  # NoMethodError when only one of "M"/"F" is present among several keys.
  male = gender_data["M"] || 0
  female = gender_data["F"] || 0
  male > female ? "M" : "F"
end
#empty_name_metadata ⇒ Object
130 131 132 |
# File 'lib/names_dataset.rb', line 130
#
# Builds the placeholder metadata used when a name has no entry.
#
# @return [Hash] a new hash with empty "country", "gender" and "rank" hashes
def empty_name_metadata
  {"country" => {}, "gender" => {}, "rank" => {}}
end
#empty_result ⇒ Object
126 127 128 |
# File 'lib/names_dataset.rb', line 126
#
# Builds the result returned by #search for a nil or blank name.
#
# @return [Hash] :first_name and :last_name, each mapped to its own
#   #empty_name_metadata hash
def empty_result
  {first_name: empty_name_metadata, last_name: empty_name_metadata}
end
#get_country_codes(alpha_2: true) ⇒ Object
34 35 36 37 38 |
# File 'lib/names_dataset.rb', line 34
#
# Lists the distinct country codes that appear in the loaded data.
#
# The first-name dataset is used when it has entries; otherwise the last-name
# dataset is used. (An empty Hash is truthy, so a plain `||` never fell back.)
#
# @param alpha_2 [Boolean] when true return the raw codes; when false return
#   the names reported by IsoCountryCodes, leaving out codes it does not know
# @return [Array<String>] unique codes or names in first-seen order
def get_country_codes(alpha_2: true)
  dataset = [@first_names, @last_names].find { |names| names && !names.empty? } || {}
  country_codes = dataset.values.flat_map { |entry| entry["country"].keys }.uniq
  return country_codes if alpha_2

  country_codes.map do |code|
    IsoCountryCodes.find(code).name
  rescue IsoCountryCodes::UnknownCodeError
    # Same handling as #map_country_codes: an unknown code is dropped.
    nil
  end.compact
end
#get_top_names(n: 10, gender: nil, country_alpha2: nil) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/names_dataset.rb', line 40
#
# Collects the best-ranked first names per country and gender label.
#
# @param n [Integer] maximum number of names kept per country/gender pair
# @param gender [String, nil] optional filter checked with #matches_gender?
# @param country_alpha2 [String, nil] when given, only this country key is kept
# @return [Hash] country => gender label => names ordered by ascending rank
#   value, at most n per list
# @raise [ArgumentError] if n is zero or negative
# @raise [Error] if the first-name dataset is nil
def get_top_names(n: 10, gender: nil, country_alpha2: nil)
  raise ArgumentError, "n must be positive" if n <= 0

  dataset = @first_names
  raise Error, "No dataset loaded" if dataset.nil?

  ranks_per_country = Hash.new { |h, k| h[k] = Hash.new { |hh, kk| hh[kk] = [] } }

  dataset.each do |name, data|
    next unless matches_gender?(data, gender)

    # The label depends only on the name, so it is computed at most once per
    # name (and only if some country survives the filter).
    gender_label = nil
    data["rank"].each do |country, rank|
      next if country_alpha2 && country != country_alpha2

      gender_label ||= determine_gender(data["gender"])
      ranks_per_country[country][gender_label] << [name, rank]
    end
  end

  # Replace each [name, rank] list with its n best-ranked names.
  ranks_per_country.each_value do |genders|
    genders.transform_values! { |entries| entries.sort_by(&:last).take(n).map(&:first) }
  end

  ranks_per_country
end
#load_zipped_json(zip_path) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/names_dataset.rb', line 68
#
# Reads the first entry of a zip archive and parses it as JSON.
#
# This is best-effort: a missing file yields an empty hash silently, and any
# StandardError raised while opening, reading or parsing is reported with
# `warn` and also yields an empty hash.
#
# @param zip_path [String] path to the zip archive
# @return [Object] the parsed JSON, or {} when nothing could be loaded
def load_zipped_json(zip_path)
  return {} unless File.exist?(zip_path)

  content = nil
  Zip::File.open(zip_path) do |zip_file|
    entry = zip_file.first
    content = entry.get_input_stream.read if entry
  end

  content ? JSON.parse(content) : {}
rescue => e
  warn "Failed to load or parse #{zip_path}: #{e.message}"
  {}
end
#map_country_codes(data) ⇒ Object
96 97 98 99 100 101 102 |
# File 'lib/names_dataset.rb', line 96
#
# Re-keys a hash from country codes to the names reported by IsoCountryCodes.
#
# Entries whose code raises IsoCountryCodes::UnknownCodeError are left out.
# The previous `transform_keys { ... nil }.compact` kept them under a nil key,
# because Hash#compact removes nil values, not nil keys.
#
# @param data [Hash] country code => value
# @return [Hash] country name => value, without unknown codes and (as before)
#   without entries whose value is nil
def map_country_codes(data)
  by_name = data.each_with_object({}) do |(alpha2, value), mapped|
    mapped[IsoCountryCodes.find(alpha2).name] = value
  rescue IsoCountryCodes::UnknownCodeError
    # Unknown code: skip this entry.
  end

  # Kept from the original so nil-valued entries are still dropped.
  by_name.compact
end
#map_gender(data) ⇒ Object
104 105 106 107 |
# File 'lib/names_dataset.rb', line 104
#
# Re-keys gender data from "M"/"F" to "Male"/"Female".
#
# Keys other than "M" and "F" are kept unchanged; previously they were all
# mapped to the same nil key.
#
# @param data [Hash] gender key => value
# @return [Hash] the same values under the translated keys
def map_gender(data)
  gender_map = {"M" => "Male", "F" => "Female"}
  data.transform_keys { |key| gender_map.fetch(key, key) }
end
#matches_gender?(data, gender) ⇒ Boolean
109 110 111 112 113 114 |
# File 'lib/names_dataset.rb', line 109
#
# Tells whether a name record has an entry for the requested gender.
#
# @param data [Hash] name record with a "gender" hash
# @param gender [String, Symbol, nil] filter; a value starting with "m"
#   (case-insensitive, surrounding whitespace ignored) selects "M", any other
#   non-nil value selects "F"
# @return [Boolean] true when no filter is given or the selected key is present
def matches_gender?(data, gender)
  return true unless gender

  # to_s + strip so symbols and padded strings are handled the same way.
  gender_key = gender.to_s.strip.downcase.start_with?("m") ? "M" : "F"
  data["gender"].key?(gender_key)
end
#normalize(str) ⇒ Object
82 83 84 |
# File 'lib/names_dataset.rb', line 82
#
# Puts a name into the form used for dataset lookups in #search.
#
# @param str [String] raw name
# @return [String] the name without surrounding whitespace, first character
#   upcased and the rest downcased
def normalize(str)
  str.strip.capitalize
end
#post_process(data) ⇒ Object
86 87 88 89 90 91 92 93 94 |
# File 'lib/names_dataset.rb', line 86
#
# Converts a raw name record into its presentation form.
#
# @param data [Hash, nil] raw record with "country", "gender" and "rank" hashes
# @return [Hash, nil] nil when data is nil or false; otherwise a hash whose
#   "country" and "rank" go through #map_country_codes and whose "gender" goes
#   through #map_gender
def post_process(data)
  return nil unless data

  {
    "country" => map_country_codes(data["country"]),
    "gender" => map_gender(data["gender"]),
    "rank" => map_country_codes(data["rank"])
  }
end
#search(name) ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/names_dataset.rb', line 21
#
# Looks a name up in both the first-name and the last-name dataset.
#
# @param name [String, nil] name to look up; it is passed through #normalize
# @return [Hash] :first_name and :last_name; each is the #post_process output
#   for the matching record, or #empty_name_metadata when there is no match.
#   A nil or blank name returns #empty_result.
def search(name)
  return empty_result if name.nil? || name.strip.empty?

  n = normalize(name)
  first_name_data = post_process(@first_names[n]) if @first_names.key?(n)
  last_name_data = post_process(@last_names[n]) if @last_names.key?(n)

  {
    first_name: first_name_data || empty_name_metadata,
    last_name: last_name_data || empty_name_metadata
  }
end