Class: SexMachine::Detector

Inherits:
Object
  • Object
show all
Defined in:
lib/sexmachine/detector.rb

Constant Summary collapse

# Country identifiers accepted by #get_gender.
#
# Order is significant: #get_gender uses a symbol's position in this list
# (COUNTRIES.index) as the index into a name's per-country value string, so
# entries must not be reordered, inserted or removed independently of the
# data file. The array is frozen to prevent accidental mutation at runtime.
COUNTRIES =
[ :great_britain, :ireland, :usa, :italy, :malta, :portugal, :spain, :france, :belgium, :luxembourg, :the_netherlands, :east_frisia,
:germany, :austria, :swiss, :iceland, :denmark, :norway, :sweden, :finland, :estonia, :latvia, :lithuania, :poland, :czech_republic,
:slovakia, :hungary, :romania, :bulgaria, :bosniaand, :croatia, :kosovo, :macedonia, :montenegro, :serbia, :slovenia, :albania,
:greece, :russia, :belarus, :moldova, :ukraine, :armenia, :azerbaijan, :georgia, :the_stans, :turkey, :arabia, :israel, :china,
:india, :japan, :korea, :vietnam, :other_countries ].freeze

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Detector

Returns a new instance of Detector.



12
13
14
15
16
17
18
19
20
21
# File 'lib/sexmachine/detector.rb', line 12

# Builds a detector and immediately loads its name dictionary.
#
# @param opts [Hash] optional overrides, merged over the defaults below:
#   :filename       - path of the dictionary file handed to #parse
#                     (default: '../data/nam_dict.txt' resolved against this file)
#   :case_sensitive - stored in @case_sensitive (default: true)
#   :unknown_value  - stored in @unknown_value; #get_gender returns it for
#                     names it does not know (default: :andy)
def initialize(opts = {})
  defaults = {
    filename: File.expand_path('../data/nam_dict.txt', __FILE__),
    case_sensitive: true,
    unknown_value: :andy
  }
  # Caller-supplied keys win over the defaults, including explicit nil values.
  settings = defaults.merge(opts)

  @case_sensitive = settings[:case_sensitive]
  @unknown_value = settings[:unknown_value]
  parse(settings[:filename])
end

Instance Method Details

#get_gender(name, country = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/sexmachine/detector.rb', line 32

# Looks up the gender recorded for +name+, optionally restricted to one country.
#
# @param name [String] the name to look up; lowercased through UnicodeUtils
#   first when the detector is not case sensitive
# @param country [Symbol, nil] one of COUNTRIES, or nil to consider every country
# @return [Object] @unknown_value when +name+ is not a key of @names, otherwise
#   whatever #most_popular_gender returns for the scoring block given to it
# @raise [RuntimeError] when +name+ is known but +country+ is not in COUNTRIES
def get_gender(name, country = nil)
  name = UnicodeUtils.downcase(name) unless @case_sensitive

  # Unknown names short-circuit before the country is validated at all.
  return @unknown_value unless @names.key?(name)

  if country.nil?
    # No country given: score an entry by how many of its per-country
    # characters are non-blank.
    most_popular_gender(name) do |country_values|
      country_values.each_char.count { |char| !char.strip.empty? }
    end
  else
    position = COUNTRIES.index(country)
    raise "No such country: #{country}" if position.nil?

    # Country given: score an entry by the code point of the character in
    # that country's column.
    most_popular_gender(name) { |country_values| country_values[position].ord }
  end
end

#parse(fname) ⇒ Object



23
24
25
26
27
28
29
30
# File 'lib/sexmachine/detector.rb', line 23

# Resets the name table and feeds every line of the dictionary file to
# #eat_name_line.
#
# The file is read as ISO-8859-1 and transcoded to UTF-8 (see the mode
# string), so each line reaches #eat_name_line as a UTF-8 string.
# Presumably #eat_name_line is what populates @names — it is not defined in
# this block.
#
# @param fname [String] path of the dictionary file
# @return [File] the (closed) file handle returned by the read loop
# @raise [SystemCallError] when the file cannot be opened (e.g. Errno::ENOENT)
def parse(fname)
  @names = {}
  # File.open rather than Kernel#open: Kernel#open treats a leading "|" in the
  # path as a command to spawn, which is unsafe for a caller-supplied filename.
  File.open(fname, "r:iso8859-1:utf-8") do |file|
    file.each_line { |line| eat_name_line(line) }
  end
end