Class: Babel::Profile

Inherits:
Object
  • Object
show all
Defined in:
lib/babel/profile.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(language = nil) ⇒ Profile

Returns a new instance of Profile.



5
6
7
8
9
# File 'lib/babel/profile.rb', line 5

# Builds an empty profile: no n-grams recorded and a total occurrence
# count of zero.
#
# @param language [Object, nil] identifier of the language this profile
#   describes; stored exactly as given (defaults to nil)
def initialize(language = nil)
  @data, @total_occurences = {}, 0
  @language = language
end

Instance Attribute Details

#dataObject (readonly)

Returns the value of attribute data.



4
5
6
# File 'lib/babel/profile.rb', line 4

# Reader for the profile's n-gram table.
#
# @return [Hash] the table held in @data; entries are created elsewhere in
#   this class as n-gram => [occurrence count, ranking]
def data
  @data
end

#languageObject (readonly)

Returns the value of attribute language.



3
4
5
# File 'lib/babel/profile.rb', line 3

# Reader for the language this profile was created for.
#
# @return [Object, nil] the value passed to the constructor (nil when none
#   was given)
def language
  @language
end

Instance Method Details

#clean(text) ⇒ Object

TODO: needed?



41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/babel/profile.rb', line 41

# Hook for normalising text before it is split into n-grams.
#
# Currently a pass-through: the text is returned untouched. The previous
# body continued past an unconditional `return` with a chain of gsub calls
# that stripped digits and the characters : / _ ( ) ; ? -- none of that
# code could ever run, so it has been removed.
#
# TODO: needed?
#
# @param text [Object] the text about to be learned
# @return [Object] the very same object that was passed in
def clean(text)
  text
end

#distance(other) ⇒ Object

Calculate the distance to another profile



96
97
98
99
100
101
102
103
104
105
# File 'lib/babel/profile.rb', line 96

# Calculates the distance from this profile to another one.
#
# Every n-gram of this profile contributes to the sum:
# * 1 when +other+ reports a ranking of 0 for it,
# * otherwise the absolute difference between the two rankings.
#
# @param other [#ranking] object answering ranking(ngram)
# @return [Numeric] the accumulated distance (0 for an empty profile)
def distance(other)
  total = 0
  @data.each do |ngram, stats|
    theirs = other.ranking(ngram)
    total += theirs == 0 ? 1 : (theirs - stats.last).abs
  end
  total
end

#learn(text, options = {}) ⇒ Object

Learn a text. The following options are used when generating the n-grams:

* min_length => 2
* max_length => 5
* pad => true


17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/babel/profile.rb', line 17

# Learns a text: every whitespace-separated word is broken into n-grams and
# each n-gram is recorded as one occurrence. The profile is re-ranked
# afterwards.
#
# @param text [String] the text to learn
# @param options [Hash] forwarded to n_grams after being merged over the
#   defaults :min_length => 2, :max_length => 5, :pad => true
# @return [self] so calls can be chained:
#   profile.learn('asasas').learn('asdsad')
def learn(text, options = {})
  defaults = { :min_length => 2, :max_length => 5, :pad => true }
  ngram_options = defaults.merge(options)

  # n_grams is called on each word; presumably a String extension defined
  # elsewhere in the project -- it is not part of this file.
  clean(text).split(' ').each do |word|
    word.n_grams(ngram_options).each { |ngram| occured(ngram) }
  end

  # After learning, rank the new n-grams.
  rank
  self
end

#limit(boundary = 100) ⇒ Object

Limit this profile to n items. The profile needs to be ranked first.



57
58
59
60
61
62
# File 'lib/babel/profile.rb', line 57

# Limits this profile to the +boundary+ highest-ranked n-grams.
#
# rank() stores a relative frequency (a value between 0 and 1) as the
# ranking, so comparing that value with +boundary+ directly never removed
# anything for any boundary of 1 or more. Instead the entries are ordered
# by ranking, highest first, and everything beyond the first +boundary+
# entries is dropped. Entries with equal rankings keep their insertion
# order.
#
# @param boundary [Integer] maximum number of n-grams to keep (default 100);
#   values below zero are treated as zero
# @return [Hash, nil] the result of Hash#reject!: the table when entries
#   were removed, nil when nothing had to be removed
# @raise [RuntimeError] when any n-gram still has a ranking of 0
def limit(boundary = 100)
  raise 'Please call rank() first' if @data.any? { |_ngram, stats| stats.last == 0 }

  # sort_by is not stable, so the original position is used as a tie-breaker.
  ordered = @data.each_with_index.sort_by { |(_ngram, stats), index| [-stats.last, index] }

  kept = {}
  ordered.first([boundary, 0].max).each { |(ngram, _stats), _index| kept[ngram] = true }

  @data.reject! { |ngram, _stats| !kept.key?(ngram) }
end

#merge(other) ⇒ Object



31
32
33
34
35
36
37
38
# File 'lib/babel/profile.rb', line 31

# Merges the occurrence counts of another profile into this one.
#
# Only the counts are added; rankings are not recalculated here, so call
# rank afterwards if up-to-date rankings are needed.
#
# @param other [#language, #data] profile whose counts are added
# @return [Object] whatever iterating other.data returns
# @raise [ArgumentError] when the two profiles have different languages
def merge(other)
  if language != other.language
    raise ArgumentError, "self has a language of #{self.language} but profile to merge has #{other.language}"
  end

  other.data.each_pair { |ngram, stats| occured(ngram, stats.first) }
end

#occured(ngram, amount = 1) ⇒ Object

Called when an n-gram has occurred. Optionally, you can pass an amount (how many times the n-gram occurred).



80
81
82
83
# File 'lib/babel/profile.rb', line 80

# Records that an n-gram occurred.
#
# A previously unseen n-gram gets a fresh [count, ranking] pair of [0, 0]
# before the count is increased.
#
# @param ngram [Object] the n-gram that occurred (used as hash key)
# @param amount [Numeric] how many times it occurred (default 1)
# @return [Numeric] the updated total number of occurrences
def occured(ngram, amount = 1)
  stats = (@data[ngram] ||= [0, 0])
  stats[0] += amount
  @total_occurences += amount
end

#occurence(ngram) ⇒ Object

Find the occurrence count of an n-gram. If it never occurred, returns 0.



86
87
88
# File 'lib/babel/profile.rb', line 86

# Looks up how often an n-gram has occurred.
#
# @param ngram [Object] the n-gram to look up
# @return [Numeric] the stored occurrence count, or 0 when the n-gram is
#   not in the profile
def occurence(ngram)
  stats = @data[ngram]
  return 0 unless stats

  stats.first
end

#rankObject

Rank the current profile: each n-gram's ranking is set to its occurrence count divided by the total number of occurrences.



66
67
68
69
70
71
72
73
74
75
76
# File 'lib/babel/profile.rb', line 66

# Ranks every n-gram in the profile.
#
# The ranking written into each [count, ranking] pair is the n-gram's
# relative frequency: its occurrence count divided by the total number of
# occurrences (as a Float). It is not an ordinal position.
#
# @return [Array<Array>] the [count, ranking] pairs that were updated
def rank
  total = @total_occurences.to_f
  @data.values.each { |stats| stats[1] = stats.first / total }
end

#ranking(ngram) ⇒ Object

Find the ranking of an n-gram. If it is not yet ranked, returns 0.



91
92
93
# File 'lib/babel/profile.rb', line 91

# Looks up the ranking of an n-gram.
#
# @param ngram [Object] the n-gram to look up
# @return [Numeric] the stored ranking, or 0 when the n-gram is not in the
#   profile
def ranking(ngram)
  entry = @data[ngram]
  entry ? entry.last : 0
end

#to_sObject



108
109
110
# File 'lib/babel/profile.rb', line 108

# String form of the profile: the inspect output of the n-gram table held
# in @data.
#
# @return [String]
def to_s
  @data.inspect
end