Class: Babel::Profile
- Inherits:
-
Object
- Object
- Babel::Profile
- Defined in:
- lib/babel/profile.rb
Instance Attribute Summary collapse
-
#data ⇒ Object
readonly
Returns the value of attribute data.
-
#language ⇒ Object
readonly
Returns the value of attribute language.
Instance Method Summary collapse
-
#clean(text) ⇒ Object
TODO: needed?.
-
#distance(other) ⇒ Object
Calculate the distance to another profile.
-
#initialize(language = nil) ⇒ Profile
constructor
A new instance of Profile.
-
#learn(text, options = {}) ⇒ Object
Learn a text. The following options are used when generating the n-grams: * min_length => 2 * max_length => 5 * pad => true.
-
#limit(boundary = 100) ⇒ Object
Limit this profile to n items; the profile needs to be ranked first.
- #merge(other) ⇒ Object
-
#occured(ngram, amount = 1) ⇒ Object
Called when an n-gram has occurred; optionally you can pass an amount (how many times the n-gram occurred).
-
#occurence(ngram) ⇒ Object
Find the occurrence count of an n-gram.
-
#rank ⇒ Object
Rank the current profile: n-grams are sorted by occurrence and then ranked.
-
#ranking(ngram) ⇒ Object
Find the ranking of an n-gram.
- #to_s ⇒ Object
Constructor Details
#initialize(language = nil) ⇒ Profile
Returns a new instance of Profile.
5 6 7 8 9 |
# File 'lib/babel/profile.rb', line 5
# Builds an empty profile, optionally tagged with a language.
#
# @param language [Object, nil] language label, stored as given (nil by default)
def initialize(language = nil)
  # running total of all recorded occurrences
  @total_occurences = 0
  # n-gram table; starts out empty
  @data = {}
  @language = language
end
Instance Attribute Details
#data ⇒ Object (readonly)
Returns the value of attribute data.
4 5 6 |
# File 'lib/babel/profile.rb', line 4
# Read-only accessor for the n-gram table.
#
# @return [Object] whatever is currently held in @data
def data
  @data
end
#language ⇒ Object (readonly)
Returns the value of attribute language.
3 4 5 |
# File 'lib/babel/profile.rb', line 3
# Read-only accessor for the language label.
#
# @return [Object] whatever is currently held in @language
def language
  @language
end
Instance Method Details
#clean(text) ⇒ Object
TODO: needed?
41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/babel/profile.rb', line 41
# Pre-processing hook for text; currently a pass-through.
#
# The previous body returned +text+ on its very first statement, which left a
# chain of gsub calls (stripping the digits 0-9 and the characters
# : / _ ( ) ; ?) unreachable. That dead code is removed here; behaviour is
# unchanged.
#
# TODO: decide whether the stripping is needed. If it is, a single
# text.gsub(%r{[0-9:/_();?]}, '') would replace the old chain.
#
# @param text [String] raw input text
# @return [String] the very same object that was passed in
def clean(text)
  text
end
#distance(other) ⇒ Object
Calculate the distance to another profile
96 97 98 99 100 101 102 103 104 105 |
# File 'lib/babel/profile.rb', line 96
# Calculate the distance to another profile.
#
# Walks every n-gram of this profile and asks +other+ for its ranking of that
# n-gram. A ranking of 0 adds 1 to the distance; any other ranking adds the
# absolute difference between the two rank values.
#
# @param other [#ranking] object answering ranking(ngram)
# @return [Numeric] accumulated distance; 0 when this profile has no n-grams
def distance(other)
  total = 0
  @data.each do |ngram, stats|
    theirs = other.ranking(ngram)
    # stats.last is this profile's own rank value for the n-gram
    total += theirs == 0 ? 1 : (theirs - stats.last).abs
  end
  total
end
#learn(text, options = {}) ⇒ Object
Learn a text. The following options are used when generating the n-grams:
* min_length => 2
* max_length => 5
* pad => true
17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/babel/profile.rb', line 17
# Learn a text: split it on spaces, generate the n-grams of every word,
# record each n-gram as occurred once, then re-rank the profile.
#
# The following options are used when generating the n-grams (defaults shown;
# caller-supplied keys override them):
#   * :min_length => 2
#   * :max_length => 5
#   * :pad        => true
#
# @param text [String] text to learn from
# @param options [Hash] n-gram generation options, merged over the defaults
# @return [self] so that calls can be chained
def learn(text, options = {})
  options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
  text = clean(text)
  text.split(' ').each do |word|
    # NOTE(review): n_grams is called on each word but is not defined in this
    # listing - presumably a String extension shipped elsewhere in the library.
    word.n_grams(options).each do |ngram|
      self.occured(ngram)
    end
  end
  # after learning rank the new n-grams
  self.rank
  # return self so we can chain learn commands: profile.learn('asasas').learn('asdsad')
  self
end
#limit(boundary = 100) ⇒ Object
Limit this profile to n items; the profile needs to be ranked first.
57 58 59 60 61 62 |
# File 'lib/babel/profile.rb', line 57
# Limit this profile to its +boundary+ most frequent n-grams. The profile
# needs to be ranked first.
#
# The previous body rejected entries whose rank value was greater than
# +boundary+. Since rank stores a relative frequency (count divided by the
# total, so never above 1.0), an item-count boundary such as the default 100
# never removed anything. Entries are now ordered by occurrence count and
# everything past the first +boundary+ is deleted.
#
# The rank values and the running total are left untouched, exactly as before.
#
# @param boundary [Integer] maximum number of n-grams to keep
# @return [Hash] the (possibly trimmed) n-gram table
# @raise [RuntimeError] when any entry still has a rank value of 0
def limit(boundary = 100)
  @data.each_value do |value|
    raise 'Please call rank() first' if value.last == 0
  end
  # Highest count first; the insertion index breaks ties so the outcome is
  # deterministic (sort_by alone is not stable).
  ordered = @data.each_with_index.sort_by { |(_ngram, value), index| [-value.first, index] }
  ordered.drop(boundary).each { |(ngram, _value), _index| @data.delete(ngram) }
  @data
end
#merge(other) ⇒ Object
31 32 33 34 35 36 37 38 |
# File 'lib/babel/profile.rb', line 31
# Fold the n-gram counts of another profile into this one.
#
# Both profiles must report the same language. Every n-gram in other.data is
# recorded here with the first element of its entry as the amount.
#
# @param other [#language, #data] profile to merge in
# @return [Object] the result of iterating other.data
# @raise [ArgumentError] when the two languages differ
def merge(other)
  mine, theirs = self.language, other.language
  raise ArgumentError.new("self has a language of #{mine} but profile to merge has #{theirs}") if mine != theirs

  other.data.each do |ngram, stats|
    self.occured(ngram, stats.first)
  end
end
#occured(ngram, amount = 1) ⇒ Object
Called when an n-gram has occurred; optionally you can pass an amount (how many times the n-gram occurred)
80 81 82 83 |
# File 'lib/babel/profile.rb', line 80
# Record that an n-gram occurred.
#
# An n-gram seen for the first time gets a fresh [0, 0] entry; the first slot
# of the entry is then increased by +amount+, as is the running total.
#
# @param ngram [Object] the n-gram used as the table key
# @param amount [Numeric] how many occurrences to add (1 by default)
# @return [Numeric] the updated running total
def occured(ngram, amount = 1)
  entry = (@data[ngram] ||= [0, 0])
  entry[0] += amount
  @total_occurences += amount
end
#occurence(ngram) ⇒ Object
Find the occurrence count of an n-gram. If it never occurred, returns 0.
86 87 88 |
# File 'lib/babel/profile.rb', line 86
# Look up how often an n-gram has been recorded.
#
# @param ngram [Object] the n-gram to look up
# @return [Numeric] first slot of the n-gram's entry, or 0 when there is no entry
def occurence(ngram)
  entry = @data[ngram]
  return 0 unless entry

  entry.first
end
#rank ⇒ Object
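Rank the current profile: n-grams are sorted by occurrence and then ranked. (Note: in the listing below the sort-based ranking is commented out; the active code stores each n-gram's share of the total occurrences instead.)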
66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/babel/profile.rb', line 66
# Rank the current profile.
#
# The second slot of every entry is set to the entry's count divided by the
# running total of occurrences (as a Float).
#
# An earlier ordinal ranking is kept here, disabled, for reference:
#   @data.values.sort do |o1, o2|
#     o2.first <=> o1.first
#   end.each_with_index do |item, index|
#     item[1] = index + 1
#   end
#
# @return [Array] the list of entries that were updated
def rank
  total = @total_occurences.to_f
  @data.values.each { |stats| stats[1] = stats[0] / total }
end
#ranking(ngram) ⇒ Object
Find the ranking of an n-gram. If it is not yet ranked, returns 0.
91 92 93 |
# File 'lib/babel/profile.rb', line 91
# Look up the rank value stored for an n-gram.
#
# @param ngram [Object] the n-gram to look up
# @return [Numeric] last slot of the n-gram's entry, or 0 when there is no entry
def ranking(ngram)
  entry = @data[ngram]
  return 0 unless entry

  entry.last
end
#to_s ⇒ Object
108 109 110 |
# File 'lib/babel/profile.rb', line 108
# String form of the profile: the inspect output of the n-gram table.
#
# @return [String]
def to_s
  @data.inspect
end