Class: NGramsGenerator

Inherits:
Object
  • Object
show all
Defined in:
lib/n_grams_generator.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ns, options = {}) ⇒ NGramsGenerator

Creates NGramsGenerator object.

Parameters:

  • ns (Number, Array<Number>)

    the lengths of generated n-grams

  • options (Hash) (defaults to: {})

    the additional options

  • options (Hash)

    a customizable set of options: :auto_clear and :no_count (each true or false; both default to false)



13
14
15
16
17
18
19
20
# File 'lib/n_grams_generator.rb', line 13

# Creates an NGramsGenerator.
#
# @param ns [Number, Array<Number>] the length(s) of the n-grams to generate;
#   a single value is wrapped in a one-element Array, any Enumerable is
#   stored as given
# @param options [Hash] additional options
# @option options [Boolean] :auto_clear (false) when truthy, #ngrams calls
#   #clear_ngrams before processing each new batch of data
# @option options [Boolean] :no_count (false) stored in @no_count; its
#   consumer is outside this method (NOTE(review): presumably switches the
#   result from counts to plain n-gram lists -- confirm)
def initialize(ns, options={})
  @ns = ns.kind_of?(Enumerable) ? ns : [ns]

  @auto_clear = options[:auto_clear] || false
  @no_count = options[:no_count] || false

  # The default Array is a single object shared by every missing key and the
  # hash is exposed via the all_ngrams reader. Freezing it keeps reads and
  # `hash[n] += [...]` working while turning an accidental in-place mutation
  # (`hash[n] << x`) into an error instead of silent cross-key corruption.
  @all_ngrams = Hash.new([].freeze)
end

Instance Attribute Details

#all_ngrams ⇒ Object (readonly)

Returns the value of attribute all_ngrams.



5
6
7
# File 'lib/n_grams_generator.rb', line 5

# Reader for the accumulated n-gram store.
#
# @return [Object] the current value of @all_ngrams (set to an empty Hash by
#   the constructor and reassigned by #ngrams)
def all_ngrams
  @all_ngrams
end

#auto_clear ⇒ Object (readonly)

Returns the value of attribute auto_clear.



5
6
7
# File 'lib/n_grams_generator.rb', line 5

# Reader for the :auto_clear option.
#
# @return [Object] the value passed as options[:auto_clear] to the
#   constructor, or false when it was not given; when truthy, #ngrams calls
#   #clear_ngrams before processing new data
def auto_clear
  @auto_clear
end

#no_count ⇒ Object (readonly)

Returns the value of attribute no_count.



5
6
7
# File 'lib/n_grams_generator.rb', line 5

# Reader for the :no_count option.
#
# @return [Object] the value passed as options[:no_count] to the
#   constructor, or false when it was not given
#   (NOTE(review): where this flag is consumed is not visible here -- confirm)
def no_count
  @no_count
end

#ns ⇒ Object (readonly)

Returns the value of attribute ns.



5
6
7
# File 'lib/n_grams_generator.rb', line 5

# Reader for the configured n-gram lengths.
#
# @return [Enumerable] the lengths given to the constructor; a single
#   non-Enumerable value is stored wrapped in a one-element Array
def ns
  @ns
end

Instance Method Details

#clear_ngrams ⇒ Object



69
70
71
# File 'lib/n_grams_generator.rb', line 69

# Empties the accumulated n-gram store in place by calling `clear` on
# @all_ngrams.
#
# @return [Object] whatever `clear` returns (for a Hash, the emptied hash
#   itself)
def clear_ngrams
  @all_ngrams.clear
end

#count_ngrams(ngrams = @all_ngrams) ⇒ Object



65
66
67
# File 'lib/n_grams_generator.rb', line 65

# Builds a Hash keyed by each configured n-gram length whose value is the
# result of count_array_elements for that length's entry in +ngrams+.
#
# @param ngrams [Hash] n-grams indexed by length; defaults to the
#   generator's accumulated store
# @return [Hash] one entry per element of @ns
def count_ngrams(ngrams=@all_ngrams)
  @ns.each_with_object({}) do |n, counts|
    counts[n] = count_array_elements(ngrams[n])
  end
end

#ngrams(data) ⇒ Hash

Generates n-grams according to lengths specified during construction.

NGramsGenerator.new(3).ngrams(['Alice', 'has', 'a', 'cat']) 
  # => {
        3 => { ['Alice', 'has', 'a'] => 1, ['has', 'a', 'cat'] => 1 }
      }
NGramsGenerator.new([2, 3], :no_count => true).ngrams(['Alice', 'has', 'a', 'cat']) 
  # => { 
     2 => [ ['Alice', 'has'], ['has', 'a'], ['a', 'cat'] ],
     3 => [ ['Alice', 'has', 'a'], ['has', 'a', 'cat'] ]
   }
NGramsGenerator.new(2).ngrams([['very', :adv], ['nice', :adj], ['job', :noun]])
  # => { 
     2 => { 
       ["very", "nice"] => 1, 
       [:adv, "nice"] => 1, 
       ["very", :adj] => 1, 
       [:adv, :adj] => 1, 
       ["nice", "job"] => 1, 
       [:adj, "job"] => 1, 
       ["nice", :noun] => 1, 
       [:adj, :noun] => 1
     }
   }

Parameters:

  • data (Array<Object>, Array<Array<Object>>)

Returns:

  • (Hash)

    the generated ngrams by n-number



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/n_grams_generator.rb', line 49

# Generates n-grams from +data+ for the lengths chosen at construction and
# folds them into the generator's store.
#
# Each element of +data+ that is not already Enumerable is wrapped in a
# one-element Array before being handed to flat_ngrams. Every n-gram that
# comes back is expanded through multiply_ngram, and the expanded lists are
# merged into the store via add_new_ngrams.
#
# @param data [Array<Object>, Array<Array<Object>>] the input sequence
# @return [Hash] the updated store; when +data+ is nil or empty the current
#   store is returned untouched (and is not cleared, even with auto_clear)
def ngrams(data)
  return @all_ngrams if data.nil? || data.empty?

  clear_ngrams if @auto_clear

  normalized = data.map { |item| item.kind_of?(Enumerable) ? item : [item] }
  grouped = flat_ngrams(normalized)

  # `+=` rebinds the key to a new Array, so the shared default is never mutated.
  expanded = grouped.keys.each_with_object(Hash.new([])) do |n, acc|
    grouped[n].each { |gram| acc[n] += multiply_ngram(gram) }
  end

  @all_ngrams = add_new_ngrams(expanded)
end