Class: FeatureGenerator
- Inherits: Object
- Inheritance chain: Object → FeatureGenerator
- Defined in:
- lib/libsvm_preprocessor/feature_generator.rb
Instance Method Summary collapse
- #bigrams(ary) ⇒ Object
- #features(ary_of_terms, testing: false) ⇒ Object
- #hash_of_ngrams ⇒ Object
- #initialize(options = {}) ⇒ FeatureGenerator (constructor): a new instance of FeatureGenerator.
- #trichar(ary_of_terms) ⇒ Object
- #unigrams(ary_of_term) ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ FeatureGenerator
Returns a new instance of FeatureGenerator.
7 8 9 10 11 |
# File 'lib/libsvm_preprocessor/feature_generator.rb', line 7
# Builds a generator backed by a fresh TokenMap.
#
# NOTE(review): the extracted listing had lost the `options` identifier; it is
# restored here from the documented signature `initialize(options = {})`.
#
# @param options [Hash] settings; `:mode` falls back to :unigram when it is
#   absent or nil. The hash is stored as-is (not copied), so the default is
#   written into the hash the caller passed.
def initialize(options = {})
  @token_map = TokenMap.new
  @options = options
  @options[:mode] ||= :unigram
end
Instance Method Details
#bigrams(ary) ⇒ Object
42 43 44 |
# File 'lib/libsvm_preprocessor/feature_generator.rb', line 42
# Pairs every element with its successor.
#
# @param ary [Array] ordered terms
# @return [Array<Array>] adjacent pairs, e.g. [a, b, c] => [[a, b], [b, c]];
#   empty when ary has fewer than two elements
def bigrams(ary)
  # each_cons copes with an empty array, whereas slicing with ary[1..-1]
  # yields nil there and makes zip raise a TypeError.
  ary.each_cons(2).to_a
end
#features(ary_of_terms, testing: false) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/libsvm_preprocessor/feature_generator.rb', line 13
# Turns a list of terms into features: builds the n-grams selected by
# @options[:mode] and hands them to the token map.
#
# @param ary_of_terms [Array] terms to convert
# @param testing [Boolean] forwarded unchanged to the token map's #token_map
# @return [Object, nil] whatever the token map returns, or nil when the mode
#   is none of :unigram, :bigram, :trichar
def features(ary_of_terms, testing: false)
  ngrams =
    case @options[:mode]
    when :unigram then unigrams(ary_of_terms)
    when :bigram  then unigrams(ary_of_terms) + bigrams(ary_of_terms)
    when :trichar then trichar(ary_of_terms)
    end
  return unless ngrams

  # `testing` must reach the token map in every mode, including :trichar.
  @token_map.token_map(ngrams, testing: testing)
end
#hash_of_ngrams ⇒ Object
3 4 5 |
# File 'lib/libsvm_preprocessor/feature_generator.rb', line 3
# Delegates to the token map held in @token_map.
#
# @return [Object] whatever `@token_map.hash_of_ngrams` returns
def hash_of_ngrams
  @token_map.hash_of_ngrams
end
#trichar(ary_of_terms) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/libsvm_preprocessor/feature_generator.rb', line 25
# Builds character trigrams over the terms joined by single spaces.
#
# @param ary_of_terms [Array<String>] terms to join
# @return [Array<Array<String>>] one single-element array per window of three
#   consecutive characters; when the joined text is shorter than three
#   characters, the only entry is the whole text
def trichar(ary_of_terms)
  text = ary_of_terms.join(" ")
  # Too short for a full window: the whole text is the only feature.
  return [[text]] if text.size < 3

  text.each_char.each_cons(3).map { |window| [window.join] }
end
#unigrams(ary_of_term) ⇒ Object
38 39 40 |
# File 'lib/libsvm_preprocessor/feature_generator.rb', line 38
# Wraps each term in its own one-element array.
#
# @param ary_of_term [Array] terms
# @return [Array<Array>] e.g. [a, b] => [[a], [b]]
def unigrams(ary_of_term)
  # A window of width one over the terms is exactly the list of unigrams.
  ary_of_term.each_cons(1).to_a
end