Class: FeatureSet::Datum

Inherits:
Object
  • Object
show all
Defined in:
lib/feature_set/datum.rb

Constant Summary collapse

# Separator pattern passed to String#split in #tokens: one or more
# whitespace or forward-slash characters.
TOKEN_REGEX =
/[\s\/]+/
# Despite the name, this matches any single character that is not an ASCII
# letter, digit, underscore, or hyphen; #tokens replaces each match with a
# space before splitting.
NON_ASCII_REGEX =
/[^a-zA-Z0-9_-]/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(v) ⇒ Datum

Returns a new instance of Datum.



8
9
10
# File 'lib/feature_set/datum.rb', line 8

# Builds a Datum around the given raw value.
#
# @param v the value to wrap; it is stored through the +value=+ writer
#   rather than by assigning the instance variable directly
#   (NOTE(review): #tokens calls String methods on it, so a String is
#   presumably expected — confirm against callers)
def initialize(v)
  self.value = v
end

Instance Attribute Details

#value ⇒ Object

Returns the value of attribute value.



6
7
8
# File 'lib/feature_set/datum.rb', line 6

# Reader for the wrapped value.
#
# @return [Object] whatever is currently held in @value
def value
  @value
end

Instance Method Details

#token_counts ⇒ Object



18
19
20
21
22
# File 'lib/feature_set/datum.rb', line 18

# Tallies how many times each token occurs.
#
# The tally is built from #tokens on the first call and cached in
# @token_counts afterwards.
#
# @return [Hash] token => number of occurrences, in order of first
#   appearance; tokens that never occur have no entry
def token_counts
  @token_counts ||= tokens.each_with_object({}) do |token, tally|
    tally[token] = tally.fetch(token, 0) + 1
  end
end

#tokens ⇒ Object



12
13
14
15
16
# File 'lib/feature_set/datum.rb', line 12

# Splits the value into lowercase word tokens.
#
# The value is downcased, every character matched by NON_ASCII_REGEX is
# replaced with a space, and the result is split on TOKEN_REGEX. The array
# is memoized in @tokens, so it is computed only once per instance.
#
# @return [Array<String>] the tokens in order of appearance; never contains
#   empty strings
def tokens
  @tokens ||= begin
    normalized = value.downcase.gsub(NON_ASCII_REGEX, ' ')
    # String#split with a regexp keeps a leading empty field, so a value
    # starting with punctuation (e.g. "(foo) bar") used to produce "" as its
    # first token. Dropping empty fields fixes that and also makes a separate
    # strip of the value unnecessary.
    normalized.split(TOKEN_REGEX).reject(&:empty?)
  end
end