Class: FeatureSet::Datum
- Inherits: Object
- Inheritance hierarchy: Object > FeatureSet::Datum
- Defined in:
- lib/feature_set/datum.rb
Constant Summary collapse
- TOKEN_REGEX =
/[\s\/]+/
- NON_ASCII_REGEX =
/[^a-zA-Z0-9_-]/
Instance Attribute Summary collapse
- #value ⇒ Object
  Returns the value of attribute value.
Instance Method Summary collapse
- #initialize(v) ⇒ Datum (constructor)
  A new instance of Datum.
- #token_counts ⇒ Object
- #tokens ⇒ Object
Constructor Details
#initialize(v) ⇒ Datum
Returns a new instance of Datum.
8 9 10 |
# Constructor: stores v by calling the value= writer (self.value = v) rather
# than assigning the @value instance variable directly.
# @param v the object to wrap; the code shown here does not constrain its type
# File 'lib/feature_set/datum.rb', line 8 def initialize(v) self.value = v end |
Instance Attribute Details
#value ⇒ Object
Returns the value of attribute value.
6 7 8 |
# Attribute reader: returns the @value instance variable unchanged.
# File 'lib/feature_set/datum.rb', line 6 def value @value end |
Instance Method Details
#token_counts ⇒ Object
18 19 20 21 22 |
# File 'lib/feature_set/datum.rb', line 18
# Counts how many times each token occurs in #tokens.
#
# The result is memoized in @token_counts, so #tokens is only consulted on
# the first call.
#
# @return [Hash] maps each distinct token to its number of occurrences;
#   tokens that never occur are absent (lookup returns nil, not 0)
def token_counts
  # each_with_object hands back the accumulator itself, so the block does
  # not have to end with the hash the way an inject-based fold must.
  @token_counts ||= tokens.each_with_object({}) do |token, counts|
    counts[token] = counts.fetch(token, 0) + 1
  end
end
#tokens ⇒ Object
12 13 14 15 16 |
# File 'lib/feature_set/datum.rb', line 12
# Splits the datum's value into normalized tokens.
#
# The value is stripped and downcased, every character matched by
# NON_ASCII_REGEX is replaced with a space, and the result is split on
# TOKEN_REGEX. The token list is memoized in @tokens.
#
# @return [Array<String>] the tokens in order of appearance; never contains
#   an empty string
def tokens
  @tokens ||= begin
    normalized = value.strip.downcase.gsub(NON_ASCII_REGEX, ' ')
    # String#split with a Regexp keeps a leading empty field, so a value
    # whose first character was replaced by a space (e.g. "(foo) bar")
    # would otherwise produce "" as its first token.
    normalized.split(TOKEN_REGEX).reject(&:empty?)
  end
end