Class: VectorEmbed
- Inherits:
-
Object
- Object
- VectorEmbed
- Defined in:
- lib/vector_embed.rb,
lib/vector_embed/maker.rb,
lib/vector_embed/version.rb,
lib/vector_embed/stop_word.rb,
lib/vector_embed/maker/date.rb,
lib/vector_embed/maker/ngram.rb,
lib/vector_embed/maker/number.rb,
lib/vector_embed/maker/phrase.rb,
lib/vector_embed/maker/boolean.rb
Defined Under Namespace
Constant Summary collapse
- JUST_A_NUMBER =
/\A\s*[+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?\s*\z/
- BLANK =
/\A\s*\z/
- NULL =
/\Anull\z/i
- SLASH_N =
'\N'
- TRUE =
/\Atrue\z/i
- T =
/\At\z/i
- FALSE =
/\Afalse\z/i
- F =
/\Af\z/i
- NULL_BYTE =
"\x00"
- LABEL_MAKERS =
[Maker::Boolean, Maker::Number]
- FEATURE_MAKERS =
[Maker::Boolean, Maker::Date, Maker::Number, Maker::Ngram, Maker::Phrase]
- VERSION =
'0.3.3'
Instance Attribute Summary collapse
-
#dict ⇒ Object
readonly
Returns the value of attribute dict.
-
#logger ⇒ Object
Returns the value of attribute logger.
-
#options ⇒ Object
readonly
Returns the value of attribute options.
Instance Method Summary collapse
- #index(parts) ⇒ Object
-
#initialize(options = {}) ⇒ VectorEmbed
constructor
A new instance of VectorEmbed.
- #line(label, features = {}) ⇒ Object
- #preprocess(v) ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ VectorEmbed
Returns a new instance of VectorEmbed.
28 29 30 31 32 33 34 35 36 |
# File 'lib/vector_embed.rb', line 28
# Builds a new VectorEmbed.
#
# The given options are shallow-copied into @options, so the caller's hash
# is never modified. A truthy :dict entry is removed from that copy and a
# duplicate of it is stored in @dict; every other key (including :logger)
# stays in @options.
#
# @param options [Hash] configuration; the keys read here are :logger and :dict
# @option options [Object] :logger logger to use instead of the default
# @option options [Object] :dict   object duplicated into @dict when truthy
# @return [VectorEmbed] a new instance of VectorEmbed
def initialize(options = {})
  @options = options.dup
  @mutex = Mutex.new
  @feature_makers = {}
  # Default logger writes to $stderr at INFO level.
  @logger = options[:logger] || Logger.new($stderr).tap { |l| l.level = Logger::INFO }

  dict = @options.delete(:dict)
  @dict = dict.dup if dict
end
Instance Attribute Details
#dict ⇒ Object (readonly)
Returns the value of attribute dict.
26 27 28 |
# File 'lib/vector_embed.rb', line 26
# Reader for the dict attribute.
#
# @return [Object] the current value of @dict (nil when it was never set)
def dict
  @dict
end
#logger ⇒ Object
Returns the value of attribute logger.
25 26 27 |
# File 'lib/vector_embed.rb', line 25
# Reader for the logger attribute.
#
# @return [Object] the current value of @logger
def logger
  @logger
end
#options ⇒ Object (readonly)
Returns the value of attribute options.
24 25 26 |
# File 'lib/vector_embed.rb', line 24
# Reader for the options attribute.
#
# @return [Object] the current value of @options
def options
  @options
end
Instance Method Details
#index(parts) ⇒ Object
61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/vector_embed.rb', line 61
# Turns a list of parts into an integer index.
#
# The parts are joined with NULL_BYTE to form a key. What happens next
# depends on whether a dict is present:
#
# * With a dict, the key is replaced by its raw MD5 digest. A truthy value
#   already stored under that digest is returned as-is; otherwise, inside
#   @mutex.synchronize, the digest is assigned dict.length + 1 (unless it
#   has been set in the meantime) and that value is returned.
# * Without a dict, the key is hashed with MurmurHash3::V32.str_hash and the
#   first seven characters of the hash's decimal string are converted back
#   to an integer.
#
# @param parts [#join] the pieces that identify a feature
# @return [Object] the dict entry, or the truncated hash as an Integer
def index(parts)
  joined = parts.join(NULL_BYTE)

  # No dictionary: fall back to the truncated hash.
  return MurmurHash3::V32.str_hash(joined).to_s[0..6].to_i unless dict

  digest = Digest::MD5.digest(joined)
  known = dict[digest]
  return known if known

  # Re-check under the lock so an entry added concurrently is not overwritten.
  @mutex.synchronize { dict[digest] ||= dict.length + 1 }
end
#line(label, features = {}) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/vector_embed.rb', line 38
# Builds one output line: the label value followed by "key:value" feature
# pairs, all separated by single spaces.
#
# For each feature, the pairs come from feature_maker(name, value).pairs(value).
# When the value is an Array, each element is handled separately under the
# name [name, position].join(NULL_BYTE). Nil pairs are dropped, the rest are
# sorted by their first element, and each pair is joined with ':'.
#
# @param label [Object] passed to label_maker(label).value(label)
# @param features [Hash] feature name => value (or Array of values)
# @return [String] the label value and the rendered pairs joined by ' '
def line(label, features = {})
  collected = []

  features.each do |name, value|
    if Array === value
      value.each_with_index do |element, position|
        maker = feature_maker([name, position].join(NULL_BYTE), element)
        collected.concat(maker.pairs(element))
      end
    else
      collected.concat(feature_maker(name, value).pairs(value))
    end
  end

  ordered = collected.compact.sort_by { |key, _| key }
  rendered = ordered.map { |pair| pair.join(':') }

  ([label_maker(label).value(label)] + rendered).join(' ')
end