Module: Word2Vec
- Defined in:
- lib/word2vec/io.rb,
lib/word2vec/utils.rb,
lib/word2vec/version.rb,
lib/word2vec/word_vectors.rb,
lib/word2vec/word_clusters.rb,
lib/word2vec/scripts_interface.rb
Defined Under Namespace
Classes: WordClusters, WordVectors
Constant Summary
- VERSION =
"0.1.1"
Class Method Summary
- .doc2vec(train, output, size: 100, window: 5, sample: '1e-3', hs: 0, negative: 5, threads: 12, iter_: 5, min_count: 5, alpha: 0.025, debug: 2, binary: 1, cbow: 1, save_vocab: nil, read_vocab: nil, verbose: nil) ⇒ Object
- .load(fname, *args, kind: 'auto', **kwargs) ⇒ Object
- .load_clusters(fname) ⇒ Object
- .run_cmd(command, verbose: false) ⇒ Object
- .word2clusters(train, output, classes, size: 100, window: 5, sample: '1e-3', hs: 0, negative: 5, threads: 12, iter_: 5, min_count: 5, alpha: 0.025, debug: 2, binary: 1, cbow: 1, save_vocab: nil, read_vocab: nil, verbose: false) ⇒ Object
- .word2phrase(train, output, min_count: 5, threshold: 100, debug: 2, verbose: false) ⇒ Object
- .word2vec(train, output, size: 100, window: 5, sample: '1e-3', hs: 0, negative: 5, threads: 12, iter_: 5, min_count: 5, alpha: 0.025, debug: 2, binary: 1, cbow: 1, save_vocab: nil, read_vocab: nil, verbose: false) ⇒ Object
Class Method Details
.doc2vec(train, output, size: 100, window: 5, sample: '1e-3', hs: 0, negative: 5, threads: 12, iter_: 5, min_count: 5, alpha: 0.025, debug: 2, binary: 1, cbow: 1, save_vocab: nil, read_vocab: nil, verbose: nil) ⇒ Object
75 76 77 78 79 80 |
# File 'lib/word2vec/scripts_interface.rb', line 75
#
# Placeholder entry point: accepts a training-style argument list but does
# no work and always raises.
#
# None of the arguments are read; they are listed only so the signature is
# already in place for a future implementation.
#
# @raise [NotImplementedError] always. Note that in Ruby this class derives
#   from ScriptError, not StandardError, so a bare `rescue` will NOT catch
#   it; callers must rescue NotImplementedError explicitly.
def self.doc2vec(train, output, size: 100, window: 5, sample: '1e-3', hs: 0,
                 negative: 5, threads: 12, iter_: 5, min_count: 5,
                 alpha: 0.025, debug: 2, binary: 1, cbow: 1,
                 save_vocab: nil, read_vocab: nil, verbose: nil)
  # A message makes the failure self-explanatory in logs and backtraces.
  raise NotImplementedError, 'Word2Vec.doc2vec is not implemented'
end
.load(fname, *args, kind: 'auto', **kwargs) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/word2vec/io.rb', line 2
#
# Dispatches to the Word2Vec::WordVectors constructor selected by +kind+.
#
# @param fname [#to_s] file name. Only its string form is inspected (for
#   the 'auto' suffix check); the object itself is forwarded unchanged.
# @param args extra positional arguments forwarded to the constructor.
# @param kind [String, Symbol] one of 'auto' (default), 'bin', 'txt' or
#   'mmap'. With 'auto', a name ending in '.bin' selects 'bin' and a name
#   ending in '.txt' selects 'txt'. Symbols are accepted as well.
# @param kwargs extra keyword arguments forwarded to the constructor.
# @return [Object] whatever the selected constructor returns.
# @raise [RuntimeError] 'Could not identify kind' when +kind+ is 'auto' and
#   the name has neither suffix; 'Unknown kind' for any other unsupported
#   +kind+.
def self.load(fname, *args, kind: 'auto', **kwargs)
  # Normalise so that kind: :bin behaves like kind: 'bin'.
  resolved = kind.to_s

  if resolved == 'auto'
    name = fname.to_s
    resolved =
      if name.end_with?('.bin')
        'bin'
      elsif name.end_with?('.txt')
        'txt'
      else
        raise 'Could not identify kind'
      end
  end

  case resolved
  when 'bin'  then Word2Vec::WordVectors.from_binary(fname, *args, **kwargs)
  when 'txt'  then Word2Vec::WordVectors.from_text(fname, *args, **kwargs)
  when 'mmap' then Word2Vec::WordVectors.from_mmap(fname, *args, **kwargs)
  else raise 'Unknown kind'
  end
end
.load_clusters(fname) ⇒ Object
24 25 26 |
# File 'lib/word2vec/io.rb', line 24
#
# Convenience wrapper that delegates to Word2Vec::WordClusters.from_text.
#
# @param fname file name, passed through unchanged.
# @return [Object] whatever Word2Vec::WordClusters.from_text returns.
def self.load_clusters(fname)
  clusters = Word2Vec::WordClusters
  clusters.from_text(fname)
end
.run_cmd(command, verbose: false) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/word2vec/scripts_interface.rb', line 82
#
# Runs an external program described by an argument vector.
#
# @param command [Array<#to_s>] program path followed by its arguments.
# @param verbose [Boolean] when true, echo the command line to stdout
#   before running it.
# @return [Boolean, nil] the value of Kernel#system: true for a zero exit
#   status, false for a non-zero one, nil when the program could not be
#   started (an empty +command+ is reported the same way).
def self.run_cmd(command, verbose: false)
  program, *arguments = command.map(&:to_s)
  return nil if program.nil?

  if verbose
    puts command.join(' ')
    # Flush so the echo is emitted before the child writes to the same stream.
    $stdout.flush
  end

  # TODO: when verbose, stream the child's output and raise on lines
  # containing 'ERROR:'.

  # Hand the arguments over as a vector instead of one joined string, so
  # paths or values containing spaces or shell metacharacters reach the
  # program verbatim. The [path, argv0] pair keeps the shell out of it even
  # when there are no arguments.
  system([program, program], *arguments)
end
.word2clusters(train, output, classes, size: 100, window: 5, sample: '1e-3', hs: 0, negative: 5, threads: 12, iter_: 5, min_count: 5, alpha: 0.025, debug: 2, binary: 1, cbow: 1, save_vocab: nil, read_vocab: nil, verbose: false) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/word2vec/scripts_interface.rb', line 30
#
# Assembles the argument vector for the `word2vec` executable located in
# ext/word2vec (resolved relative to this source file), including the
# `-classes` flag, and hands it to run_cmd.
#
# Every positional and keyword argument is stringified and forwarded as the
# value of the like-named flag (`iter_` becomes `-iter`, `min_count`
# becomes `-min-count`); `classes` is forwarded as `-classes`.
#
# @param save_vocab forwarded as `-save-vocab` unless nil.
# @param read_vocab forwarded as `-read-vocab` unless nil.
# @param verbose forwarded to run_cmd.
# @return whatever run_cmd returns.
def self.word2clusters(train, output, classes, size: 100, window: 5,
                       sample: '1e-3', hs: 0, negative: 5, threads: 12,
                       iter_: 5, min_count: 5, alpha: 0.025, debug: 2,
                       binary: 1, cbow: 1, save_vocab: nil, read_vocab: nil,
                       verbose: false)
  ext = File.expand_path('../../../ext/word2vec', __FILE__)

  # Insertion order of the hash is the order of flags on the command line.
  flags = {
    '-train' => train, '-output' => output, '-size' => size,
    '-window' => window, '-sample' => sample, '-hs' => hs,
    '-negative' => negative, '-threads' => threads, '-iter' => iter_,
    '-min-count' => min_count, '-alpha' => alpha, '-debug' => debug,
    '-binary' => binary, '-cbow' => cbow, '-classes' => classes
  }

  command = [File.join(ext, 'word2vec')]
  flags.each { |flag, value| command.push(flag, value.to_s) }
  command.push('-save-vocab', save_vocab.to_s) unless save_vocab.nil?
  command.push('-read-vocab', read_vocab.to_s) unless read_vocab.nil?

  run_cmd(command, verbose: verbose)
end
.word2phrase(train, output, min_count: 5, threshold: 100, debug: 2, verbose: false) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/word2vec/scripts_interface.rb', line 60
#
# Assembles the argument vector for the `word2phrase` executable located in
# ext/word2vec (resolved relative to this source file) and hands it to
# run_cmd.
#
# Each argument is stringified and forwarded as the value of the like-named
# flag (`min_count` becomes `-min-count`).
#
# @param verbose forwarded to run_cmd.
# @return whatever run_cmd returns.
def self.word2phrase(train, output, min_count: 5, threshold: 100, debug: 2,
                     verbose: false)
  ext = File.expand_path('../../../ext/word2vec', __FILE__)

  # Insertion order of the hash is the order of flags on the command line.
  flags = {
    '-train' => train, '-output' => output, '-min-count' => min_count,
    '-threshold' => threshold, '-debug' => debug
  }

  command = [File.join(ext, 'word2phrase')]
  flags.each { |flag, value| command.push(flag, value.to_s) }

  run_cmd(command, verbose: verbose)
end
.word2vec(train, output, size: 100, window: 5, sample: '1e-3', hs: 0, negative: 5, threads: 12, iter_: 5, min_count: 5, alpha: 0.025, debug: 2, binary: 1, cbow: 1, save_vocab: nil, read_vocab: nil, verbose: false) ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/word2vec/scripts_interface.rb', line 2
#
# Assembles the argument vector for the `word2vec` executable located in
# ext/word2vec (resolved relative to this source file) and hands it to
# run_cmd.
#
# Every positional and keyword argument is stringified and forwarded as the
# value of the like-named flag (`iter_` becomes `-iter`, `min_count`
# becomes `-min-count`).
#
# @param save_vocab forwarded as `-save-vocab` unless nil.
# @param read_vocab forwarded as `-read-vocab` unless nil.
# @param verbose forwarded to run_cmd.
# @return whatever run_cmd returns.
def self.word2vec(train, output, size: 100, window: 5, sample: '1e-3', hs: 0,
                  negative: 5, threads: 12, iter_: 5, min_count: 5,
                  alpha: 0.025, debug: 2, binary: 1, cbow: 1,
                  save_vocab: nil, read_vocab: nil, verbose: false)
  ext = File.expand_path('../../../ext/word2vec', __FILE__)

  # Insertion order of the hash is the order of flags on the command line.
  flags = {
    '-train' => train, '-output' => output, '-size' => size,
    '-window' => window, '-sample' => sample, '-hs' => hs,
    '-negative' => negative, '-threads' => threads, '-iter' => iter_,
    '-min-count' => min_count, '-alpha' => alpha, '-debug' => debug,
    '-binary' => binary, '-cbow' => cbow
  }

  command = [File.join(ext, 'word2vec')]
  flags.each { |flag, value| command.push(flag, value.to_s) }
  command.push('-save-vocab', save_vocab.to_s) unless save_vocab.nil?
  command.push('-read-vocab', read_vocab.to_s) unless read_vocab.nil?

  run_cmd(command, verbose: verbose)
end