Class: TWSS::Trainer

Inherits:
Object
  • Object
show all
Defined in:
lib/twss/trainer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(engine, options = {}) ⇒ Trainer

Returns a new instance of Trainer.



9
10
11
12
13
# File 'lib/twss/trainer.rb', line 9

# Builds a trainer around the given engine.
#
# The engine's state is cleared immediately, so whatever it had learned
# before being handed to the trainer is discarded.
#
# @param engine [Object] anything responding to +clear_state!+
# @param options [Hash] optional settings
# @option options [Integer] :training_set_size (100) stored in
#   +@training_set_size+; falls back to 100 when absent or nil
def initialize(engine, options = {})
  @engine = engine
  @engine.clear_state!
  requested_size = options[:training_set_size]
  @training_set_size = requested_size || 100
end

Instance Attribute Details

#engine ⇒ Object (readonly)

Returns the value of attribute engine.



7
8
9
# File 'lib/twss/trainer.rb', line 7

# Reader for the engine this trainer was constructed with.
#
# @return [Object] the value held in +@engine+
def engine
  @engine
end

Instance Method Details

#run_examples ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/twss/trainer.rb', line 51

# Prints a fixed list of sample sentences, each followed by whatever the
# global +TWSS()+ helper returns for it, one per line in the form
# +"sentence" => result+.
#
# NOTE(review): this goes through +TWSS()+ rather than the +engine+ reader;
# presumably both refer to the same classifier - confirm.
#
# @return [Array<String>] the list of sample sentences
def run_examples
  samples = [
    "how big is that thing going to get?",
    "umm... that's the not the right hole",
    "did you resolve the ticket?",
    "did you fix the bug?",
    "you're going to need to go faster",
    "I'm almost there, keep going",
    "Ok, send me a pull request",
    "The president issued a decree",
    "I don't get it, this isn't working correctly",
    "finished specialties in the warehouse"
  ]

  samples.each do |sentence|
    puts "\"#{sentence}\" => #{TWSS(sentence)}"
  end
end

#strip_tweet(text) ⇒ Object

A little cleanup of the text before we train on it.



41
42
43
44
45
46
47
48
49
# File 'lib/twss/trainer.rb', line 41

# Normalizes a raw tweet into plain words before it is used for training.
#
# Steps, in order:
# 1. drop @mentions and #hashtags,
# 2. drop the standalone retweet/overheard markers "RT" and "OH",
# 3. drop the literal "twss" (any case),
# 4. drop http and https URLs,
# 5. turn every remaining non-word character and digit into a space,
# 6. trim leading and trailing whitespace.
#
# @param text [String] the raw tweet text
# @return [String] a new, cleaned string; +text+ itself is not modified
def strip_tweet(text)
  t = text.gsub(/[\@\#]\w+\b/i, '') # strip mentions and hashtags
  # The leading \b stops this from eating the tail of ordinary words
  # ("start ", "heart "); \z also catches a marker that ends the text.
  t.gsub!(/\b(?:RT|OH)(?:\W|\z)/i, '') # strip RT's and OH's
  t.gsub!(/twss/i, '') # strip out twss itself
  # Match both schemes and run to the next whitespace so that no fragment of
  # the link (query string, dashes, ...) survives as a training token.
  t.gsub!(%r{https?://\S+}, '') # URLs
  t.gsub!(/[\W\d]/, ' ') # now all non word chars and numbers
  t.strip!
  t
end

#train ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/twss/trainer.rb', line 15

# Retrains the engine from the bundled data files, then prints sample output.
#
# Sequence: clear the engine's state, feed it every line of
# data/non_twss.txt under TWSS::Engine::FALSE, feed it every line of
# data/twss.txt under TWSS::Engine::TRUE (each line passed through
# +strip_tweet+ first), ask the engine to dump its classifier to file, and
# finally call +run_examples+. Progress messages go to stdout.
#
# The data directory is resolved relative to this source file.
#
# @return [Object] whatever +run_examples+ returns
def train
  data_dir = File.join(File.dirname(__FILE__), '../../data/')

  # Feeds each line of one data file to the engine under the given category.
  feed = lambda do |file_name, category|
    File.foreach(File.join(data_dir, file_name)) do |line|
      engine.train(category, strip_tweet(line))
    end
  end

  puts "Clearing state..."
  engine.clear_state!

  puts "Training NON-TWSS strings..."
  feed.call('non_twss.txt', TWSS::Engine::FALSE)

  puts "Training TWSS strings..."
  feed.call('twss.txt', TWSS::Engine::TRUE)

  puts "Writing to file..."
  engine.dump_classifier_to_file

  puts "Done."
  puts

  run_examples
end