Class: ChatCorrect::CombineMultiWordVerbs

Inherits:
Object
Defined in:
lib/chat_correct/combine_multi_word_verbs.rb

Constant Summary

TOKEN_ARRAY =
['are', 'am', 'was', 'were', 'have', 'has', 'had', 'will', 'would', 'could', 'did', 'arenƪt', 'wasnƪt', 'werenƪt', 'havenƪt', 'hasnƪt', 'hadnƪt', 'wouldnƪt', 'couldnƪt', 'didnƪt']
TOKEN_ARRAY_2 =
['are', 'am', 'was', 'were', 'have', 'has', 'had', 'will', 'would', 'did', 'could']
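As the #combine source below shows, TOKEN_ARRAY lists the auxiliary verbs (including their negated contractions) that can begin a two-word verb phrase, while TOKEN_ARRAY_2 lists the auxiliaries that can precede 'not' in a three-word phrase.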

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(text:, tgr:) ⇒ CombineMultiWordVerbs

Returns a new instance of CombineMultiWordVerbs.



# File 'lib/chat_correct/combine_multi_word_verbs.rb', line 6

def initialize(text:, tgr:)
  @text = text
  @tgr = tgr
end
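
A minimal construction sketch (not taken from the gem's own docs): the tgr argument is assumed to be an EngTagger-style part-of-speech tagger, since #combine calls tgr.add_tags(text); the sample text is hypothetical.

require 'chat_correct'
require 'engtagger'

# Assumption: tgr must respond to #add_tags (e.g. an EngTagger instance).
tgr = EngTagger.new
combiner = ChatCorrect::CombineMultiWordVerbs.new(
  text: 'She will have finished the report.',
  tgr: tgr
)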

Instance Attribute Details

#text ⇒ Object (readonly)

Returns the value of attribute text.



# File 'lib/chat_correct/combine_multi_word_verbs.rb', line 5

def text
  @text
end

#tgr ⇒ Object (readonly)

Returns the value of attribute tgr.



# File 'lib/chat_correct/combine_multi_word_verbs.rb', line 5

def tgr
  @tgr
end

Instance Method Details

#combine ⇒ Object



# File 'lib/chat_correct/combine_multi_word_verbs.rb', line 11

def combine
  tokens = ChatCorrect::Tokenize.new(text: text).tokenize
  sentence_tagged = tgr.add_tags(text).split
  tokens_to_delete = []
  tokens.each_with_index do |token, index|
    case
    when ((token.eql?('will') && tokens[index + 1].eql?('have')) || (token.eql?('would') && tokens[index + 1].eql?('have')) || (token.eql?('had') && tokens[index + 1].eql?('been'))) &&
      sentence_tagged[index + 2].to_s.partition('>').first[1..-1][0].eql?('v')
        tokens[index] = token + ' ' + tokens[index + 1] + ' ' + tokens[index + 2]
        tokens_to_delete << tokens[index + 1].to_s
        tokens_to_delete << tokens[index + 2].to_s
    when TOKEN_ARRAY_2.include?(token) &&
      tokens[index + 1].to_s.eql?('not') &&
      sentence_tagged[index + 2].to_s[1].to_s.eql?('v')
        tokens[index] = token + ' ' + tokens[index + 1] + ' ' + tokens[index + 2]
        tokens_to_delete << tokens[index + 1].to_s
        tokens_to_delete << tokens[index + 2].to_s
    when TOKEN_ARRAY.include?(token) &&
      (sentence_tagged[index + 1].to_s[1].to_s.eql?('v') ||
      sentence_tagged[index + 1].to_s[1..2].to_s.eql?('rb')) &&
      tokens[index - 1].exclude?(' ') &&
      tokens[index + 1] != 'had'
        tokens[index] = token + ' ' + tokens[index + 1]
        tokens_to_delete << tokens[index + 1].to_s
    end
  end
  delete_tokens_from_array(tokens, tokens_to_delete)
end
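
Continuing the hypothetical example from #initialize: #combine tokenizes the text, tags it with tgr, and merges auxiliary-verb sequences such as 'will have finished', 'did not go', or 'was running' into single space-joined tokens, then drops the tokens that were folded into their predecessors.

tokens = combiner.combine
# Illustrative (unverified) output shape:
# ["She", "will have finished", "the", "report", "."]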