Class: TextQuantity

Inherits:
Object
  • Object
show all
Defined in:
lib/automated_metareview/text_quantity.rb

Instance Method Summary collapse

Instance Method Details

#number_of_unique_tokens(text_array) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/automated_metareview/text_quantity.rb', line 5

# Counts the distinct, non-frequent tokens across all the given texts.
#
# Each text is passed through TextPreprocessing#contains_punct (presumably
# strips punctuation -- confirm against that class), split on whitespace, and
# compared case-insensitively. Tokens for which
# WordnetBasedSimilarity#is_frequent_word returns a truthy value are skipped.
#
# Tokens are matched exactly. Substring matching against a running string of
# earlier tokens would wrongly treat "cat" as already seen once "category"
# (or any frequent word containing it) had appeared.
#
# @param text_array [Array<String>] the texts to scan
# @return [Integer] number of unique non-frequent tokens (0 for an empty array)
def number_of_unique_tokens(text_array)
  instance = WordnetBasedSimilarity.new
  seen = {} # downcased token => true; used as a set for exact-match lookups

  text_array.each do |text|
    # A fresh preprocessor per text, since its statefulness is not known here.
    tp = TextPreprocessing.new
    tp.contains_punct(text).split(" ").each do |token|
      word = token.downcase
      next if instance.is_frequent_word(word) # frequent words are never counted

      seen[word] = true
    end
  end

  seen.size
end