Class: PlagiarismChecker

Inherits:
Object
  • Object
show all
Defined in:
lib/automated_metareview/plagiarism_check.rb

Instance Method Summary collapse

Instance Method Details

#check_for_plagiarism(review_text, subm_text) ⇒ Object

review_text and subm_text are arrays containing the review sentences and the submission sentences, respectively.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/automated_metareview/plagiarism_check.rb', line 10

# Reports whether any review sentence shares a phrase of NGRAM consecutive
# tokens with any submission sentence. The comparison is case-insensitive and
# is made after both strings have passed through
# TextPreprocessing#contains_punct (described as "replacing punctuation" by
# the original author; its exact behaviour is not visible from this method).
#
# @param review_text [Array] review sentences; every element is converted with to_s
# @param subm_text [Array] submission sentences; every element is converted with to_s
# @return [Boolean] true as soon as one NGRAM-token review phrase is found
#   inside a submission sentence, false when no pair matches
def check_for_plagiarism(review_text, subm_text)
  review_text.each do |review_sentence|
    # split(" ") splits on runs of whitespace and drops leading whitespace, so
    # the token list never contains blank entries that would need skipping.
    tokens = review_sentence.to_s.split(" ")

    subm_text.each do |subm_sentence|
      submission = subm_sentence.to_s
      start = 0

      while start < tokens.length
        # Up to NGRAM tokens beginning at `start` (always at least one token).
        rev_phrase = tokens[start, [NGRAM, 1].max].join(" ")

        # Once the window would end exactly one token past the list, every
        # later window is shorter still, so this is the last one examined.
        final_window = NGRAM > 1 && start + NGRAM - 1 == tokens.length

        # NOTE(review): both strings are passed through contains_punct once per
        # window, exactly as before; hoisting this out of the loop is only safe
        # if contains_punct is idempotent - confirm before optimising.
        tp = TextPreprocessing.new
        submission = tp.contains_punct(submission)
        rev_phrase = tp.contains_punct(rev_phrase)

        # Only phrases that still hold exactly NGRAM tokens count as evidence.
        if rev_phrase.split(" ").length == NGRAM && submission.downcase.include?(rev_phrase.downcase)
          return true
        end
        break if final_window

        start += 1
      end
    end
  end

  false
end

#compare_reviews_with_questions_responses(auto_metareview, map_id) ⇒ Object

Checking if the response has been copied from the review questions or from other responses submitted.



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/automated_metareview/plagiarism_check.rb', line 76

# Detects review responses that merely repeat the question they answer or
# repeat another response of the same review (both compared case-insensitively).
#
# The response at position i of auto_metareview.review_array is compared with
# the question text of the i-th Score row of the Response found for map_id.
# NOTE(review): this presumes review_array is index-aligned with the scores -
# confirm against the caller.
#
# When at least one copy is found, auto_metareview.review_array is replaced by
# the entries (nil entries included) that were not flagged as copies.
#
# @param auto_metareview [#review_array, #review_array=] holder of the response texts
# @param map_id [Object] value matched against responses.map_id
# @return [Object, nil] ALL_RESPONSES_PLAGIARISED when every scored response is
#   a copy, SOME_RESPONSES_PLAGIARISED when only some are, nil when none is
def compare_reviews_with_questions_responses(auto_metareview, map_id)
  answers = auto_metareview.review_array
  response = Response.find(:first, :conditions => ["map_id = ?", map_id])
  scores = Score.find(:all, :conditions => ["response_id = ?", response.id])

  # One question text per score, in score order.
  question_texts = scores.map do |score|
    Question.find_by_sql(["Select * from questions where id = ?", score.question_id])[0].txt
  end

  copies = 0     # responses that duplicate a question or another response
  originals = [] # responses that were not flagged

  scores.length.times do |i|
    answer = answers[i]
    question = question_texts[i]

    copied =
      if answer.nil?
        false # a missing response can match nothing
      elsif !question.nil? && question.downcase == answer.downcase
        true # verbatim copy of its own question
      else
        # verbatim copy of a response at any other position
        (0...answers.length).any? do |j|
          j != i && !answers[j].nil? && answer.downcase == answers[j].downcase
        end
      end

    if copied
      copies += 1
    else
      originals << answer
    end
  end

  return nil if copies.zero?

  # The review array is only rewritten when plagiarism was actually detected.
  auto_metareview.review_array = originals
  copies == scores.length ? ALL_RESPONSES_PLAGIARISED : SOME_RESPONSES_PLAGIARISED
end

#google_search_response(auto_metareview) ⇒ Object

Checks whether any response text was copied from content that can be found through a Google search.



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/automated_metareview/plagiarism_check.rb', line 125

# Runs an exact-phrase search (the text wrapped in double quotes) through
# RubyWebSearch::Google for every non-nil entry of auto_metareview.review_array.
# An entry whose search returns at least one result is treated as copied.
#
# Side effect: auto_metareview.review_array is always replaced by the entries
# whose search returned no results; nil entries are dropped.
#
# @param auto_metareview [#review_array, #review_array=] holder of the response texts
# @return [Boolean] true when at least one entry produced search results
def google_search_response(auto_metareview)
  searchable = auto_metareview.review_array.reject { |text| text.nil? }

  copied, unmatched = searchable.partition do |text|
    # Quoting the query asks for an exact match of the complete text.
    search = RubyWebSearch::Google.search(:query => "\"" + text + "\"")
    search.results.length > 0
  end

  # Only the texts without search hits stay available for later evaluation.
  auto_metareview.review_array = unmatched

  !copied.empty?
end