Class: ClauseExtractor
- Inherits:
-
Object
- Object
- ClauseExtractor
- Defined in:
- lib/clause_extractor.rb
Class Method Summary collapse
- .get_clauses(phrase, format = String.new) ⇒ Object
- .get_match_start_index(verb, match, index) ⇒ Object
- .prioritize_ranges(ranges, lo, hi, match) ⇒ Object
- .scan_phrase(phrase, list, regex, verb, tense_label, index, ranges) ⇒ Object
Class Method Details
.get_clauses(phrase, format = String.new) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/clause_extractor.rb', line 5
#
# Finds verb clauses in +phrase+ and returns them as a Hash.
#
# The phrase is downcased, stripped of the characters <tt>! . ? ( )</tt> and
# split on whitespace. For every word that has an entry in the conjugation
# table (@con_id), each entry of the global $tense_regexes whose key matches
# the pattern built from @id_tiempo[@tiempos[word]] is visited; every regex
# template under it has the literal text "search" replaced by the word and is
# handed to scan_phrase. Entries whose range did not survive in +ranges+ are
# dropped, each remaining key is printed on its own line, and the Hash is
# returned.
#
# The lookup tables are loaded once via get_verbs, get_tiempos,
# get_id_tiempos, get_tenses and get_con_id (defined elsewhere) and cached in
# class-level instance variables.
#
# @param phrase [String] text to analyse
# @param format [String] stored in @format; scan_phrase tests it against
#   /audioverb/ to choose the key layout
# @return [Hash{String => Range}] clause keys mapped to word-index ranges
def self.get_clauses(phrase, format = String.new)
  @format = format
  @verbs ||= get_verbs
  @tiempos ||= get_tiempos
  @id_tiempo ||= get_id_tiempos
  @tense_id ||= get_tenses
  @con_id ||= get_con_id

  clauses = {}
  ranges = []
  phrase = phrase.downcase
  phrase.gsub!(/[!.?\(\)]/, "")
  words = phrase.split(/\s+/)

  words.each_with_index do |word, position|
    # Only words that are known conjugations can anchor a clause.
    next unless @con_id[word]

    $tense_regexes.each do |tense_key, templates_by_tense|
      next unless tense_key.match(/#{@id_tiempo[@tiempos[word]]}/)

      templates_by_tense.each do |tense, templates|
        templates.each do |template|
          pattern = template.to_s.gsub("search", word)
          phrase, clauses, ranges =
            scan_phrase(phrase, clauses, pattern, word, tense, position, ranges)
        end
      end
    end
  end

  # Keep only the clauses whose range is still present after prioritisation.
  clauses.delete_if { |_key, span| !ranges.include?(span) }
  clauses.each_key { |key| print "#{key}\n" }
  clauses
end
.get_match_start_index(verb, match, index) ⇒ Object
39 40 41 42 43 44 45 46 |
# File 'lib/clause_extractor.rb', line 39
#
# Converts the position of +verb+ inside a matched clause into a pair of
# word indexes.
#
# The last (case-insensitive) occurrence of +verb+ in +match+ is located and
# the whitespace-separated words preceding it are counted. +lo+ is +index+
# minus that count and +hi+ is +lo+ plus that count (i.e. +index+ itself).
#
# The verb is matched literally: it is passed through Regexp.escape so that
# characters such as "." or "(" cannot change the pattern or raise
# RegexpError.
#
# @param verb [String] the word to locate inside +match+
# @param match [String] the matched clause text
# @param index [Integer] word index of +verb+ in the full phrase
# @return [Array(Integer, Integer)] +[lo, hi]+
# @raise [TypeError] if +verb+ does not occur in +match+ (the lookup yields
#   nil, which cannot be used as a slice length)
def self.get_match_start_index(verb, match, index)
  literal_verb = Regexp.escape(verb.to_s)
  # Start position of the last occurrence of the verb in the match: the
  # negative lookahead rejects any occurrence that is followed by another.
  verb_index_in_match = match.index(/#{literal_verb}(?!.*#{literal_verb})/i)
  # Number of words between the start of the match and the verb.
  words_before_verb = match[0, verb_index_in_match].split(/\s+/).size
  lo = index - words_before_verb
  hi = lo + words_before_verb
  [lo, hi]
end
.prioritize_ranges(ranges, lo, hi, match) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/clause_extractor.rb', line 62
#
# Merges the candidate range <tt>lo..hi</tt> into +ranges+, preferring longer
# ranges over shorter ones. +ranges+ is modified in place and returned.
#
# For each existing range:
# * if it starts at +lo+ and is shorter than the candidate, it is replaced by
#   the candidate;
# * otherwise, if the candidate covers its first or last element and the
#   candidate is longer, it is removed;
# * otherwise it is kept.
#
# The candidate is then appended unless some remaining range already contains
# +lo+ or +hi+.
#
# Removal is done after the pass (via nil markers) rather than with
# delete_at inside an index loop, which would shift later elements, skip the
# element following each deletion and eventually index past the end.
#
# @param ranges [Array<Range>] ranges accepted so far (mutated)
# @param lo [Integer] first word index of the candidate
# @param hi [Integer] last word index of the candidate
# @param match [String] unused; kept for interface compatibility
# @return [Array<Range>] the same +ranges+ array
def self.prioritize_ranges(ranges, lo, hi, match)
  candidate = (lo..hi)
  candidate_size = candidate.count

  ranges.map! do |existing|
    if existing.begin == lo && existing.count < candidate_size
      # Same starting point and the new range is longer: take the new one.
      candidate
    elsif (candidate.include?(existing.begin) || candidate.include?(existing.end)) &&
          candidate_size > existing.count
      # Overlapped by a longer candidate: mark for removal.
      nil
    else
      existing
    end
  end
  ranges.compact!

  # Add the candidate only if no surviving range already covers an endpoint.
  ranges << candidate if ranges.none? { |existing| existing.include?(lo) || existing.include?(hi) }
  ranges
end
.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/clause_extractor.rb', line 48
#
# Tries one regex against +phrase+ and, on a hit, records the clause.
#
# When +regex+ (applied case-insensitively) matches, the matched text is
# converted to a word-index span via get_match_start_index, the span is
# merged into +ranges+ via prioritize_ranges, and an entry mapping a
# descriptive key to the span is stored in +list+. The key layout depends on
# @format:
# * matching /audioverb/: "<@tense_id[tense_label]>:<matched text>:<@verbs[verb]>"
# * otherwise:            "<tense_label>:<matched text>:<lo..hi>"
#
# When the regex does not match, the three values are returned untouched.
#
# @param phrase [String] the phrase being scanned
# @param list [Hash] clause key => Range accumulator (mutated)
# @param regex [String] regex source to test against +phrase+
# @param verb [String] the conjugated word that triggered this scan
# @param tense_label [Object] tense identifier used in the key
# @param index [Integer] word index of +verb+ in the phrase
# @param ranges [Array<Range>] spans accepted so far
# @return [Array] +[phrase, list, ranges]+
def self.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges)
  found = phrase.match(/#{regex}/i)
  return phrase, list, ranges unless found

  clause = found.to_s
  lo, hi = get_match_start_index(verb, clause, index)
  ranges = prioritize_ranges(ranges, lo, hi, clause)
  span = (lo..hi)

  key =
    if @format.match(/audioverb/)
      "#{@tense_id[tense_label.to_s]}:#{clause}:#{@verbs[verb]}"
    else
      "#{tense_label}:#{clause}:#{span}"
    end
  list[key] = span

  return phrase, list, ranges
end