Class: KeywordFinder::Keywords
- Inherits:
-
Array
- Object
- Array
- KeywordFinder::Keywords
- Defined in:
- lib/keyword_finder/keywords.rb
Instance Method Summary collapse
- #clean_sentence(sentence) ⇒ Object
- #combine_more_specifics(sentence) ⇒ Object
- #escape_regex_chars(string) ⇒ Object
-
#find_in(sentence, options = {}) ⇒ Object
Finds the keywords that occur in a sentence.
- #ordered_by_length ⇒ Object
- #scan_in(sentence) ⇒ Object
- #scan_part(sentence) ⇒ Object
- #select_the_best_results(result_set_a, result_set_b) ⇒ Object
- #separate_main_and_sub_sentences(sentence) ⇒ Object
- #to_regex ⇒ Object
Instance Method Details
#clean_sentence(sentence) ⇒ Object
19 20 21 |
# File 'lib/keyword_finder/keywords.rb', line 19
#
# Pads sentence punctuation with spaces so that a word directly followed by
# ".", "?", "," or ";" is still delimited by whitespace.
#
# @param sentence [String] the text to prepare
# @return [String] a copy of +sentence+ with a space on each side of every
#   ".", "?", "," and ";"
def clean_sentence(sentence)
  # '\0' is the whole match. The previous replacement " $1 " was inserted
  # literally, because gsub replacement strings use backslash references.
  sentence.gsub(/[.?,;]/, ' \0 ')
end
#combine_more_specifics(sentence) ⇒ Object
23 24 25 26 27 |
# File 'lib/keyword_finder/keywords.rb', line 23
#
# Removes parentheses that are glued to letters, so that "keyword(s)" becomes
# "keywords" and "(re)organize" becomes "reorganize". Parenthesised text that
# is not attached to a letter on either side is left untouched.
#
# @param sentence [String] the text to rewrite
# @return [String] a copy of +sentence+ with the attached parentheses removed
def combine_more_specifics(sentence)
  sentence
    .gsub(/[A-Za-z]*\([A-Za-z]*\)[A-Za-z]+/) { |word| word.delete('()') }
    .gsub(/[A-Za-z]+\([A-Za-z]*\)[A-Za-z]*/) { |word| word.delete('()') }
end
#escape_regex_chars(string) ⇒ Object
6 7 8 |
# File 'lib/keyword_finder/keywords.rb', line 6
#
# Escapes every regular-expression metacharacter in +string+ and lower-cases
# the result.
#
# @param string [String] the text to escape
# @return [String] the escaped, lower-cased text
def escape_regex_chars(string)
  escaped = Regexp.escape(string)
  escaped.downcase
end
#find_in(sentence, options = {}) ⇒ Object
Finds the keywords that occur in a sentence.
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/keyword_finder/keywords.rb', line 47
#
# Finds the keywords that occur in a sentence.
#
# The lower-cased sentence is scanned twice: once as a whole, and once after
# attached parentheses have been merged into their words and the remaining
# parenthesised sub-sentences have been split off from the main sentence.
# Both result sets are then reconciled by #select_the_best_results.
#
# @param sentence [String] the text to search
# @param options [Hash] search options
# @option options [Symbol] :subsentences_strategy (:none) how sub-sentences
#   are treated: +:none+ always scans them, +:ignore_if_found_in_main+ skips
#   them when the main sentence already produced a result, and
#   +:always_ignore+ never scans them
# @return [Array<String>] the keywords found
def find_in(sentence, options = {})
  options = { subsentences_strategy: :none }.merge(options)
  strategy = options[:subsentences_strategy]

  sentence = sentence.downcase
  full_sentence_results = scan_part(sentence)

  main_and_subs = separate_main_and_sub_sentences(combine_more_specifics(sentence))
  main_results = scan_part(main_and_subs[:main])

  skip_subs = strategy == :always_ignore ||
              (strategy == :ignore_if_found_in_main && !main_results.empty?)
  sub_results = skip_subs ? [] : main_and_subs[:subs].flat_map { |subsentence| scan_part(subsentence) }

  select_the_best_results(main_results + sub_results, full_sentence_results)
end
#ordered_by_length ⇒ Object
3 4 5 |
# File 'lib/keyword_finder/keywords.rb', line 3
#
# Returns the elements sorted from longest to shortest.
#
# @return [Array] a new array ordered by descending +length+
def ordered_by_length
  sort { |left, right| right.length <=> left.length }
end
#scan_in(sentence) ⇒ Object
15 16 17 |
# File 'lib/keyword_finder/keywords.rb', line 15
#
# Scans +sentence+ with the expression returned by #to_regex. One space is
# added on each side of the sentence before scanning.
#
# @param sentence [String] the text to scan
# @return [Array] the raw result of String#scan
def scan_in(sentence)
  padded = " #{sentence} "
  padded.scan(to_regex)
end
#scan_part(sentence) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/keyword_finder/keywords.rb', line 29
#
# Scans one piece of text and returns the matches in a tidy form.
#
# The text is passed through #clean_sentence and #scan_in; the raw matches
# are then flattened, stripped of surrounding whitespace, and blank or
# duplicate entries are dropped.
#
# @param sentence [String] the text to scan
# @return [Array<String>] the distinct, stripped matches in order of first
#   appearance
def scan_part(sentence)
  scan_in(clean_sentence(sentence))
    .flatten
    .compact
    .map(&:strip)
    .reject(&:empty?)
    .uniq # de-duplicate after stripping so " a " and "\ta " count as one match
end
#select_the_best_results(result_set_a, result_set_b) ⇒ Object
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/keyword_finder/keywords.rb', line 73
#
# Merges two result sets, dropping every result that is contained in a result
# of the other set.
#
# For each pair, if the result from set A contains the lower-cased result
# from set B, the B result is dropped; otherwise, if the B result contains
# the lower-cased A result, the A result is dropped. Neither argument is
# modified.
#
# @param result_set_a [Array<String>] first set of matches
# @param result_set_b [Array<String>] second set of matches
# @return [Array<String>] the surviving results of A followed by those of B
def select_the_best_results(result_set_a, result_set_b)
  redundant_in_a = []
  redundant_in_b = []

  result_set_a.each do |result_a|
    result_set_b.each do |result_b|
      # A literal substring test: equivalent to matching against the escaped,
      # lower-cased text, without building a Regexp for every pair.
      if result_a.include?(result_b.downcase)
        redundant_in_b << result_b
      elsif result_b.include?(result_a.downcase)
        redundant_in_a << result_a
      end
    end
  end

  (result_set_a - redundant_in_a) + (result_set_b - redundant_in_b)
end
#separate_main_and_sub_sentences(sentence) ⇒ Object
98 99 100 101 102 103 104 |
# File 'lib/keyword_finder/keywords.rb', line 98
#
# Splits a sentence into its main part and its parenthesised sub-sentences.
#
# Every balanced "( ... )" group is removed from the sentence and returned
# separately, without its outer parentheses. A group that contains nested
# parentheses is kept as a single sub-sentence.
#
# @param sentence [String] the text to split
# @return [Hash] +:main+ holds the stripped sentence without the groups,
#   +:subs+ holds the stripped content of each group in order of appearance
def separate_main_and_sub_sentences(sentence)
  # Matches one balanced group; \g<group> recurses for nested parentheses.
  # A greedy /\(.*\)/ would instead span from the first "(" to the last ")".
  balanced_group = /(?<group>\((?:[^()]|\g<group>)*\))/

  subs = []
  main = sentence.gsub(balanced_group) do |group|
    subs << group[1..-2].strip
    ''
  end

  { main: main.strip, subs: subs }
end
#to_regex ⇒ Object
9 10 11 12 13 |
# File 'lib/keyword_finder/keywords.rb', line 9
#
# Builds the regular expression used to find the keywords in a text.
#
# The expression is one capture group with an alternative per keyword,
# longest keyword first. A keyword matches only when it is preceded by a
# whitespace character and followed by one.
#
# The expression is cached in @to_regex on first use, so keywords added or
# removed afterwards are not reflected.
#
# @return [Regexp] the combined keyword expression
def to_regex
  @to_regex ||= begin
    # The trailing whitespace is only looked at, not consumed, so that it can
    # also serve as the leading whitespace of a keyword that follows directly.
    alternatives = ordered_by_length.map do |keyword|
      "\\s#{escape_regex_chars(keyword)}(?=\\s)"
    end
    Regexp.new("(#{alternatives.join('|')})")
  end
end