Class: KeywordFinder::Keywords
- Inherits:
- Array
- Inheritance chain: Object < Array < KeywordFinder::Keywords
- Defined in:
- lib/keyword_finder/keywords.rb
Instance Method Summary
- #clean_sentence(sentence) ⇒ Object
- #combine_more_specifics(sentence) ⇒ Object
- #escape_regex_chars(string) ⇒ Object
- #find_in(sentence, options = {}) ⇒ Object
Finds the keywords of this list that occur in a sentence.
- #ordered_by_length ⇒ Object
- #scan_in(sentence, options = {}) ⇒ Object
- #scan_part(sentence, options = {}) ⇒ Object
- #select_the_best_results(result_set_a, result_set_b) ⇒ Object
- #separate_main_and_sub_sentences(sentence) ⇒ Object
- #to_regex(options = {}) ⇒ Object
Instance Method Details
#clean_sentence(sentence) ⇒ Object
27 28 29 |
# File 'lib/keyword_finder/keywords.rb', line 27

# Normalises a sentence before it is scanned for keywords.
#
# Every '.', '?', ',' and ';' is padded with one space on each side, so
# punctuation ends up as its own whitespace-delimited token. After that every
# whitespace character matched by [[:space:]] (newlines, tabs, and Unicode
# spaces in a Unicode string) becomes a plain space.
#
# @param sentence [String] raw text
# @return [String] a new, normalised string; the argument is not modified
def clean_sentence(sentence)
  # Block form is used because " $1 " in a replacement string is NOT a
  # back-reference in Ruby; it would insert the literal characters "$1".
  padded = sentence.gsub(/[.?,;]/) { |mark| " #{mark} " }
  padded.gsub(/[[:space:]]/, ' ')
end
#combine_more_specifics(sentence) ⇒ Object
31 32 33 34 35 |
# File 'lib/keyword_finder/keywords.rb', line 31

# Merges parenthesised word parts into the word they are attached to, e.g.
# "word(s)" becomes "words" and "(un)known" becomes "unknown".
#
# Only groups made of ASCII letters with at least one ASCII letter directly
# before or after the parentheses are merged; a free-standing "(aside)" is
# left untouched.
#
# @param sentence [String]
# @return [String] a new string with the attached parentheses removed
def combine_more_specifics(sentence)
  drop_parens = ->(fragment) { fragment.delete('()') }

  # First pass: letters follow the closing parenthesis.
  letters_after = /([A-Za-z]*\([A-Za-z]*\)[A-Za-z]+)/
  # Second pass: letters precede the opening parenthesis.
  letters_before = /([A-Za-z]+\([A-Za-z]*\)[A-Za-z]*)/

  first_pass = sentence.gsub(letters_after, &drop_parens)
  first_pass.gsub(letters_before, &drop_parens)
end
#escape_regex_chars(string) ⇒ Object
6 7 8 |
# File 'lib/keyword_finder/keywords.rb', line 6

# Turns a string into a lower-cased regular-expression source that matches
# the string literally.
#
# @param string [String]
# @return [String] the escaped, lower-cased pattern source
def escape_regex_chars(string)
  escaped = Regexp.escape(string)
  escaped.downcase
end
#find_in(sentence, options = {}) ⇒ Object
Finds the keywords of this list that occur in a sentence.
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/keyword_finder/keywords.rb', line 55

# Finds the keywords of this list that occur in a sentence.
#
# The sentence is lower-cased and scanned twice: once as a whole, and once
# split into a main sentence and its parenthesised sub-sentences (after
# parenthesised word parts such as "word(s)" have been merged). Both result
# sets are then reconciled by #select_the_best_results.
#
# @param sentence [String] text to search
# @param options [Hash]
# @option options [Symbol] :subsentences_strategy (:none)
#   :none                    - always scan the sub-sentences
#   :ignore_if_found_in_main - skip them when the main sentence had a match
#   :always_ignore           - never scan them
# @option options [Object] :entire_words_only (true) passed on to #scan_part
# @return [Array<String>] the matched keywords
def find_in(sentence, options = {})
  options = {
    subsentences_strategy: :none, # :none, :ignore_if_found_in_main, :always_ignore
    entire_words_only: true
  }.merge(options)

  sentence = sentence.downcase.gsub(/\n/, " ")
  full_sentence_results = scan_part(sentence, options)

  sentence = combine_more_specifics(sentence)
  main_and_subs = separate_main_and_sub_sentences(sentence)
  main_results = scan_part(main_and_subs[:main], options)

  strategy = options[:subsentences_strategy]
  skip_subs = strategy == :always_ignore ||
              (main_results.count > 0 && strategy == :ignore_if_found_in_main)

  sub_results =
    if skip_subs
      []
    else
      main_and_subs[:subs].collect { |subsentence| scan_part(subsentence, options) }.flatten
    end

  clean_sentence_results = main_results + sub_results
  select_the_best_results(clean_sentence_results, full_sentence_results)
end
#ordered_by_length ⇒ Object
3 4 5 |
# File 'lib/keyword_finder/keywords.rb', line 3

# Returns the entries sorted from longest to shortest.
#
# @return [Array] a new, sorted array; the receiver is left as it is
def ordered_by_length
  sort do |left, right|
    right.length <=> left.length
  end
end
#scan_in(sentence, options = {}) ⇒ Object
23 24 25 |
# File 'lib/keyword_finder/keywords.rb', line 23

# Scans a sentence with the regular expression built by #to_regex.
#
# The sentence is wrapped in one leading and one trailing space before it is
# scanned (presumably so that keywords at the very start or end can still
# match a pattern that requires surrounding whitespace).
#
# @param sentence [String] text to scan
# @param options [Hash] passed on unchanged to #to_regex
# @return [Array<Array<String>>] the raw result of String#scan
def scan_in(sentence, options = {})
  " #{sentence} ".scan(to_regex(options))
end
#scan_part(sentence, options = {}) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/keyword_finder/keywords.rb', line 37

# Cleans a piece of text, scans it for keywords and tidies up the matches.
#
# The raw scan result is flattened, de-duplicated and stripped of nils; each
# match is then stripped of surrounding whitespace and empty matches are
# dropped.
#
# @param sentence [String] text to scan
# @param options [Hash] passed on unchanged to #scan_in
# @return [Array<String>] the matched keywords
def scan_part(sentence, options = {})
  raw_matches = scan_in(clean_sentence(sentence), options).flatten.uniq.compact
  stripped = raw_matches.map(&:strip).reject(&:empty?)
  # NOTE(review): as rendered here both arguments are a plain space, which
  # makes this a no-op; the first one is presumably a non-breaking space in
  # the real source file - confirm against lib/keyword_finder/keywords.rb.
  stripped.map { |match| match.gsub(' ', ' ') }
end
#select_the_best_results(result_set_a, result_set_b) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/keyword_finder/keywords.rb', line 82

# Combines two result sets, keeping only the most specific matches.
#
# Every result of set A is compared with every result of set B. When the A
# result contains the B result (or equals it), the B result is dropped;
# otherwise, when the B result contains the A result, the A result is
# dropped. What remains of A is returned followed by what remains of B.
#
# The argument arrays themselves are left unmodified.
#
# @param result_set_a [Array<String>]
# @param result_set_b [Array<String>]
# @return [Array<String>] a new array
def select_the_best_results(result_set_a, result_set_b)
  redundant_in_a = []
  redundant_in_b = []

  result_set_a.each do |result_a|
    result_set_b.each do |result_b|
      if result_a.match(escape_regex_chars(result_b))
        redundant_in_b << result_b
      elsif result_b.match(escape_regex_chars(result_a))
        redundant_in_a << result_a
      end
    end
  end

  # Array#- builds new arrays, so the caller's arrays stay intact.
  (result_set_a - redundant_in_a) + (result_set_b - redundant_in_b)
end
#separate_main_and_sub_sentences(sentence) ⇒ Object
107 108 109 110 111 112 113 |
# File 'lib/keyword_finder/keywords.rb', line 107

# Splits a sentence into its main part and its parenthesised sub-sentences.
#
# Each balanced, outermost "( ... )" group is treated as one sub-sentence, so
# "a (b) c (d) e" yields the subs "b" and "d" and keeps "c" in the main
# sentence. Nested parentheses stay inside their outer group.
#
# @param sentence [String]
# @return [Hash] main: the sentence without the groups (stripped),
#   subs: the stripped contents of each group, without the outer parentheses
def separate_main_and_sub_sentences(sentence)
  # The named group calls itself, so nested parentheses are matched as part
  # of their enclosing group. A greedy /\(.*\)/ would instead swallow
  # everything between the first "(" and the last ")".
  balanced_group = /(?<group>\((?:[^()]|\g<group>)*\))/

  groups = sentence.scan(balanced_group).flatten
  main = sentence.gsub(balanced_group, "").strip

  { main: main, subs: groups.map { |group| group[1..-2].strip } }
end
#to_regex(options = {}) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/keyword_finder/keywords.rb', line 10

# Builds one regular expression with a single capture group that matches any
# keyword of this list, longest keywords first.
#
# @param options [Hash]
# @option options [Object] :entire_words_only (true)
#   truthy      - every keyword must be surrounded by whitespace (\s)
#   :when_short - only keywords of at most 3 characters need the whitespace
#   false/nil   - keywords may match anywhere
# @return [Regexp]
def to_regex(options = {})
  options = { entire_words_only: true }.merge(options)
  mode = options[:entire_words_only]
  spacer = mode ? "\\s" : ""

  # One regexp is memoised per :entire_words_only value.
  # NOTE(review): nothing here clears the cache when the list changes, so
  # keywords added after the first call look like they would be ignored.
  @to_regex = {} unless defined?(@to_regex)
  @to_regex[mode] ||= begin
    alternatives = ordered_by_length.collect do |keyword|
      keyword_spacer = mode == :when_short && keyword.length > 3 ? "" : spacer
      # NOTE(review): as rendered here both gsub arguments are a plain space
      # (a no-op); the first is presumably a non-breaking space in the real
      # source file - confirm.
      "#{keyword_spacer}#{escape_regex_chars(keyword.gsub(' ', ' '))}#{keyword_spacer}"
    end
    Regexp.new("(#{alternatives.join('|')})")
  end
end