Class: KeywordFinder::Keywords

Inherits:
Array
  • Object
show all
Defined in:
lib/keyword_finder/keywords.rb

Instance Method Summary collapse

Instance Method Details

#clean_sentence(sentence) ⇒ Object



19
20
21
# File 'lib/keyword_finder/keywords.rb', line 19

# Surrounds sentence punctuation with spaces so that a word directly followed
# by a punctuation mark ends up separated from it by whitespace.
#
# @param sentence [String] the text to prepare
# @return [String] a copy in which every ".", "?", "," and ";" is wrapped in
#   single spaces
def clean_sentence sentence
  # '\1' is gsub's back-reference to the captured punctuation mark. "$1" in a
  # replacement string is not expanded and would be inserted literally.
  sentence.gsub(/([.?,;])/, ' \1 ')
end

#combine_more_specifics(sentence) ⇒ Object



23
24
25
26
27
# File 'lib/keyword_finder/keywords.rb', line 23

# Removes the parentheses from words that have a parenthesized part attached
# directly to letters, e.g. "keyword(s)" becomes "keywords" and "(re)build"
# becomes "rebuild". Parentheses that are not glued to letters are left as is.
#
# @param sentence [String]
# @return [String] a new string with the attached parentheses dropped
def combine_more_specifics(sentence)
  parens = /(\(|\))/
  letters_after = /([A-Za-z]*\([A-Za-z]*\)[A-Za-z]+)/
  letters_before = /([A-Za-z]+\([A-Za-z]*\)[A-Za-z]*)/

  # Pass 1 handles "(x)yz" / "ab(x)yz"; pass 2 handles what is left, "ab(x)".
  first_pass = sentence.gsub(letters_after) { |word| word.gsub(parens, '') }
  first_pass.gsub(letters_before) { |word| word.gsub(parens, '') }
end

#escape_regex_chars(string) ⇒ Object



6
7
8
# File 'lib/keyword_finder/keywords.rb', line 6

# Builds a regex-safe, lower-cased version of the given text.
#
# @param string [String] raw text
# @return [String] the text with regex metacharacters backslash-escaped and
#   the result lower-cased
def escape_regex_chars(string)
  escaped = Regexp.escape(string)
  escaped.downcase
end

#find_in(sentence, options = {}) ⇒ Object

Finds this instance's keywords in a sentence.

Parameters:

  • sentence (String)

    that might contain the keywords this instance was initialized with

  • options (Hash)

    notably the :subsentences_strategy, which can be one of :none, :ignore_if_found_in_main, :always_ignore



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/keyword_finder/keywords.rb', line 47

# Finds keywords in a sentence.
#
# The lower-cased sentence is scanned twice: once as a whole, and once split
# into its main part and its parenthesized sub-sentences (after
# combine_more_specifics has been applied). Both result sets are then handed
# to select_the_best_results.
#
# @param sentence [String] text that might contain the keywords
# @param options [Hash] notably :subsentences_strategy, one of
#   :none (default), :ignore_if_found_in_main, :always_ignore
# @return [Object] whatever select_the_best_results returns for the
#   split-sentence matches and the whole-sentence matches
def find_in(sentence, options = {})
  strategy = { subsentences_strategy: :none }.merge(options)[:subsentences_strategy]

  lowered = sentence.downcase
  whole_hits = scan_part(lowered)

  parts = separate_main_and_sub_sentences(combine_more_specifics(lowered))
  main_hits = scan_part(parts[:main])

  # Sub-sentences are skipped either unconditionally, or when the main
  # sentence already produced a match and the strategy asks for that.
  skip_subs = strategy == :always_ignore ||
              (!main_hits.empty? && strategy == :ignore_if_found_in_main)
  sub_hits = skip_subs ? [] : parts[:subs].flat_map { |sub| scan_part(sub) }

  select_the_best_results(main_hits + sub_hits, whole_hits)
end

#ordered_by_length ⇒ Object



3
4
5
# File 'lib/keyword_finder/keywords.rb', line 3

# Returns the elements sorted by descending length (longest first), as a new
# collection; the receiver itself is not reordered.
#
# @return [Array] the elements, longest first
def ordered_by_length
  sort do |left, right|
    right.length <=> left.length
  end
end

#scan_in(sentence) ⇒ Object



15
16
17
# File 'lib/keyword_finder/keywords.rb', line 15

# Scans a sentence with the keyword regex from to_regex.
#
# @param sentence [String] text to scan
# @return [Array] the raw result of String#scan on the padded sentence
def scan_in(sentence)
  # Pad with one space on each side so that a match at the very start or end
  # of the sentence still has whitespace around it.
  padded = " #{sentence} "
  padded.scan(to_regex)
end

#scan_part(sentence) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/keyword_finder/keywords.rb', line 29

# Scans one (sub-)sentence for keywords and returns tidy matches.
#
# The sentence is passed through clean_sentence and scan_in; the raw captures
# are flattened, nils are dropped, each match is stripped of surrounding
# whitespace, blank matches are discarded and duplicates are removed.
#
# @param sentence [String] the text to scan
# @return [Array<String>] distinct, stripped, non-empty matches in order of
#   first appearance
def scan_part sentence
  # De-duplicate only after stripping: the same keyword can be captured with
  # different surrounding whitespace (e.g. a space vs. a tab), and those
  # captures must collapse into a single result.
  scan_in(clean_sentence(sentence)).
    flatten.
    compact.
    map(&:strip).
    reject(&:empty?).
    uniq
end

#select_the_best_results(result_set_a, result_set_b) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/keyword_finder/keywords.rb', line 73

# Merges two result sets, keeping only the most specific matches.
#
# Every result of set A is compared with every result of set B. When one
# contains the other (after escape_regex_chars has been applied to the
# contained one), the shorter, contained result is dropped from its set. For
# identical results the copy in set B is dropped.
#
# Neither argument is modified; a new array is returned.
#
# @param result_set_a [Array<String>]
# @param result_set_b [Array<String>]
# @return [Array<String>] the surviving results of A followed by those of B
def select_the_best_results result_set_a, result_set_b
  superseded_in_a = []
  superseded_in_b = []

  result_set_a.each do |result_a|
    pattern_a = escape_regex_chars(result_a)
    result_set_b.each do |result_b|
      if result_a.match(escape_regex_chars(result_b))
        # A's match contains B's match, so B's is redundant
        superseded_in_b << result_b
      elsif result_b.match(pattern_a)
        # B's match is the more specific one
        superseded_in_a << result_a
      end
    end
  end

  # Array#- builds new arrays, so the caller's arrays stay untouched
  (result_set_a - superseded_in_a) + (result_set_b - superseded_in_b)
end

#separate_main_and_sub_sentences(sentence) ⇒ Object



98
99
100
101
102
103
104
# File 'lib/keyword_finder/keywords.rb', line 98

# Splits a sentence into its main part and its parenthesized sub-sentences.
#
# Each balanced "( ... )" group becomes its own sub-sentence, so two separate
# parentheticals in one sentence are no longer merged together with the text
# between them. A group nested inside another one stays part of the outer
# group.
#
# @param sentence [String]
# @return [Hash] :main is the sentence with all groups removed and stripped;
#   :subs is an Array of the group contents without the enclosing parentheses,
#   each stripped
def separate_main_and_sub_sentences sentence
  # [^()] stops a match at its own closing parenthesis; \g<group> re-enters
  # the pattern so that nested parentheses are consumed as one unit.
  parenthesized = /(?<group>\((?:[^()]|\g<group>)*\))/

  subs = sentence.scan(parenthesized).flatten
  main = subs.inject(sentence) { |rest, sub| rest.gsub(sub, "") }

  { main: main.strip, subs: subs.map { |sub| sub[1..-2].strip } }
end

#to_regex ⇒ Object



9
10
11
12
13
# File 'lib/keyword_finder/keywords.rb', line 9

# Builds (once) the regex that matches any of the keywords in this collection.
#
# Keywords are tried longest first. Each one must be preceded by a whitespace
# character and followed by one. The regex is memoized in @to_regex, so
# elements added or removed after the first call are not reflected.
#
# @return [Regexp] a single capturing group of alternatives; a capture
#   includes the leading whitespace character but not the trailing one
def to_regex
  @to_regex ||= begin
    # The trailing whitespace is a lookahead rather than part of the match:
    # if it were consumed, the next keyword after a single space would have
    # no leading whitespace left to match and would be missed.
    alternatives = ordered_by_length.map do |keyword|
      "\\s#{escape_regex_chars(keyword)}(?=\\s)"
    end
    Regexp.new("(#{alternatives.join('|')})")
  end
end