Class: KeywordMatcher::Prophet

Inherits:
Object
  • Object
show all
Defined in:
lib/keyword_matcher/prophet.rb

Constant Summary collapse

PRECISION =
0.5
SPLIT =
0.2
SEPARATOR =
%r{[\s\(\)\/*:"'\\\/\$\.,=]+}
MEASURES =
'кг|г|л|мл|уп|ед|шт|мг|пак'.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(phrase) ⇒ Prophet

Returns a new instance of Prophet.



10
11
12
# File 'lib/keyword_matcher/prophet.rb', line 10

# Creates a new Prophet and stores the given phrase in @phrase.
#
# @param phrase [Object] value to keep; presumably a String, since other
#   methods may call String methods on it (TODO confirm against callers)
def initialize(phrase)
  @phrase = phrase
end

Instance Attribute Details

#phrase ⇒ Object (readonly)

Returns the value of attribute phrase.



3
4
5
# File 'lib/keyword_matcher/prophet.rb', line 3

# Reader for the @phrase instance variable.
#
# @return [Object] whatever is currently stored in @phrase
def phrase
  @phrase
end

Instance Method Details

#explode ⇒ Object



14
15
16
17
18
19
20
21
# File 'lib/keyword_matcher/prophet.rb', line 14

# Breaks the prepared phrase into a list of tokens.
#
# The result of #prepare is split on SEPARATOR; every piece is stripped and
# lowercased, and a piece is dropped when it is blank or contains a run of
# five or more digits.
#
# NOTE(review): `blank?` is not core Ruby; presumably supplied by
# ActiveSupport (verify it is loaded wherever this class is used).
#
# @return [Array<String>] the remaining tokens, in their original order
def explode
  pieces = prepare.split(SEPARATOR)
  tokens = pieces.map { |piece| piece.strip.downcase }
  tokens.reject { |token| token.blank? || token =~ /\d{5,}/ }
end

#prepare ⇒ Object



23
24
25
26
27
28
29
30
31
32
# File 'lib/keyword_matcher/prophet.rb', line 23

# Normalizes the phrase ahead of tokenization.
#
# Applies a fixed sequence of substitutions to `phrase` (order matters, as
# later steps rely on the output of earlier ones) and lowercases the result.
#
# @return [String] the normalized, lowercased phrase
def prepare
  phrase.gsub(/(\p{Ll}{2,})(\d+\S)/, '\1 \2') # separate 2+ lowercase letters from digits glued to them (digits must be followed by another non-space char)
        .gsub(/%([\p{L}\d])/, '% \1') # add a space after a percent sign that is followed by a letter or digit
        .gsub(/(?<=\d)[.,](?=\d)/, '-') # turn "." / "," between digits into "-"; lookarounds keep the digits unconsumed so chains like "1.2.3" are fully converted
        .gsub(/(\d)[\.,\s]+(#{MEASURES})\.?/i, '\1\2') # glue a number to the measure that follows it and drop a period right after the measure
        .gsub(/(\p{Ll})(\p{Lu})/, '\1 \2') # split camelCase: space between a lowercase and an uppercase letter
        .gsub(/(\d)-0+(#{MEASURES})/i, '\1\2') # drop an all-zero fractional part before a measure ("1-0кг" -> "1кг")
        .gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3') # separate a letter from a number+measure glued to it
        .downcase
end