Class: KeywordMatcher::Prophet

Inherits:
Object
  • Object
show all
Defined in:
lib/keyword_matcher/prophet.rb

Constant Summary collapse

# Runs of these characters delimit tokens when a prepared phrase is split
# (see #explode): whitespace, brackets, slashes, quotes and common punctuation.
# Each character is listed once; a repeated entry makes Ruby warn about a
# duplicated range in the character class.
SEPARATOR = %r{[\s\(\)\/*:"#'\[\];<>\\\$\.,=“”«»]+}.freeze
# Unit abbreviations joined with "|" so the string can be interpolated into a
# regexp as an alternation group (used by the patterns in #prepare).
MEASURES = %w[кг г л мл уп ед шт мг пак].join('|').freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(phrase) ⇒ Prophet

Returns a new instance of Prophet.



8
9
10
# File 'lib/keyword_matcher/prophet.rb', line 8

# Stores the raw phrase for later normalization and tokenization.
#
# @param phrase [#gsub] text to analyse; #prepare calls +gsub+ on it, so a
#   String is presumably expected (no validation happens here)
def initialize(phrase)
  @phrase = phrase
end

Instance Attribute Details

#phrase ⇒ Object (readonly)

Returns the value of attribute phrase.



3
4
5
# File 'lib/keyword_matcher/prophet.rb', line 3

# Read-only access to the phrase exactly as it was passed to the constructor.
#
# @return [Object] the unmodified value stored in @phrase
def phrase
  @phrase
end

Instance Method Details

#explode ⇒ Object



12
13
14
15
16
17
# File 'lib/keyword_matcher/prophet.rb', line 12

# Breaks the normalized phrase into tokens.
#
# The output of #prepare is split on SEPARATOR, each piece is stripped of
# surrounding whitespace, and blank pieces are discarded.
#
# @return [Array<String>] the non-blank tokens in their original order
def explode
  pieces = prepare.split(SEPARATOR)
  trimmed = pieces.map(&:strip)
  trimmed.reject(&:blank?)
end

#prepare ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/keyword_matcher/prophet.rb', line 19

# Normalizes the raw phrase so that it can be split into tokens.
#
# The substitutions run in the order written and later ones rely on the output
# of earlier ones (for example the decimal separator is turned into "-" before
# the zero-fraction cleanup looks for "-0"). The result is downcased last, so
# the camelCase split still sees the original letter case.
#
# @return [String] the normalized, lower-cased phrase
def prepare
  phrase.gsub(/\d{5,}/, '') # drop runs of five or more digits
        .gsub(/(\p{Ll}{2,})(\d+\S)/, '\1 \2') # detach digits glued to a lowercase word of 2+ letters
        .gsub(/%([\p{L}\d])/, '% \1') # add a space after a percent sign
        .gsub(/(\d)[\.,](\d)/, '\1-\2') # turn a , or . between two digits into -
        .gsub(/(\d)[\.,\s]+(#{MEASURES})\.?/i, '\1\2') # glue a number to its measure, dropping the gap and a trailing dot
        .gsub(/(\p{Ll})(\p{Lu})/, '\1 \2') # split camelCase words
        .gsub(/(\d)-0+(#{MEASURES})/i, '\1\2') # drop an all-zero fraction before a measure ("1-0л" -> "1л")
        .gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3') # add a space between a word and the amount that follows it
        .gsub(/(\d+)(#{MEASURES})([xх])/i, '\1\2 \3') # add a space before a multiplier sign: Latin x or Cyrillic х (not "|")
        .gsub(/([а-я])([a-z]{2,})/i, '\1 \2') # add a space where Cyrillic text runs into Latin text
        .downcase
end