Module: LCBO::CrawlKit::TagHelper

Defined in:
lib/lcbo/crawlkit/tag_helper.rb

Constant Summary

# Matches a double quote, backslash, forward slash, parenthesis, square
# bracket, or period. `flatten` deletes every match from the tag text.
DELETION_RE =
/\"|\\|\/|\(|\)|\[|\]|\./
# Matches an asterisk, plus sign, ampersand, underscore, comma, or any
# whitespace character. `flatten` replaces each match with a single space.
WHITESPACE_RE =
/\*|\+|\&|\_|\,|\s/

Class Method Summary

Class Method Details

.[](*values) ⇒ Object


47
48
49
50
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 47

# Builds the list of tags for one or more values.
#
# Returns an empty array when every value is blank after being converted
# to a string and stripped (this includes the case of no values at all).
# Otherwise the values are passed through `flatten` and the resulting
# string is passed through `split`.
def self.[](*values)
  has_content = values.any? { |value| !value.to_s.strip.empty? }
  return [] unless has_content

  normalized = flatten(values)
  split(normalized)
end

.flatten(values) ⇒ Object


7
8
9
10
11
12
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 7

# Collapses a (possibly nested) array of values into one normalized string.
#
# The values are flattened and joined with single spaces, passed through
# TitleCaseHelper.downcase (presumably lowercasing; defined elsewhere),
# stripped of every DELETION_RE match, and have every WHITESPACE_RE match
# replaced by a space. Leading and trailing whitespace is removed last;
# runs of interior spaces are not collapsed here.
def self.flatten(values)
  combined = values.flatten.join(' ')
  lowered = TitleCaseHelper.downcase(combined)
  without_punctuation = lowered.gsub(DELETION_RE, '')
  without_punctuation.gsub(WHITESPACE_RE, ' ').strip
end

.split(str) ⇒ Object


14
15
16
17
18
19
20
21
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 14

# Turns a normalized string into a list of unique tag tokens.
#
# The string is joined with its `to_ascii` form (a String extension provided
# outside this file; presumably a transliteration), so both spellings'
# words are considered. Each whitespace-separated word is expanded by
# `stem`, and duplicates are removed while keeping first-seen order.
def self.split(str)
  searchable = [str, str.to_ascii].join(' ')
  words = searchable.split
  words.flat_map { |word| stem(word) }.uniq
end

.stem(word) ⇒ Object


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/lcbo/crawlkit/tag_helper.rb', line 23

# Expands a single word into the list of tag variants it should match.
#
# * A hyphenated word yields its hyphen-separated parts, then the word
#   itself, then the parts joined with no separator:
#   "foo-bar" => ["foo", "bar", "foo-bar", "foobar"].
# * Any variant containing an apostrophe is followed by a copy with the
#   apostrophes removed: "o'brien" => ["o'brien", "obrien"].
# * Any other word yields just itself.
#
# Empty strings are never returned. Stray hyphens ("-foo", "a--b", "-") or a
# lone apostrophe would otherwise produce "" as a token.
#
# @param word [String] a single whitespace-free word
# @return [Array<String>] non-empty variants in the order described above;
#   duplicates are possible (e.g. "foo-" gives "foo" twice)
def self.stem(word)
  candidates =
    if word.include?('-')
      parts = word.split('-')
      parts + [word, parts.join]
    else
      [word]
    end

  tokens = candidates.flat_map do |candidate|
    candidate.include?("'") ? [candidate, candidate.delete("'")] : [candidate]
  end

  # Splitting on a leading/doubled hyphen, or stripping a lone apostrophe,
  # leaves "" behind; an empty tag is never meaningful.
  tokens.reject(&:empty?)
end