Class: XWS

Inherits:
Object
  • Object
show all
Defined in:
lib/xws.rb

Instance Method Summary collapse

Constructor Details

#initialize(ignore_elements: %i(pre code time)) ⇒ XWS



10
11
12
13
14
# File 'lib/xws.rb', line 10

# Sets up the two exclusion lists held by the instance.
#
# @param ignore_elements [Array<Symbol>] element names, stored in
#   @ignore_elements and consulted by #scan when walking a node
def initialize(ignore_elements: %i(pre code time))
  # Words that #words strips from the text (fixed list).
  @ignorewords = %i[the and or]
  @ignore_elements = ignore_elements
end

Instance Method Details

#scan(node) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/xws.rb', line 16

# Counts how often each word occurs in the text found beneath +node+.
#
# Every element yielded by +node.each_recursive+ contributes its stripped,
# non-empty text fragments, unless its name is listed in @ignore_elements.
# The collected text is normalised by #words before it is split and counted.
#
# @param node [#each_recursive] object yielding elements that respond to
#   +name+ and +texts+
#   (NOTE(review): presumably a Rexle/REXML-style element - confirm)
# @return [Hash{String => Integer}] word => number of occurrences, in
#   first-seen order; words that never occur are absent from the hash
def scan(node)
  # Compare whole names. A substring regex would also skip elements such as
  # <timeline> or <barcode>, and an empty ignore list would build // which
  # matches (and therefore skips) every element.
  ignored = @ignore_elements.map(&:to_s)
  fragments = []

  node.each_recursive do |element|
    next if ignored.include?(element.name.to_s)

    fragments.concat(element.texts.map(&:strip).reject(&:empty?))
  end

  # Plain Hash (no default value) so lookups of unseen words still give nil.
  words(fragments.join(' ')).split.each_with_object({}) do |word, counts|
    counts[word] = counts.fetch(word, 0) + 1
  end
end

#words(s) ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/xws.rb', line 32

# Normalises free text into a lower-case string of plain words.
#
# Whitespace is not collapsed: removed words leave their surrounding spaces
# behind, so callers that need tokens should split the result.
#
# @param s [String] raw text
# @return [String] the cleaned text
def words(s)
  # Regexp.union escapes each entry, and an interpolated Regexp is embedded
  # as its own group, so both \b anchors apply to every alternative. The
  # ungrouped form (\bthe|and|or\b) also stripped "and" out of "sandy" and
  # "or" off the end of "door".
  ignored = Regexp.union(@ignorewords.map(&:to_s))

  text = s.downcase
  # Drop whole words that contain an apostrophe (e.g. contractions).
  text = text.gsub(/\w+'\w+/, '')
  # Drop any remaining quotation marks.
  text = text.gsub(/["']/, '')
  # Trim non-alphabetic characters attached to the end or the start of a
  # word, keeping the adjacent word character.
  text = text.gsub(/(\w)[^a-z ]+\B|\B[^a-z #]+(\w)/, '\1\2')
  # Replace any single character that stands between two whitespace
  # characters with one space. Matches cannot overlap, so in a run of
  # consecutive single characters only every other one is removed.
  text = text.gsub(/\s.\s/, ' ')
  # Remove the ignored words, matching whole words only.
  text.gsub(/\b#{ignored}\b/, '')
end