Class: XWS
- Inherits:
-
Object
- Object
- XWS
- Defined in:
- lib/xws.rb
Instance Method Summary collapse
-
#initialize(ignore_elements: %i(pre code time)) ⇒ XWS
constructor
A new instance of XWS.
- #scan(node) ⇒ Object
- #words(s) ⇒ Object
Constructor Details
#initialize(ignore_elements: %i(pre code time)) ⇒ XWS
10 11 12 13 14 |
# File 'lib/xws.rb', line 10
# Creates a new XWS instance.
#
# @param ignore_elements [Array<Symbol, String>] element names stored in
#   @ignore_elements, which #scan consults to decide which elements to skip
# @param ignore_words [Array<Symbol, String>] words stored in @ignorewords,
#   which #words strips from its output; defaults to the previously
#   hard-coded list so existing callers see no change
def initialize(ignore_elements: %i(pre code time), ignore_words: %i(the and or))
  @ignore_elements = ignore_elements
  # The ivar keeps its historical name because #words reads @ignorewords.
  @ignorewords = ignore_words
end
Instance Method Details
#scan(node) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/xws.rb', line 16
# Counts how often each word occurs in the text of the elements below +node+.
#
# Every element yielded by +node.each_recursive+ contributes its non-blank,
# stripped text items, unless the element's name is listed in
# @ignore_elements. The collected text is normalised by #words, split on
# whitespace and tallied.
#
# @param node [#each_recursive] a node that yields elements responding to
#   +name+ and +texts+ (presumably an XML/HTML document element -- confirm
#   against the caller)
# @return [Hash{String => Integer}] word => occurrence count, in order of
#   first appearance
def scan(node)
  # Compare whole element names; the previous unanchored regex also skipped
  # any element whose name merely contained an ignored name (e.g. "timeline").
  ignored = Array(@ignore_elements).map(&:to_s)

  fragments = []
  node.each_recursive do |element|
    next if ignored.include?(element.name.to_s)

    fragments.concat(element.texts.map(&:strip).reject(&:empty?))
  end

  words(fragments.join(' ')).split.each_with_object({}) do |word, counts|
    counts[word] = counts.fetch(word, 0) + 1
  end
end
#words(s) ⇒ Object
32 33 34 35 36 37 38 39 40 41 |
# File 'lib/xws.rb', line 32
# Normalises a string for word counting: lower-cases it, strips punctuation
# and removes every whole word listed in @ignorewords.
#
# @param s [String] raw text
# @return [String] the cleaned text; runs of spaces may remain where words
#   were removed
def words(s)
  stop_words = Array(@ignorewords).map { |w| Regexp.escape(w.to_s) }

  cleaned = s.downcase
             .gsub(/\w+'\w+/, '') # remove words containing an apostrophe
             .gsub(/["']/, '') # remove quotation marks
             .gsub(/(\w)[^a-z ]+\B|\B[^a-z #]+(\w)/, '\1\2') # remove non-alphabetical characters from the start or end of words
             .gsub(/\s.\s/, ' ') # remove any single character standing alone between whitespace

  return cleaned if stop_words.empty?

  # Group the alternation so both \b anchors apply to every stop word;
  # ungrouped, /\bthe|and|or\b/ also cut "and" out of "sandwich" and
  # "or" off the end of "for".
  cleaned.gsub(/\b(?:#{stop_words.join('|')})\b/, '')
end