Module: TextParser

Included in:
String
Defined in:
lib/text_parser.rb,
lib/text_parser/version.rb

Constant Summary collapse

VERSION =
"0.3.0"

Instance Method Summary collapse

Instance Method Details

#parse(args = {}) ⇒ Array of Hash

Returns the parsed text as a list of words with their occurrence counts.

args
Symbol

:dictionary, :order, :order_direction, :negative_dictionary, :minimum_length

Parameters:

  • [args] (Hash)

Returns:

  • (Array of Hash)


9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/text_parser.rb', line 9

# Parses the receiver (a String) into a list of words with their hit counts.
#
# Every character other than ASCII letters, digits, hyphens and the À-ú range
# is replaced by a space before matching. Matching is case-insensitive and
# the reported words are downcased.
#
# @param args [Hash] options; entries whose value is nil are ignored.
#   The hash passed in is never modified.
# @option args [Array] :dictionary words to look for; defaults to every word
#   found in the text. Entries are matched literally (regexp metacharacters
#   are escaped) and only between word boundaries. Blank entries are skipped.
# @option args [Array<String>] :negative_dictionary words to leave out of the
#   result (compared case-insensitively). Defaults to [].
# @option args [Integer] :minimum_length drop matches shorter than this.
# @option args [Symbol] :order result key to sort by; defaults to :word.
# @option args [Symbol] :order_direction :desc reverses the sorted result;
#   defaults to :asc.
# @return [Array<Hash>] one {:hits => count, :word => word} per distinct word.
def parse(args = {})
  # Build the option set from a filtered copy so the caller's hash keeps its
  # nil entries (and frozen hashes are accepted).
  options = {
    :order => :word,
    :order_direction => :asc,
    :negative_dictionary => []
  }.merge(args.reject { |_key, value| value.nil? })

  text = self.gsub(/[^A-Za-zÀ-ú0-9\-]/u, " ").strip

  dictionary = options[:dictionary] || text.split(" ")
  words = dictionary.map { |word| word.to_s }.reject { |word| word.empty? }.uniq
  return [] if words.empty?

  # Escape each entry so it is matched literally, and wrap the whole
  # alternation in \b...\b so that every entry -- including the first and the
  # last -- is anchored on both sides.
  alternation = words.map { |word| Regexp.escape(word) }.join("|")
  regex = Regexp.new("\\b(?:#{alternation})\\b", Regexp::IGNORECASE)

  matches = text.scan(regex).map { |match| match.downcase }
  if options[:minimum_length]
    matches = matches.select { |match| match.size >= options[:minimum_length] }
  end

  # Downcase the exclusion list once instead of once per match.
  excluded = options[:negative_dictionary].map { |word| word.downcase }

  # Single counting pass; the hash keeps words in first-seen order.
  hits = Hash.new(0)
  matches.each { |match| hits[match] += 1 }

  # Hash keys are frozen copies, so hand back an unfrozen String per word.
  result = hits.reject { |word, _count| excluded.include?(word) }.map do |word, count|
    { :hits => count, :word => word.dup }
  end

  result = result.sort_by { |entry| entry[options[:order]] }
  result.reverse! if options[:order_direction] == :desc
  result
end