Module: TextParser
- Included in:
- String
- Defined in:
- lib/text_parser.rb,
lib/text_parser/version.rb
Constant Summary collapse
- VERSION =
"0.3.0"
Instance Method Summary collapse
-
#parse(args = {}) ⇒ Array of Hash
Returns the parsed text as a list of words with their occurrence counts.
Instance Method Details
#parse(args = {}) ⇒ Array of Hash
Returns the parsed text as a list of words with their occurrence counts.
- args
- Symbol
-
:dictionary, :order, :order_direction, :negative_dictionary, :minimum_length
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/text_parser.rb', line 9

# Counts how often each word occurs in the receiver string.
#
# Every character other than A-Z, a-z, À-ú, 0-9 and "-" is replaced by a
# space before matching. Matching is case-insensitive and the reported words
# are lower-cased.
#
# args is a Hash; entries whose value is nil are ignored. Recognised keys:
#   :dictionary          - Array of words to look for. When absent, every
#                          whitespace-separated word of the cleaned text is used.
#   :order               - key of the result hashes to sort by (default :word).
#   :order_direction     - :desc reverses the sorted result (default :asc).
#   :negative_dictionary - Array of words to leave out of the result,
#                          compared case-insensitively (default []).
#   :minimum_length      - when given, matches shorter than this are dropped.
#
# Returns an Array of Hashes of the form {:hits => Integer, :word => String},
# one per distinct matched word, or [] when the dictionary is empty.
def parse(args = {})
  # Build a fresh hash instead of delete_if so the caller's args is untouched.
  options = {
    :order => :word,
    :order_direction => :asc,
    :negative_dictionary => []
  }.merge(args.reject { |_key, value| value.nil? })

  text = gsub(/[^A-Za-zÀ-ú0-9\-]/u, " ").strip
  dictionary = options[:dictionary] || text.split(" ")
  return [] if dictionary.empty?

  # Escape each entry so it is matched literally, and wrap the whole
  # alternation in \b so the first and last entries are also anchored on word
  # boundaries (e.g. "test" must not match inside "contest").
  alternatives = dictionary.map { |word| Regexp.escape(word.to_s) }.join("|")
  regex = Regexp.new("\\b(?:#{alternatives})\\b", Regexp::IGNORECASE)

  matches = text.scan(regex).map(&:downcase)
  minimum_length = options[:minimum_length]
  matches = matches.select { |word| word.size >= minimum_length } if minimum_length

  # Downcase the exclusion list once rather than once per matched word.
  excluded = options[:negative_dictionary].map(&:downcase)

  # Single counting pass; the hash keeps words in first-occurrence order.
  counts = Hash.new(0)
  matches.each { |word| counts[word] += 1 unless excluded.include?(word) }

  result = counts.map { |word, hits| { :hits => hits, :word => word } }
  result = result.sort_by { |entry| entry[options[:order]] }
  result.reverse! if options[:order_direction] == :desc
  result
end