Class: Appstats::Parser

Inherits: Object

Inheritance hierarchy: Object → Appstats::Parser

Defined in: lib/appstats/parser.rb

Instance Attribute Summary collapse

#constants ⇒ Object readonly

Returns the value of attribute constants.
#constants_no_spaces ⇒ Object readonly

Returns the value of attribute constants_no_spaces.
#raw_results ⇒ Object readonly

Returns the value of attribute raw_results.
#raw_rules ⇒ Object readonly

Returns the value of attribute raw_rules.
#raw_tokenize ⇒ Object readonly

Returns the value of attribute raw_tokenize.
#repeating ⇒ Object readonly

Returns the value of attribute repeating.
#results ⇒ Object readonly

Returns the value of attribute results.
#rules ⇒ Object readonly

Returns the value of attribute rules.
#tokenize ⇒ Object readonly

Returns the value of attribute tokenize.
#tokenize_no_spaces ⇒ Object readonly

Returns the value of attribute tokenize_no_spaces.
#tokenize_regex ⇒ Object readonly

Returns the value of attribute tokenize_regex.
#tokenize_regex_no_spaces ⇒ Object readonly

Returns the value of attribute tokenize_regex_no_spaces.

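Class Method Summary collapse

.alpha?(raw_input) ⇒ Boolean
.merge_regex_filter(inputs = []) ⇒ Object
.parse_constant(current_text, constant) ⇒ Object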

Instance Method Summary collapse

#initialize(data = {}) ⇒ Parser constructor

A new instance of Parser.
#parse(input) ⇒ Object
#parse_word(current_text, stop_on, strict = false) ⇒ Object

Constructor Details

#initialize(data = {}) ⇒ `Parser`

Returns a new instance of Parser.

# File 'lib/appstats/parser.rb', line 9

# Builds a parser from an options hash.
#
# data - Hash of options:
#        :rules     - kept unchanged in @raw_rules
#        :tokenize  - kept unchanged in @raw_tokenize
#        :repeating - repeating is switched on only when this is exactly true
#
# Results start out empty; token setup runs before rule setup.
def initialize(data = {})
  @raw_rules, @raw_tokenize = data[:rules], data[:tokenize]
  @repeating = (data[:repeating] == true)
  @results, @raw_results = {}, []

  update_tokens
  update_rules
end

Instance Attribute Details

#constants ⇒ `Object` (readonly)

Returns the value of attribute constants.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @constants.
# parse copies this value with dup into @remaining_constants before consuming entries.
def constants
  @constants
end

#constants_no_spaces ⇒ `Object` (readonly)

Returns the value of attribute constants_no_spaces.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @constants_no_spaces.
# parse copies this value with dup into @remaining_constants_no_spaces.
def constants_no_spaces
  @constants_no_spaces
end

#raw_results ⇒ `Object` (readonly)

Returns the value of attribute raw_results.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @raw_results.
# The constructor and every call to parse reset it to an empty array.
def raw_results
  @raw_results
end

#raw_rules ⇒ `Object` (readonly)

Returns the value of attribute raw_rules.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @raw_rules, the :rules option exactly as given to the constructor.
def raw_rules
  @raw_rules
end

#raw_tokenize ⇒ `Object` (readonly)

Returns the value of attribute raw_tokenize.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @raw_tokenize, the :tokenize option exactly as given to the constructor.
def raw_tokenize
  @raw_tokenize
end

#repeating ⇒ `Object` (readonly)

Returns the value of attribute repeating.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @repeating.
# The constructor sets it to true only when the :repeating option == true, otherwise false.
def repeating
  @repeating
end

#results ⇒ `Object` (readonly)

Returns the value of attribute results.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @results.
# The constructor and every call to parse reset it to an empty hash.
def results
  @results
end

#rules ⇒ `Object` (readonly)

Returns the value of attribute rules.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @rules, the list parse walks by index.
# parse returns false when this list is empty.
def rules
  @rules
end

#tokenize ⇒ `Object` (readonly)

Returns the value of attribute tokenize.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @tokenize.
# NOTE(review): the assignment is not in the code shown on this page — presumably derived
# from the :tokenize option; confirm in update_tokens.
def tokenize
  @tokenize
end

#tokenize_no_spaces ⇒ `Object` (readonly)

Returns the value of attribute tokenize_no_spaces.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @tokenize_no_spaces.
# NOTE(review): the assignment is not in the code shown on this page — confirm where it is set.
def tokenize_no_spaces
  @tokenize_no_spaces
end

#tokenize_regex ⇒ `Object` (readonly)

Returns the value of attribute tokenize_regex.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @tokenize_regex.
# parse_word passes this value to Parser.merge_regex_filter as one of its stop patterns.
def tokenize_regex
  @tokenize_regex
end

#tokenize_regex_no_spaces ⇒ `Object` (readonly)

Returns the value of attribute tokenize_regex_no_spaces.



5
6
7

# File 'lib/appstats/parser.rb', line 5

# Read-only accessor for @tokenize_regex_no_spaces.
# NOTE(review): the assignment is not in the code shown on this page — confirm where it is set.
def tokenize_regex_no_spaces
  @tokenize_regex_no_spaces
end

Class Method Details

.alpha?(raw_input) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/appstats/parser.rb', line 76

# Reports whether +raw_input+ consists solely of letters.
#
# raw_input - the value to test; nil is treated as non-alphabetic
#
# Returns true only when the entire input matches [A-Za-z]+ (case-insensitively),
# false otherwise.
def self.alpha?(raw_input)
  return false if raw_input.nil?
  # \A and \z anchor the whole string. ^ and $ anchor each line, so they would
  # accept any input that merely contains one all-letter line (e.g. "abc\n123").
  !raw_input.match(/\A[A-Za-z]+\z/i).nil?
end

.merge_regex_filter(inputs = []) ⇒ `Object`

# File 'lib/appstats/parser.rb', line 100

# Combines regex fragments into a single alternation group.
#
# inputs - array of pattern fragments; entries for which blank? is true are ignored
#
# Returns "(a|b|...)" built from the remaining fragments, or "" when none remain.
# The +inputs+ array itself is left unmodified.
def self.merge_regex_filter(inputs = [])
  # reject builds a new array. The earlier collect!/compact! pair rewrote the
  # caller's array in place and raised on frozen arrays.
  fragments = inputs.reject { |x| x.blank? }
  return "" if fragments.empty?
  "(#{fragments.join('|')})"
end

.parse_constant(current_text, constant) ⇒ `Object`

# File 'lib/appstats/parser.rb', line 81

# Tries to consume +constant+ from the very start of +current_text+ (case-insensitive).
#
# current_text - the text to inspect; a stripped copy is used, so the caller's
#                string is never modified (frozen strings are accepted)
# constant     - pattern text interpolated as-is into the regular expression
#
# Returns the two-element array [matched_text, remaining_text] after it has been
# passed through clean_parsed_word. matched_text is nil when the constant is not
# at the start, in which case remaining_text is the stripped input. Both are nil
# when the text is blank or the constant is nil.
def self.parse_constant(current_text,constant)
  answer = [nil,nil]
  return answer if current_text.blank? || constant.nil?
  text = current_text.strip

  if alpha?(constant)
    # An alphabetic constant must be followed by whitespace or a line end, so it
    # cannot match as the prefix of a longer word.
    m = text.match(/^(#{constant})(\s|$)(.*)$/im)
    remaining_text_index = 3
  else
    m = text.match(/^(#{constant})(.*)$/im)
    remaining_text_index = 2
  end

  if m.nil?
    answer[1] = text
  else
    answer[0] = m[1]
    answer[1] = m[remaining_text_index]
  end
  clean_parsed_word(answer)
end

Instance Method Details

#parse(input) ⇒ `Object`

# File 'lib/appstats/parser.rb', line 19

# Parses +input+ by walking @rules in order and recording what each rule captures.
#
# @results and @raw_results are reset on every call.
#
# input - the text to parse
#
# Returns false when +input+ is nil or there are no rules, true otherwise.
def parse(input)
  @results = {}
  @raw_results = []
  return false if input.nil?
  return false if @rules.size == 0

  @rule_index = 0
  @max_rule_index = @rules.size - 1
  # Both variables start out referring to the same stripped copy of the input.
  @previous_text_so_far = input.strip
  @text_so_far = @previous_text_so_far
  # Copies, so deleting consumed constants below leaves @constants and
  # @constants_no_spaces untouched.
  @remaining_constants = @constants.dup
  @remaining_constants_no_spaces = @constants_no_spaces.dup
  
  while !@text_so_far.blank?
    process_constant_if_present
    # Out of rules: stop, unless repeating, in which case start over at the first rule.
    break if @rule_index > @max_rule_index && !@repeating
    @rule_index = 0 if @rule_index > @max_rule_index

    rule = @rules[@rule_index]
    @rule_index += 1
    
    # Only Hash rules capture text here; any other kind of rule just advances the index.
    if rule.kind_of?(Hash)
      if rule[:stop] == :constant
        was_found = false
        # Try each remaining constant, in order, as a strict stop marker.
        # Note: the local p shadows Kernel#p inside this block.
        @remaining_constants.each_with_index do |k,index|
          p = parse_word(@text_so_far,k,true)
          if p[0].nil?
            unset_rules_until(k)
          else
            # Drop every constant that precedes the matched one (from both lists);
            # the matched constant itself stays. Mutating the array being iterated
            # is only safe because of the break below.
            (index-1).downto(0) do |i|
              @remaining_constants_no_spaces.delete_at(i)
              @remaining_constants.delete_at(i)
            end
            add_results(rule[:rule],p[0])
            @text_so_far = p[1]
            was_found = true
            break
          end
        end
        # No constant stopped the word: the rule receives all remaining text,
        # and clearing the text ends the loop.
        unless was_found
          add_results(rule[:rule],@text_so_far)
          @text_so_far = nil
        end
      else
        # Any other stop value is handed to parse_word non-strictly.
        p = parse_word(@text_so_far,rule[:stop],false)
        add_results(rule[:rule],p[0]) unless p[0].nil?
        @text_so_far = p[1]
      end
    end
    # Stop when this pass left the text unchanged, so the loop cannot spin forever.
    break if @previous_text_so_far == @text_so_far
    @previous_text_so_far = @text_so_far
  end
  # NOTE(review): the return value is discarded here, unlike in parse_word —
  # presumably the helper is called for a side effect; confirm.
  remove_tokens_at_start(@text_so_far)
  unset_rules_until(nil)
  true
end

#parse_word(current_text, stop_on, strict = false) ⇒ `Object`

# File 'lib/appstats/parser.rb', line 106

# Splits +current_text+ at the first stop pattern and returns the word in front of it.
#
# current_text - text to consume; it is stripped in place, then passed through
#                remove_tokens_at_start
# stop_on      - :end   stops only at @tokenize_regex
#                :space stops at whitespace, @tokenize_regex or remaining_constants_regex
#                any other value is used as a stop pattern itself, together with
#                @tokenize_regex and remaining_constants_regex
# strict       - consulted only for a custom stop_on: when truthy and nothing matches,
#                no word is returned and the whole text becomes the remainder
#
# Returns [word, remainder] after Parser.clean_parsed_word, or [nil, nil] when the
# text is blank or stop_on is nil.
def parse_word(current_text,stop_on,strict = false)
  return [nil,nil] if current_text.blank? || stop_on.nil?

  current_text.strip!
  current_text = remove_tokens_at_start(current_text)

  until_end = (stop_on == :end)
  until_space = !until_end && (stop_on == :space)

  stoppers =
    if until_end
      [nil,@tokenize_regex]
    elsif until_space
      ['\s',@tokenize_regex,remaining_constants_regex]
    else
      [stop_on,@tokenize_regex,remaining_constants_regex]
    end
  filter = Parser.merge_regex_filter(stoppers)
  hit = current_text.match(/^(.*?)(#{filter}.*)$/im)

  # For :end, a match with nothing but blank text in front of it counts as no match.
  usable = !hit.nil? && !(until_end && hit[1].blank?)

  pieces =
    if usable
      [hit[1],hit[2]]
    elsif strict && !until_end && !until_space
      [nil,current_text]
    else
      [current_text,nil]
    end
  Parser.clean_parsed_word(pieces)
end

Class: Appstats::Parser

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data = {}) ⇒ Parser

Instance Attribute Details

#constants ⇒ Object (readonly)

#constants_no_spaces ⇒ Object (readonly)

#raw_results ⇒ Object (readonly)

#raw_rules ⇒ Object (readonly)

#raw_tokenize ⇒ Object (readonly)

#repeating ⇒ Object (readonly)

#results ⇒ Object (readonly)

#rules ⇒ Object (readonly)

#tokenize ⇒ Object (readonly)

#tokenize_no_spaces ⇒ Object (readonly)

#tokenize_regex ⇒ Object (readonly)

#tokenize_regex_no_spaces ⇒ Object (readonly)