Class: Rhetor::LexicalAnalyser
- Inherits:
-
Object
- Object
- Rhetor::LexicalAnalyser
- Defined in:
- lib/rhetor/lexical_analyser.rb
Overview
LexicalAnalyser is a class that performs lexical analysis of strings using a set of predefined rules.
Instance Attribute Summary collapse
-
#position ⇒ Integer
readonly
The current position of the analyser.
-
#string ⇒ String
readonly
The string being analyzed.
Instance Method Summary collapse
-
#analyse(string) {|token| ... } ⇒ Array<Token>
Analyzes the given string.
-
#begin_analysis(string) ⇒ void
Initiates the analysis of the string.
-
#ignore(pattern) ⇒ void
Makes the analyser ignore a pattern.
-
#initialize(&block) ⇒ void
constructor
Creates a new lexical analyser and evaluates the passed block within it.
-
#next_token ⇒ Token
The next token found in the string.
-
#rule(pattern, name, &evaluator) ⇒ void
Makes the analyser recognize a pattern.
Constructor Details
#initialize(&block) ⇒ void
Creates a new lexical analyser and evaluates the passed block within it
30 31 32 33 34 35 36 37 38 39 |
# File 'lib/rhetor/lexical_analyser.rb', line 30

# Creates a new lexical analyser with empty rule tables and, when a block
# is supplied, runs that block to configure it.
# @param block [Block] the configuration block. A block with exactly one
#   parameter is called with the analyser as its argument; any other block
#   is evaluated in the context of the analyser itself.
# @return [void]
#
def initialize(&block)
  @string_patterns = {}
  @regexp_patterns = {}
  @ignored = []
  @used_names = []
  @evaluator = {}
  @string = nil
  @position = nil
  return unless block_given?

  if block.arity == 1
    block.call(self)
  else
    instance_eval(&block)
  end
end
Instance Attribute Details
#position ⇒ Integer (readonly)
Returns the current position of the analyser.
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/rhetor/lexical_analyser.rb', line 15

# Performs lexical analysis of strings using rules registered with #rule
# and patterns registered with #ignore.
class LexicalAnalyser
  attr_reader :string, :position

  # Creates a new lexical analyser and evaluates the passed block within it
  # @param block [Block] the block to be executed. A block with exactly one
  #   parameter receives the analyser as its argument; any other block is
  #   evaluated in the context of the analyser.
  # @example Creating a simple HQ9+ parser
  #   lexer = Rhetor::LexicalAnalyser.new {
  #     rule 'H', :hello_world
  #     rule 'Q', :quine
  #     rule '9', :ninety_nine_bottles
  #     rule '+', :increment
  #     ignore /\s+/
  #   }
  # @return [void]
  #
  def initialize(&block)
    @string_patterns = {}
    @regexp_patterns = {}
    @ignored = []
    @used_names = []
    @evaluator = {}
    @string = nil
    @position = nil
    (block.arity == 1) ? block[self] : instance_eval(&block) if block_given?
  end

  # Makes the analyser recognize some pattern
  # @param pattern [String, Regexp] the pattern
  # @param name [Symbol] the name of the rule
  # @param evaluator [Proc,nil] a proc. This proc will be called
  #   if the pattern is encountered. It receives a matched substring
  #   and calculates the value of the corresponding token. If this
  #   argument is omitted, the value of the token will coincide
  #   with the matched substring.
  # @raise [InvalidPattern] if the pattern is not valid
  # @raise [InvalidRuleName] unless the name of the rule is a symbol
  # @raise [RuleAlreadyExists] if the rule with the same name already exists
  # @return [void]
  #
  def rule(pattern, name, &evaluator)
    check_rule(pattern, name)
    @used_names.push name
    @evaluator[name] = evaluator
    # Resolves to @string_patterns or @regexp_patterns; check_rule has
    # already restricted the pattern class to String or Regexp.
    array_name = "@#{pattern.class.name.downcase}_patterns".intern
    instance_variable_get(array_name)[name] = pattern
  end

  # Makes the analyser ignore some pattern
  # @param pattern [String, Regexp] the pattern to be ignored
  # @raise [InvalidPattern] if the pattern is neither a String nor a Regexp
  # @return [void]
  #
  def ignore(pattern)
    fail InvalidPattern unless [String, Regexp].include? pattern.class
    @ignored.push pattern unless @ignored.include? pattern
  end

  # Initiates the analysis of the string
  # @param string [String] the string to be analyzed
  # @raise [InvalidString] unless the argument is a String
  # @return [void]
  #
  def begin_analysis(string)
    fail InvalidString unless string.is_a? String
    @string = string
    @position = 0
    @size = string.size
  end

  # @return [Token] the next token found in the string
  # @raise [NoStringLoaded] if no string is being analyzed
  # @raise [UnmatchedString] if the analyser is unable to get the next token
  #
  def next_token
    fail NoStringLoaded unless @string
    @position = skip_ignored(@string, @position)
    return EOF_TOKEN if @position >= @size
    name, length = string_pattern(@string, @position)
    # Regexp rules are consulted only when no string rule matched.
    name, length = regexp_pattern(@string, @position) if length == 0
    fail UnmatchedString, "at position #{@position}" if length == 0
    token = make_token(name, @position, length)
    @position += length
    token
  end

  # Analyzes the given string
  # @param string [String] the string to be analyzed
  # @yieldparam token [Token] every encountered token
  # @return [Array<Token>] the array of encountered tokens
  #
  def analyse(string, &block)
    begin_analysis(string)
    tokens = []
    loop do
      last_token = next_token
      (last_token == EOF_TOKEN) ? break : tokens << last_token
      block.call(last_token) if block_given?
    end
    tokens
  end

  private

  # Builds the token for the substring of the loaded string that starts at
  # +position+ and is +size+ characters long. The token value is the result
  # of the rule's evaluator when one was registered, otherwise the
  # substring itself.
  def make_token(name, position, size)
    substring = @string[position, size]
    value = @evaluator[name] ? @evaluator[name].call(substring) : substring
    Rhetor::Token.new(value, name, position, size)
  end

  # Validates the arguments of #rule, raising InvalidPattern,
  # InvalidRuleName or RuleAlreadyExists when they are unacceptable.
  def check_rule(pattern, name)
    fail InvalidPattern unless [String, Regexp].include? pattern.class
    fail InvalidRuleName unless name.is_a? Symbol
    fail RuleAlreadyExists if @used_names.include? name
  end

  # Returns a [name, size] pair for the string rule with the longest match
  # at +position+; the size is 0 when no string rule matches, and the pair
  # is [nil, 0] when no string rules are registered.
  def string_pattern(string, position)
    results = @string_patterns.map do |name, pattern|
      [name, matched_size(pattern, string, position)]
    end
    results.max_by(&:last) || [nil, 0]
  end

  # Returns a [name, size] pair for a regexp rule matching at +position+,
  # or [nil, 0] when none matches.
  # NOTE(review): this selects the SHORTEST non-empty match, whereas
  # string_pattern selects the longest one - confirm this is intended.
  def regexp_pattern(string, position)
    results = @regexp_patterns.map do |name, pattern|
      [name, matched_size(pattern, string, position)]
    end
    # results.max_by(&:last) || [nil, 0]
    results.sort_by(&:last).find { |_name, size| size > 0 } || [nil, 0]
  end

  # Returns the first position at or after +position+ where no ignored
  # pattern matches. Skipping is repeated until nothing more is consumed,
  # so consecutive ignored fragments (two spaces with `ignore ' '`, or
  # whitespace followed by a comment pattern) are all skipped instead of
  # leaving the second fragment to be reported as unmatched input.
  def skip_ignored(string, position)
    loop do
      skipped = @ignored.map { |p| matched_size(p, string, position) }.max
      # nil means no ignored patterns are registered; 0 means none matched.
      return position unless skipped && skipped > 0
      position += skipped
    end
  end

  # Returns the number of characters +pattern+ matches in +string+ starting
  # exactly at +position+, or 0 when it does not match there.
  def matched_size(pattern, string, position)
    if pattern.is_a? String
      (string[position, pattern.size] == pattern) ? pattern.size : 0
    elsif pattern.is_a? Regexp
      md = string.match(pattern, position)
      return 0 unless md
      # String#match may find a match further on; only accept one that
      # begins at the requested position.
      md.begin(0) == position ? md[0].size : 0
    end
  end
end
#string ⇒ String (readonly)
Returns the string being analyzed.
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/rhetor/lexical_analyser.rb', line 15

# Performs lexical analysis of strings using rules registered with #rule
# and patterns registered with #ignore.
class LexicalAnalyser
  attr_reader :string, :position

  # Creates a new lexical analyser and evaluates the passed block within it
  # @param block [Block] the block to be executed. A block with exactly one
  #   parameter receives the analyser as its argument; any other block is
  #   evaluated in the context of the analyser.
  # @example Creating a simple HQ9+ parser
  #   lexer = Rhetor::LexicalAnalyser.new {
  #     rule 'H', :hello_world
  #     rule 'Q', :quine
  #     rule '9', :ninety_nine_bottles
  #     rule '+', :increment
  #     ignore /\s+/
  #   }
  # @return [void]
  #
  def initialize(&block)
    @string_patterns = {}
    @regexp_patterns = {}
    @ignored = []
    @used_names = []
    @evaluator = {}
    @string = nil
    @position = nil
    (block.arity == 1) ? block[self] : instance_eval(&block) if block_given?
  end

  # Makes the analyser recognize some pattern
  # @param pattern [String, Regexp] the pattern
  # @param name [Symbol] the name of the rule
  # @param evaluator [Proc,nil] a proc. This proc will be called
  #   if the pattern is encountered. It receives a matched substring
  #   and calculates the value of the corresponding token. If this
  #   argument is omitted, the value of the token will coincide
  #   with the matched substring.
  # @raise [InvalidPattern] if the pattern is not valid
  # @raise [InvalidRuleName] unless the name of the rule is a symbol
  # @raise [RuleAlreadyExists] if the rule with the same name already exists
  # @return [void]
  #
  def rule(pattern, name, &evaluator)
    check_rule(pattern, name)
    @used_names.push name
    @evaluator[name] = evaluator
    # Resolves to @string_patterns or @regexp_patterns; check_rule has
    # already restricted the pattern class to String or Regexp.
    array_name = "@#{pattern.class.name.downcase}_patterns".intern
    instance_variable_get(array_name)[name] = pattern
  end

  # Makes the analyser ignore some pattern
  # @param pattern [String, Regexp] the pattern to be ignored
  # @raise [InvalidPattern] if the pattern is neither a String nor a Regexp
  # @return [void]
  #
  def ignore(pattern)
    fail InvalidPattern unless [String, Regexp].include? pattern.class
    @ignored.push pattern unless @ignored.include? pattern
  end

  # Initiates the analysis of the string
  # @param string [String] the string to be analyzed
  # @raise [InvalidString] unless the argument is a String
  # @return [void]
  #
  def begin_analysis(string)
    fail InvalidString unless string.is_a? String
    @string = string
    @position = 0
    @size = string.size
  end

  # @return [Token] the next token found in the string
  # @raise [NoStringLoaded] if no string is being analyzed
  # @raise [UnmatchedString] if the analyser is unable to get the next token
  #
  def next_token
    fail NoStringLoaded unless @string
    @position = skip_ignored(@string, @position)
    return EOF_TOKEN if @position >= @size
    name, length = string_pattern(@string, @position)
    # Regexp rules are consulted only when no string rule matched.
    name, length = regexp_pattern(@string, @position) if length == 0
    fail UnmatchedString, "at position #{@position}" if length == 0
    token = make_token(name, @position, length)
    @position += length
    token
  end

  # Analyzes the given string
  # @param string [String] the string to be analyzed
  # @yieldparam token [Token] every encountered token
  # @return [Array<Token>] the array of encountered tokens
  #
  def analyse(string, &block)
    begin_analysis(string)
    tokens = []
    loop do
      last_token = next_token
      (last_token == EOF_TOKEN) ? break : tokens << last_token
      block.call(last_token) if block_given?
    end
    tokens
  end

  private

  # Builds the token for the substring of the loaded string that starts at
  # +position+ and is +size+ characters long. The token value is the result
  # of the rule's evaluator when one was registered, otherwise the
  # substring itself.
  def make_token(name, position, size)
    substring = @string[position, size]
    value = @evaluator[name] ? @evaluator[name].call(substring) : substring
    Rhetor::Token.new(value, name, position, size)
  end

  # Validates the arguments of #rule, raising InvalidPattern,
  # InvalidRuleName or RuleAlreadyExists when they are unacceptable.
  def check_rule(pattern, name)
    fail InvalidPattern unless [String, Regexp].include? pattern.class
    fail InvalidRuleName unless name.is_a? Symbol
    fail RuleAlreadyExists if @used_names.include? name
  end

  # Returns a [name, size] pair for the string rule with the longest match
  # at +position+; the size is 0 when no string rule matches, and the pair
  # is [nil, 0] when no string rules are registered.
  def string_pattern(string, position)
    results = @string_patterns.map do |name, pattern|
      [name, matched_size(pattern, string, position)]
    end
    results.max_by(&:last) || [nil, 0]
  end

  # Returns a [name, size] pair for a regexp rule matching at +position+,
  # or [nil, 0] when none matches.
  # NOTE(review): this selects the SHORTEST non-empty match, whereas
  # string_pattern selects the longest one - confirm this is intended.
  def regexp_pattern(string, position)
    results = @regexp_patterns.map do |name, pattern|
      [name, matched_size(pattern, string, position)]
    end
    # results.max_by(&:last) || [nil, 0]
    results.sort_by(&:last).find { |_name, size| size > 0 } || [nil, 0]
  end

  # Returns the first position at or after +position+ where no ignored
  # pattern matches. Skipping is repeated until nothing more is consumed,
  # so consecutive ignored fragments (two spaces with `ignore ' '`, or
  # whitespace followed by a comment pattern) are all skipped instead of
  # leaving the second fragment to be reported as unmatched input.
  def skip_ignored(string, position)
    loop do
      skipped = @ignored.map { |p| matched_size(p, string, position) }.max
      # nil means no ignored patterns are registered; 0 means none matched.
      return position unless skipped && skipped > 0
      position += skipped
    end
  end

  # Returns the number of characters +pattern+ matches in +string+ starting
  # exactly at +position+, or 0 when it does not match there.
  def matched_size(pattern, string, position)
    if pattern.is_a? String
      (string[position, pattern.size] == pattern) ? pattern.size : 0
    elsif pattern.is_a? Regexp
      md = string.match(pattern, position)
      return 0 unless md
      # String#match may find a match further on; only accept one that
      # begins at the requested position.
      md.begin(0) == position ? md[0].size : 0
    end
  end
end
Instance Method Details
#analyse(string) {|token| ... } ⇒ Array<Token>
Analyzes the given string
103 104 105 106 107 108 109 110 111 112 |
# File 'lib/rhetor/lexical_analyser.rb', line 103

# Analyzes the given string from its beginning and collects every token
# up to (but not including) the end-of-input token.
# @param string [String] the string to be analyzed
# @yieldparam token [Token] every encountered token
# @return [Array<Token>] the array of encountered tokens
#
def analyse(string, &block)
  begin_analysis(string)
  collected = []
  loop do
    token = next_token
    # The end-of-input token terminates the scan and is never collected
    # or passed to the block.
    break if token == EOF_TOKEN

    collected << token
    block.call(token) if block_given?
  end
  collected
end
#begin_analysis(string) ⇒ void
This method returns an undefined value.
Initiates the analysis of the string
75 76 77 78 79 80 |
# File 'lib/rhetor/lexical_analyser.rb', line 75

# Loads a string into the analyser and rewinds the position to its start.
# @param string [String] the string to be analyzed
# @raise [InvalidString] unless the argument is a String
# @return [void]
#
def begin_analysis(string)
  raise InvalidString unless string.is_a?(String)

  @position = 0
  @string = string
  @size = string.size
end
#ignore(pattern) ⇒ void
This method returns an undefined value.
Makes the analyser ignore a pattern
66 67 68 69 |
# File 'lib/rhetor/lexical_analyser.rb', line 66

# Registers a pattern the analyser should skip over. Registering the same
# pattern again has no effect.
# @param pattern [String, Regexp] the pattern to be ignored
# @raise [InvalidPattern] if the pattern is neither a String nor a Regexp
# @return [void]
#
def ignore(pattern)
  raise InvalidPattern unless [String, Regexp].include?(pattern.class)
  return if @ignored.include?(pattern)

  @ignored.push(pattern)
end
#next_token ⇒ Token
Returns the next token found in the string.
86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/rhetor/lexical_analyser.rb', line 86

# Skips ignored input, then produces the token that starts at the current
# position and advances past it.
# @return [Token] the next token found in the string
# @raise [NoStringLoaded] if no string is being analyzed
# @raise [UnmatchedString] if the analyser is unable to get the next token
#
def next_token
  raise NoStringLoaded unless @string

  @position = skip_ignored(@string, @position)
  return EOF_TOKEN if @position >= @size

  name, length = string_pattern(@string, @position)
  # Regexp rules are consulted only when no string rule matched.
  name, length = regexp_pattern(@string, @position) if length.zero?
  raise UnmatchedString, "at position #{@position}" if length.zero?

  # The token is built before the position moves, so the position still
  # points at the token's start while make_token runs.
  make_token(name, @position, length).tap { @position += length }
end
#rule(pattern, name, &evaluator) ⇒ void
This method returns an undefined value.
Makes the analyser recognize a pattern
54 55 56 57 58 59 60 |
# File 'lib/rhetor/lexical_analyser.rb', line 54

# Registers a named rule so the analyser recognizes the given pattern.
# @param pattern [String, Regexp] the pattern
# @param name [Symbol] the name of the rule
# @param evaluator [Proc,nil] optional block stored for the rule; nil is
#   stored when no block is given
# @return [void]
#
def rule(pattern, name, &evaluator)
  check_rule(pattern, name)
  @used_names << name
  @evaluator[name] = evaluator
  # The pattern's class name selects the table: @string_patterns for a
  # String, @regexp_patterns for a Regexp.
  table = instance_variable_get(:"@#{pattern.class.name.downcase}_patterns")
  table[name] = pattern
end