Class: ChefZero::Solr::SolrParser
- Inherits:
- Object
- Inheritance hierarchy:
- Object
- ChefZero::Solr::SolrParser
- Defined in:
- lib/chef_zero/solr/solr_parser.rb
Constant Summary collapse
- DEFAULT_FIELD =
"text".freeze
Instance Method Summary collapse
- #binary_operator?(token) ⇒ Boolean
- #binary_operator_precedence(token) ⇒ Object
- #eof? ⇒ Boolean
-
#initialize(query_string) ⇒ SolrParser
constructor
A new instance of SolrParser.
- #next_token ⇒ Object
- #parse ⇒ Object
- #parse_error(token, str) ⇒ Object
- #parse_token ⇒ Object
- #peek_operator_token ⇒ Object
- #peek_term_token ⇒ Object
-
#peek_token ⇒ Object
Tokenization.
-
#read_expression ⇒ Object
Parse tree creation.
- #read_single_expression ⇒ Object
- #skip_whitespace ⇒ Object
- #unary_operator?(token) ⇒ Boolean
Constructor Details
#initialize(query_string) ⇒ SolrParser
Returns a new instance of SolrParser.
13 14 15 16 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 13
# Sets up a parser whose read position is the start of the query.
#
# @param query_string [String] the query text that will be tokenized and parsed
def initialize(query_string)
  @index = 0
  @query_string = query_string
end
Instance Method Details
#binary_operator?(token) ⇒ Boolean
188 189 190 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 188
# Tells whether +token+ is one of the binary operators this parser combines
# expressions with: AND, OR, ^ and :.
#
# @param token [String, nil] a token produced by the tokenizer
# @return [Boolean] true when the token is a binary operator
def binary_operator?(token)
  %w[AND OR ^ :].include?(token)
end
#binary_operator_precedence(token) ⇒ Object
192 193 194 195 196 197 198 199 200 201 202 203 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 192
# Looks up how tightly a binary operator binds; larger numbers bind tighter.
#
# @param token [String, nil] a binary operator token
# @return [Integer, nil] 4 for "^", 3 for ":", 2 for "AND", 1 for "OR",
#   and nil for anything else
def binary_operator_precedence(token)
  { "^" => 4, ":" => 3, "AND" => 2, "OR" => 1 }[token]
end
#eof? ⇒ Boolean
87 88 89 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 87
# Reports whether the parser has run out of input.
#
# A token that has been peeked but not yet consumed still counts as pending
# input, so the parser is only at the end once that buffer is empty and the
# read position has reached the end of the query string.
#
# @return [Boolean]
def eof?
  return false if @next_token

  @index >= @query_string.length
end
#next_token ⇒ Object
29 30 31 32 33 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 29
# Consumes the upcoming token: returns what peek_token reports and clears
# the look-ahead buffer so the following call moves on to the next token.
#
# @return [String, nil] whatever peek_token returned
def next_token
  peek_token.tap { @next_token = nil }
end
#parse ⇒ Object
18 19 20 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 18
# Entry point of the parser: parses the query from the current position.
#
# @return [Object] the value returned by read_expression
def parse
  read_expression
end
#parse_error(token, str) ⇒ Object
121 122 123 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 121
# Aborts parsing with a message that names the offending token, the current
# read position and the full query string.
#
# @param token [String, nil] the token the error is reported against
# @param str [String] description of what went wrong
# @raise [ChefZero::Solr::ParseError] always
def parse_error(token, str)
  message = "Error on token '#{token}' at #{@index} of '#{@query_string}': #{str}"
  raise ChefZero::Solr::ParseError, message
end
#parse_token ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 35
# Reads the next token from the query string and advances the read position
# past it.
#
# @return [String, nil] nil when only whitespace (or nothing) is left;
#   otherwise the operator found by peek_operator_token, or the text of a term
def parse_token
  skip_whitespace
  return nil if eof?

  if (operator = peek_operator_token)
    @index += operator.length
    return operator
  end

  # Anything that is neither whitespace nor an operator belongs to a term
  # (plain characters plus backslash-escaped characters).
  term_start = @index
  loop do
    # A backslash escapes the character after it: step over the backslash
    # here so the escaped character is consumed by the increment below.
    @index += 1 if @query_string[@index] == '\\'
    @index += 1 unless eof?
    break if eof? || !peek_term_token
  end
  @query_string[term_start...@index]
end
#peek_operator_token ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 74
# Checks, without moving the read position, whether an operator starts at
# the current index.
#
# @return [String, nil] the one-character operator or the two-character
#   "&&" / "||" found at the current index, or nil when there is none
def peek_operator_token
  current = @query_string[@index]
  return current if ['"', "+", "-", "!", "(", ")", "{", "}", "[", "]", "^", ":"].include?(current)

  pair = @query_string[@index, 2]
  ["&&", "||"].include?(pair) ? pair : nil
end
#peek_term_token ⇒ Object
67 68 69 70 71 72 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 67
# Decides whether the character at the current index may be part of a term.
#
# @return [Boolean, nil] nil when the character is whitespace; true when no
#   operator starts here or the operator is "-" (so a hyphen does not end a
#   term); false for any other operator
def peek_term_token
  return nil if @query_string[@index] =~ /\s/

  operator = peek_operator_token
  return true unless operator

  operator == "-"
end
#peek_token ⇒ Object
Tokenization
25 26 27 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 25
# Returns the upcoming token without consuming it. The token is read with
# parse_token the first time and kept in @next_token, so repeated peeks
# return the same token until next_token clears the buffer.
#
# @return [String, nil] the buffered token, or nil when parse_token finds none
def peek_token
  @next_token = parse_token unless @next_token
  @next_token
end
#read_expression ⇒ Object
Parse tree creation
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 92
# Reads a sequence of single expressions joined by binary operators and
# folds them into a tree of Query::BinaryOperator nodes. Reading stops at a
# ")" token (which is left unconsumed) or at the end of the input.
#
# @return [Object] the lone single expression, or the root
#   Query::BinaryOperator when there were several operands
def read_expression
  tree = read_single_expression

  loop do
    # peek_token skips whitespace as a side effect, so checking it first
    # makes the eof? test reliable.
    break if peek_token == ")" || eof?

    # Two operands with no operator between them are joined with OR.
    operator = binary_operator?(peek_token) ? next_token : "OR"
    operand = read_single_expression

    binds_tighter = tree.is_a?(Query::BinaryOperator) &&
                    binary_operator_precedence(operator) > binary_operator_precedence(tree.operator)

    tree =
      if binds_tighter
        # a+b*c -> a+(b*c): the new operator takes over the right operand.
        regrouped = Query::BinaryOperator.new(tree.right, operator, operand)
        Query::BinaryOperator.new(tree.left, tree.operator, regrouped)
      else
        # a*b+c -> (a*b)+c: the tree built so far becomes the left operand.
        Query::BinaryOperator.new(tree, operator, operand)
      end
  end

  tree
end
#read_single_expression ⇒ Object
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 125
# Reads one operand of an expression, consuming tokens with next_token.
#
# Depending on the first token this builds:
# * a Query::UnaryOperator for NOT, +, - and !
# * a Query::Phrase for a double-quoted run of terms
# * a Query::RangeQuery for "{a TO b}" / "[a TO b]"
# * a Query::Subquery for a parenthesized expression
# * a Query::Term, or a ":" Query::BinaryOperator when the term is followed
#   by a colon
#
# @return [Object] the node built for this operand
# @raise [ChefZero::Solr::ParseError] via parse_error when the input ends
#   early, a phrase or range is malformed, or a closing bracket or binary
#   operator appears where an operand is expected
def read_single_expression
  token = next_token

  # Running out of input where an operand is required is an error.
  if !token
    parse_error(nil, "Expected expression!")

  # A unary operator applies to the single expression that follows it.
  elsif unary_operator?(token)
    operand = read_single_expression
    # TODO: We rely on all unary operators having higher precedence than all
    # binary operators. Check if this is the case.
    Query::UnaryOperator.new(token, operand)

  # A double quote opens a phrase: collect terms up to the closing quote.
  elsif token == '"'
    phrase_terms = []
    until (term = next_token) == '"'
      # next_token keeps returning nil once the input is exhausted, so an
      # unterminated phrase has to be rejected here or the loop never ends.
      parse_error(token, "Expected closing '\"' for phrase") unless term
      phrase_terms << Query::Term.new(term)
    end
    Query::Phrase.new(phrase_terms)

  # "{" or "[" opens a range query of the form: left TO right, then "}" or "]".
  elsif token == "{" || token == "["
    left = next_token
    parse_error(left, "Expected left term in range query") unless left
    to = next_token
    parse_error(to, "Expected TO in range query") if to != "TO"
    right = next_token
    parse_error(right, "Expected right term in range query") unless right
    end_range = next_token
    parse_error(end_range, "Expected end range '}' or ']'") unless ["}", "]"].include?(end_range)
    # Square brackets make the corresponding bound inclusive.
    Query::RangeQuery.new(left, right, token == "[", end_range == "]")

  # A parenthesized subquery: read_expression stops before the ")".
  elsif token == "("
    subquery = read_expression
    close_paren = next_token
    parse_error(close_paren, "Expected ')'") if close_paren != ")"
    Query::Subquery.new(subquery)

  # A closing bracket cannot start an operand.
  elsif ["}", "]", ")"].include?(token)
    parse_error(token, "Unexpected end paren")

  # Neither can a binary operator.
  elsif binary_operator?(token)
    parse_error(token, "Unexpected binary operator")

  # Anything else is a plain term, possibly the field of a field:value pair.
  else
    term = Query::Term.new(token)
    if peek_token == ":"
      Query::BinaryOperator.new(term, next_token, read_single_expression)
    else
      term
    end
  end
end
#skip_whitespace ⇒ Object
60 61 62 63 64 65 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 60
# Advances the read position past any run of whitespace that starts at the
# current index. Does nothing when the current character is not whitespace
# or the index is already past the end of the query string.
#
# @return [Integer, nil] the new index when whitespace was skipped, else nil
def skip_whitespace
  return unless @query_string[@index] =~ /\s/

  # The guard above guarantees a match that starts exactly at @index, so no
  # fallback is needed for a failed match.
  @index += /\s+/.match(@query_string, @index)[0].length
end
#unary_operator?(token) ⇒ Boolean
184 185 186 |
# File 'lib/chef_zero/solr/solr_parser.rb', line 184
# Tells whether +token+ is one of the prefix operators NOT, +, - or !.
#
# @param token [String, nil] a token produced by the tokenizer
# @return [Boolean] true when the token is a unary operator
def unary_operator?(token)
  %w[NOT + - !].include?(token)
end