Class: ParseTreePatternMatcher

Inherits:
Object
Defined in:
lib/antlr4/tree/ParseTreePatternMatcher.rb

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(lexer, parser) ⇒ ParseTreePatternMatcher

Constructs a ParseTreePatternMatcher from a Lexer and Parser object. The lexer input stream is altered for tokenizing the tree patterns. The parser is used as a convenient mechanism to get the grammar name, plus token and rule names.

Returns a new instance of ParseTreePatternMatcher.



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 27

def initialize(lexer, parser)
    self.lexer = lexer
    self.parser = parser
    self.start = "<"
    self.stop = ">"
    self.escape = "\\"  # e.g., \< and \> must escape BOTH!
end
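
A minimal usage sketch, assuming hypothetical generated recognizer classes MyGrammarLexer and MyGrammarParser (any grammar's lexer/parser pair will do); the names matcher, tree, compiled and statement_rule_index used in the sketches below are assumptions, not part of this API:

# MyGrammarLexer/MyGrammarParser are hypothetical generated classes.
lexer = MyGrammarLexer.new(InputStream.new(""))
parser = MyGrammarParser.new(CommonTokenStream.new(lexer))
matcher = ParseTreePatternMatcher.new(lexer, parser)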

Instance Attribute Details

#escapeObject

The escape string used when a start or stop delimiter must appear literally inside a pattern (defaults to a single backslash in #initialize).



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 26

def escape
  @escape
end

#lexerObject

The Lexer used to tokenize the text chunks of a tree pattern; its input stream is altered during tokenizing.



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 26

def lexer
  @lexer
end

#parserObject

The Parser used as a convenient mechanism to get the grammar name, plus token and rule names.



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 26

def parser
  @parser
end

#startObject

The start delimiter that marks the beginning of a rule or token tag in a pattern (defaults to "<" in #initialize).



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 26

def start
  @start
end

#stopObject

The stop delimiter that marks the end of a rule or token tag in a pattern (defaults to ">" in #initialize).



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 26

def stop
  @stop
end

Instance Method Details

#compileTreePattern(pattern, patternRuleIndex) ⇒ Object

For repeated use of a tree pattern, compile it to a ParseTreePattern using this method.



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 93

def compileTreePattern(pattern, patternRuleIndex)
    tokenList = self.tokenize(pattern)
    tokenSrc = ListTokenSource.new(tokenList)
    tokens = CommonTokenStream.new(tokenSrc)

    p = self.parser
    parserInterp = ParserInterpreter.new(p.grammarFileName, p.tokenNames, p.ruleNames, 
                                          p.getATNWithBypassAlts(),tokens)
    tree = nil
    begin
        parserInterp.setErrorHandler(BailErrorStrategy.new)
        tree = parserInterp.parse(patternRuleIndex)
    rescue ParseCancellationException => e
        raise e.cause
    rescue RecognitionException => e
        raise e
    rescue Exception => e
        raise CannotInvokeStartRule.new(e)
    end

    # Make sure tree pattern compilation checks for a complete parse
    if tokens.LA(1)!=Token::EOF then
        raise StartRuleDoesNotConsumeFullPattern.new()
    end

    return ParseTreePattern.new(self, pattern, patternRuleIndex, tree)
end
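
A hedged sketch of compiling a pattern once and reusing it; statement_rule_index stands for a hypothetical rule index taken from your generated parser:

# Compile once, then match the same pattern against many trees.
compiled = matcher.compileTreePattern("<ID> = <expr>;", statement_rule_index)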

#getRuleTagToken(tree) ⇒ Object

Is tree a (expr <expr>) subtree?



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 203

def getRuleTagToken(tree)
    if tree.kind_of? RuleNode then
        if tree.getChildCount()==1 and tree.getChild(0).kind_of?  TerminalNode then
            c = tree.getChild(0)
            return c.symbol if c.symbol.kind_of?  RuleTagToken
        end
    end
    return nil
end

#map(labels, label, tree) ⇒ Object



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 194

def map(labels, label, tree)
    v = labels[label]
    if v.nil? 
        v = Array.new
    end
    v.push(tree)
    labels[label] = v
end

#matchesPattern(tree, pattern) ⇒ Object

Does pattern match tree? Pass in a compiled pattern instead of a string representation of a tree pattern.


# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 62

def matchesPattern(tree, pattern)
    mismatchedNode = self.matchImpl(tree, pattern.patternTree, Hash.new)
    return mismatchedNode.nil? 
end
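
For example, using the compiled pattern from the #compileTreePattern sketch above (tree and compiled are assumed names, not part of this API):

if matcher.matchesPattern(tree, compiled)
  puts "tree has the shape <ID> = <expr>;"
end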

#matchesRuleIndex(tree, pattern, patternRuleIndex) ⇒ Object

Does pattern, matched as rule patternRuleIndex, match tree?



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 54

def matchesRuleIndex(tree, pattern, patternRuleIndex)
    p = self.compileTreePattern(pattern, patternRuleIndex)
    return self.matchesPattern(tree, p)
end
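
A convenience form that compiles the string pattern on every call; a rough sketch using the hypothetical statement_rule_index from above:

ok = matcher.matchesRuleIndex(tree, "<ID> = <expr>;", statement_rule_index)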

#matchImpl(tree, patternTree, labels) ⇒ Object

Recursively walk tree against patternTree, filling ParseTreeMatch#labels.

Returns:

  • the first node encountered in tree which does not match a corresponding node in patternTree, or nil if the match was successful. The specific node returned depends on the matching algorithm used by the implementation, and may be overridden.

Raises:

  • (Exception)


# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 130

def matchImpl(tree, patternTree, labels)
    raise Exception.new("tree cannot be null") if tree.nil?
    raise Exception.new("patternTree cannot be null") if patternTree.nil?

    # x and <ID>, x and y, or x and x; or could be mismatched types
    if tree.kind_of? TerminalNode and patternTree.kind_of? TerminalNode then
        mismatchedNode = nil
        # both are tokens and they have same type
        if tree.symbol.type == patternTree.symbol.type then
            if patternTree.symbol.kind_of? TokenTagToken then # x and <ID>
                tokenTagToken = patternTree.symbol
                # track label->list-of-nodes for both token name and label (if any)
                self.map(labels, tokenTagToken.tokenName, tree)
                if not tokenTagToken.label.nil? 
                    self.map(labels, tokenTagToken.label, tree)
                end
            elsif tree.getText()==patternTree.getText() then
                # x and x
                nil
            else
                # x and y
                mismatchedNode = tree if mismatchedNode.nil?
            end
        else
            mismatchedNode = tree if mismatchedNode.nil? 
        end

        return mismatchedNode
    end

    if tree.kind_of? ParserRuleContext and patternTree.kind_of?  ParserRuleContext then
        mismatchedNode = nil
        # (expr ...) and <expr>
        ruleTagToken = self.getRuleTagToken(patternTree)
        if not ruleTagToken.nil? then
            m = nil
            if tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex then
                # track label->list-of-nodes for both rule name and label (if any)
                self.map(labels, ruleTagToken.ruleName, tree)
                if not ruleTagToken.label.nil? then
                    self.map(labels, ruleTagToken.label, tree)
                end
            else
                mismatchedNode = tree if mismatchedNode.nil?
            end
            return mismatchedNode
        end

        # (expr ...) and (expr ...)
        if tree.getChildCount()!=patternTree.getChildCount() then
            mismatchedNode = tree if mismatchedNode.nil?
            return mismatchedNode
        end

        n = tree.getChildCount()
        for i in 0..n-1 do
            childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
            return childMatch if not childMatch.nil? 
        end
        return mismatchedNode
    end
    # if nodes aren't both tokens or both rule nodes, can't match
    return tree
end
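
This is normally invoked for you by #matchPattern, but a direct call looks roughly like the following; tree and compiled are assumptions carried over from the earlier sketches:

labels = Hash.new
mismatched = matcher.matchImpl(tree, compiled.patternTree, labels)
if mismatched.nil?
  # labels now maps tag names and labels to arrays of matched nodes
  labels.each { |name, nodes| puts "#{name}: #{nodes.length} node(s)" }
end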

#matchPattern(tree, pattern) ⇒ Object

Compare pattern matched against tree and return a ParseTreeMatch object that contains the matched elements, or the node at which the match failed. Pass in a compiled pattern instead of a string representation of a tree pattern.



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 83

def matchPattern(tree, pattern)
    labels = Hash.new
    mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels)
    return ParseTreeMatch.new(tree, pattern, labels, mismatchedNode)
end
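
A sketch of inspecting the result; it assumes the returned ParseTreeMatch exposes the labels Hash and mismatchedNode passed to its constructor above:

m = matcher.matchPattern(tree, compiled)
if m.mismatchedNode.nil?        # assumption: accessor mirrors the constructor argument
  id_nodes = m.labels["ID"]     # nodes bound to the <ID> tag, if any
end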

#matchRuleIndex(tree, pattern, patternRuleIndex) ⇒ Object

Compare pattern matched as rule patternRuleIndex against tree and return a ParseTreeMatch object that contains the matched elements, or the node at which the match failed.



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 72

def matchRuleIndex(tree, pattern, patternRuleIndex)
    p = self.compileTreePattern(pattern, patternRuleIndex)
    return self.matchPattern(tree, p)
end
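
A string-pattern convenience over #matchPattern; a minimal sketch with the hypothetical statement_rule_index:

m = matcher.matchRuleIndex(tree, "<ID> = <expr>;", statement_rule_index)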

#setDelimiters(start, stop, escapeLeft) ⇒ Object

Set the delimiters used for marking rule and token tags within concrete syntax used by the tree pattern parser.

Parameters:

  • start

    The start delimiter.

  • stop

    The stop delimiter.

  • escapeLeft

    The escape sequence to use for escaping a start or stop delimiter.

Raises:

  • (Exception)


# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 45

def setDelimiters(start, stop, escapeLeft)
    raise Exception.new("start cannot be null or empty") if start.nil? or start.empty? 
    raise Exception.new("stop cannot be null or empty") if stop.nil? or stop.empty?
    self.start = start
    self.stop = stop
    self.escape = escapeLeft
end
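
For instance, to switch to double-angle tags (the delimiters here are arbitrary examples):

matcher.setDelimiters("<<", ">>", "\\")
compiled = matcher.compileTreePattern("<<ID>> = <<expr>>;", statement_rule_index)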

#split(pattern) ⇒ Object

Split <ID> = <e:expr> ; into 4 chunks for tokenizing by #tokenize.



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 249

def split(pattern)
    p = 0
    n = pattern.length
    chunks = Array.new
    # find all start and stop indexes first, then collect
    starts = Array.new
    stops = Array.new
    while p < n do
        if p == pattern.index(self.escape + self.start, p) then
            p = p + self.escape.length + self.start.length
        elsif p == pattern.index(self.escape + self.stop, p) then
            p = p + self.escape.length + self.stop.length
        elsif p == pattern.index(self.start, p) then
            starts.push(p)
            p = p + self.start.length
        elsif p == pattern.index(self.stop, p) then
            stops.push(p)
            p = p + self.stop.length
        else
            p = p + 1
        end
    end
    nt = starts.length

    if nt > stops.length
        raise Exception.new("unterminated tag in pattern: #{pattern}")
    end
    if nt < stops.length
        raise Exception.new("missing start tag in pattern: #{pattern}")
    end

    for i in 0..(nt-1) do
        if starts[i] >= stops[i] then
            raise Exception.new("tag delimiters out of order in pattern: " + pattern)
        end
    end

    # collect into chunks now
    chunks.push(TextChunk.new(pattern)) if nt==0

    if nt>0 and starts[0]>0 then # copy text up to first tag into chunks
        text = pattern[0..starts[0]-1]
        chunks.push(TextChunk.new(text))
    end

    for i in 0..(nt-1) do
        # copy inside of <tag>
        tag = pattern[(starts[i] + self.start.length)..stops[i]-1]
        ruleOrToken = tag
        label = nil
        colon = tag.index(':')
        if not colon.nil? then
            label = tag[0..colon-1]
            ruleOrToken = tag[colon+1..tag.length-1]
        end
        chunks.push(TagChunk.new(label, ruleOrToken))
        if i+1 < (starts.length) then
            # copy from end of <tag> to start of next
            text = pattern[(stops[i] + self.stop.length())..starts[i+1]-1]
            chunks.push(TextChunk.new(text))
        end
    end

    if nt > 0 then
        afterLastTag = stops[nt - 1] + self.stop.length
        if afterLastTag < n then # copy text from end of last tag to end
            text = pattern[afterLastTag .. n -1]
            chunks.push(TextChunk.new(text))
        end
    end

    # strip out the escape sequences from text chunks but not tags
    return chunks.map do |c| 
        if c.kind_of? TextChunk then
            unescaped = c.text.gsub(self.escape, "")
            if unescaped.length < c.text.length then
               TextChunk.new(unescaped)
            else
               c
            end
        else 
            c
        end
    end
end
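
As a rough illustration of the chunking described above (with the default < and > delimiters), the docstring's example splits into four chunks:

chunks = matcher.split("<ID> = <e:expr> ;")
# => roughly: TagChunk(ID), TextChunk(" = "), TagChunk(e -> expr), TextChunk(" ;")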

#tokenize(pattern) ⇒ Object



# File 'lib/antlr4/tree/ParseTreePatternMatcher.rb', line 212

def tokenize(pattern)
    # split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
    chunks = self.split(pattern)

    # create token stream from text and tags
    tokens = Array.new
    for chunk in chunks do
        if chunk.kind_of? TagChunk then
            # add special rule token or conjure up new token from name
            if chunk.tag[0] =~ /[A-Z]/ then # upper-case start => token tag like <ID>
                ttype = self.parser.getTokenType(chunk.tag)
                if ttype==Token::INVALID_TYPE then
                    raise Exception.new("Unknown token #{chunk.tag} in pattern: #{pattern}")
                end
                tokens.push(TokenTagToken.new(chunk.tag, ttype, chunk.label))
            elsif chunk.tag[0] =~ /[a-z]/ then # lower-case start => rule tag like <expr>
                ruleIndex = self.parser.getRuleIndex(chunk.tag)
                if ruleIndex==-1 then
                    raise Exception.new("Unknown rule #{chunk.tag} in pattern: #{pattern}")
                end
                ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]
                tokens.push(RuleTagToken.new(chunk.tag, ruleImaginaryTokenType, chunk.label))
            else
                raise Exception.new("Invalid tag #{chunk.tag} in pattern: #{pattern}")
            end
        else
            self.lexer.setInputStream(InputStream.new(chunk.text))
            t = self.lexer.nextToken()
            while t.type!=Token::EOF do 
                tokens.push(t)
                t = self.lexer.nextToken()
            end
        end
    end
    return tokens
end
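
A brief sketch of the token stream this produces for a pattern; the '=' and ';' tokens come from the wrapped lexer, while the tags become TokenTagToken and RuleTagToken instances:

tokens = matcher.tokenize("<ID> = <expr>;")
tokens.each { |t| puts t.inspect }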