Class: TSQL_ShParser

Inherits:
Object
  • Object
show all
Defined in:
lib/tsql_shparser.rb

Overview

Shallow Parser for t-SQL: Creates an array of arrays of Token objects for each t-SQL Statement parsed as input and generates an array of TSQLStmt objects.

Constant Summary collapse

VERSION =
"0.0.1"
@@start =
["SELECT","INSERT","UPDATE","DELETE","DROP","ALTER","CREATE","TRUNCATE"]
@@first =
["ALTER", "BACKUP", "BEGIN", "BREAK", "CHECKPOINT", "CONTINUE", "DENY", "GRANT", "REVOKE", 
"BULK", "CLOSE", "COMMIT", "CREATE","DBCC", "DEALLOCATE",
"DECLARE", "DELETE","DROP", "DUMP", "ERRLVL", "EXEC", "EXECUTE", "EXIT", "FETCH",
"GO", "GOTO", "IF", "INSERT","KILL", "LOAD", "OPEN", "OPENDATASOURCE", 
"OPENQUERY", "OPENROWSET","PRINT","RAISERROR", "READTEXT", "RECONFIGURE", 
"RESTORE","RETURN", "ROLLBACK", "SAVE", "SELECT", "SET", "SETUSER", "SHUTDOWN",
"STATISTICS","TRUNCATE", "UPDATE", "UPDATETEXT", "USE","WAITFOR", "WHILE", "WRITETEXT"]

Instance Method Summary collapse

Constructor Details

#initialize(file = nil) ⇒ TSQL_ShParser

Returns a new instance of TSQL_ShParser.



29
30
31
32
33
# File 'lib/tsql_shparser.rb', line 29

def initialize(file=nil)
 @input_file = file
 @tok = Tokenizer.new(file)
 @tok.tokenize_file  if file
end

Instance Method Details

#is_sub_select?Boolean

Return nil if it is not a sub-select Return relative index (w.r.t SELECT) of the token

prior to the leftmost LEFT_PARAN otherwise

Returns:

  • (Boolean)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/tsql_shparser.rb', line 38

def is_sub_select?

  ss = nil
  
  # Make sure that we are indeed looking at the sub-SELECT
  # The SELECT token is already consumed, so we have to look back to verify
  curr = @tok.look_back(1)    
  return ss unless (curr.token_value == 'SELECT')
  
  # If there is no token before SELECT then this is not a sub-SELECT
  prev = @tok.look_back(2)
  return ss unless prev     

  n = 3
  
  # Take care of the arbitrary/redundant nesting of expressions in ( )    
  while(prev && (prev.token_value == LEFT_PARAN))
    prev = @tok.look_back(n)
    n += 1
  end 

  # Check the token before the left most LEFT_PARAN
  follow = ['SELECT','DISTINCT','PERCENT','JOIN','WHERE',
            'BY','IN','ANY','ALL','EXISTS','UNION','FROM',
            '<','>','=','>=','<=','<>',',']  

  prev_prev = @tok.look_back(n)

  ss = n if prev && follow.include?(prev.token_value) 

  # Consider the case: SELECT TOP 10 (select ...)   
  
  if (prev && (prev.token_value =~ /^\d+$/))    
    ss = n if (prev_prev && (prev_prev.token_value == 'TOP'))
  end

  # Consider the case UNION ALL
  
  if (prev && (prev.token_value == 'ALL'))
    ss = n if (prev_prev && (prev_prev.token_value == 'UNION'))
  end
  
  ## Finally, consider the case of SELECT being part of the INSERT statement
  #
  # skip the (optional) list of column names that may 
  # follow the name of the table in an INSERT, for e.g:
  #     INSERT INTO Table1 (COL1, COL2, COL3) 
  #     SELECT 'A1','B2','C3';      
  if (prev && prev.token_value == RIGHT_PARAN) 

    prev = @tok.look_back(n)
 
    # Rewind to the matching LEFT_PARAN
    # We are more strict here and verify that all tokens
    # are either Id or Comma: not considering the possibility of a Dot
    while (prev and ((prev.token_type == :Id) or (prev.token_type == :Comma)))
      prev = @tok.look_back(n)
      n += 1        
    end
    
    # This must be the LEFT_PARAN
    prev = @tok.look_back(n) if (prev and (prev.token_value == LEFT_PARAN))
  end
     
  while (prev and ((prev.token_type == :Id) or (prev.token_type == :Dot)))
    prev = @tok.look_back(n)
    n += 1      
  end
 
  # ??? prev = @tok.look_back(n) ???
  ss = n if (prev and prev.token_value == 'INSERT')

  prev_prev = @tok.look_back(n)
  ss = n+1 if (prev and (prev.token_value == 'INTO') and prev_prev and (prev_prev.token_value == 'INSERT'))      
 
  ss
  
end

#parse(str = nil) ⇒ Object

Parse the string passed as the argument (str). If no string is passed, it is assumed that the parser constructor was invoked with a file-name which was used to open an existing file and was parsed successfully. If no file-name or string was given, an empty array is returned. Otherwise an array of TSQLStmt objects is returned, one object per t-SQL statement that starts with the symbol in @@start.



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/tsql_shparser.rb', line 173

def parse(str=nil)

  stmts = []

  @tok.tokenize_string(str) if str
  t = @tok.get_next_token
  while t

    category = t.token_value
    
    if @@start.include?(category)
        @tok.unget_token
        s = parse_stmt(category)
        stmts << s unless s == []
    end
    
    t = @tok.get_next_token

  end
  
  stmts.map{|st| TSQLStmt.new(st)}
  
end

#parse_stmt(category = "SELECT") ⇒ Object

Keep consuming tokens till you detect the end of the t-SQL statement. It is not very accurate about determining the end especially if the statements are redundantly nested inside paranthesis or are immediately followed by user-defined functions or stored procs. It takes in as its argument a category which is one of the symbols in the @@start array. It returns an array (possibly empty) of Token objects.



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/tsql_shparser.rb', line 123

def parse_stmt(category="SELECT")

  stmt = []
  
  curr = @tok.get_next_token
  return stmt if (curr.token_value != category)     
          
  stmt << curr
  
  found_set = false if category == 'UPDATE'
  case_end  = 0    
  
  prev = curr.token_value
  
  # Loop to find the end of the stmt
  loop do 

    curr = @tok.get_next_token
    break unless curr

    curr_tok  = curr.token_value
    case_end += 1 if curr_tok == 'CASE'
    case_end -= 1 if curr_tok == 'END'
    
    if ((not is_sub_select?) and (@@first.include?(curr_tok) or (curr.token_type == :Label) or (curr_tok == ';') or ((case_end < 0) and (curr_tok == 'END'))))
      # Handle TRIGGER Syntax
      unless (['OF','FOR','AFTER', ',','IF','AND','OR'].include?(prev) and ['INSERT','UPDATE','DELETE'].include?(curr_tok))        
        if ((category == 'UPDATE') and (not found_set) and (curr_tok == 'SET'))
          found_set = true
        else
          @tok.unget_token 
          break
        end
      end
    end
    
    stmt << curr    
    prev = curr_tok
  end 
  
  stmt
  
end