Class: EmergeCLI::Reaper::AstParser

Inherits:
Object
  • Object
show all
Defined in:
lib/reaper/ast_parser.rb

Overview

Parses the AST of a given file using Tree Sitter and allows us to find usages or delete types. This does have a lot of limitations since it only looks at a single file at a time, but can get us most of the way there.

Constant Summary collapse

# Declaration node type symbols, keyed by language name.
# The per-language arrays are frozen as well as the hash, so the table cannot
# be modified in place through a lookup result.
DECLARATION_NODE_TYPES =
{
  'swift' => %i[class_declaration protocol_declaration].freeze,
  'kotlin' => %i[class_declaration protocol_declaration interface_declaration object_declaration].freeze,
  'java' => %i[class_declaration protocol_declaration interface_declaration].freeze,
  'objc' => %i[class_declaration protocol_declaration class_implementation class_interface].freeze
}.freeze
# Identifier node type symbols, keyed by language name (currently the same
# list for every language).
IDENTIFIER_NODE_TYPES =
{
  'swift' => %i[simple_identifier qualified_name identifier type_identifier].freeze,
  'kotlin' => %i[simple_identifier qualified_name identifier type_identifier].freeze,
  'java' => %i[simple_identifier qualified_name identifier type_identifier].freeze,
  'objc' => %i[simple_identifier qualified_name identifier type_identifier].freeze
}.freeze
# Comment and import node type symbols, keyed by language name.
COMMENT_AND_IMPORT_NODE_TYPES =
{
  'swift' => %i[comment import_declaration].freeze,
  'kotlin' => %i[comment import_header].freeze,
  'java' => %i[comment import_declaration].freeze,
  'objc' => %i[comment import_declaration preproc_include].freeze
}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(language) ⇒ AstParser

Returns a new instance of AstParser.



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/reaper/ast_parser.rb', line 55

# Creates the tree-sitter parser and loads the grammar library matching the
# host operating system and CPU architecture, both derived from RUBY_PLATFORM.
#
# @param language [String] language name, e.g. 'swift'; forwarded to the
#   grammar lookup and to TreeSitter::Language.load
# @raise [RuntimeError] when RUBY_PLATFORM matches no supported OS or architecture
def initialize(language)
  @parser = TreeSitter::Parser.new
  @language = language
  @current_file_contents = nil

  os_name =
    if RUBY_PLATFORM.match?(/darwin/)
      'darwin'
    elsif RUBY_PLATFORM.match?(/linux/)
      'linux'
    else
      raise "Unsupported platform: #{RUBY_PLATFORM}"
    end

  cpu_arch =
    if RUBY_PLATFORM.match?(/x86_64|amd64/)
      'x86_64'
    elsif RUBY_PLATFORM.match?(/arm64|aarch64/)
      'arm64'
    else
      raise "Unsupported architecture: #{RUBY_PLATFORM}"
    end

  grammar_path = self.class.find_parser_path(language, os_name, cpu_arch)
  @parser.language = TreeSitter::Language.load(language, grammar_path)
end

Instance Attribute Details

#language ⇒ Object (readonly)

Returns the value of attribute language.



30
31
32
# File 'lib/reaper/ast_parser.rb', line 30

# Reader for @language, the language value stored by the constructor.
def language
  @language
end

#parser ⇒ Object (readonly)

Returns the value of attribute parser.



30
31
32
# File 'lib/reaper/ast_parser.rb', line 30

# Reader for @parser, the TreeSitter::Parser instance created by the constructor.
def parser
  @parser
end

Class Method Details

.find_parser_path(language, platform, arch) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/reaper/ast_parser.rb', line 35

# Locates the compiled tree-sitter grammar library for a language/platform/arch
# combination and memoizes the result per combination.
#
# Candidates are checked in order: the +parsers+ directory two levels above this
# file, then the +parsers+ directory of the installed +emerge+ gem (skipped when
# that gem cannot be found).
#
# @param language [String] grammar name, e.g. 'swift'
# @param platform [String] 'darwin' selects the +dylib+ extension, anything else +so+
# @param arch [String] architecture suffix used in the file name, e.g. 'arm64'
# @return [String] the first candidate path that exists
# @raise [RuntimeError] when none of the candidate paths exists
def find_parser_path(language, platform, arch)
  # Created on first use so the lookup does not depend on the cache having been set up elsewhere.
  @parser_paths_cache ||= {}
  cache_key = "#{language}-#{platform}-#{arch}"
  return @parser_paths_cache[cache_key] if @parser_paths_cache.key?(cache_key)

  extension = platform == 'darwin' ? 'dylib' : 'so'
  parser_file = "libtree-sitter-#{language}-#{platform}-#{arch}.#{extension}"

  parser_paths = [
    File.join(File.dirname(__FILE__), '..', '..', 'parsers', parser_file) # Relative to this file
  ]
  begin
    # Installed gem path
    parser_paths << File.join(Gem::Specification.find_by_name('emerge').gem_dir, 'parsers', parser_file)
  rescue Gem::LoadError
    # Gem::MissingSpecError is a LoadError, not a StandardError. Without the gem
    # installed only the path relative to this file can match.
  end

  parser_path = parser_paths.find { |path| File.exist?(path) }
  raise "No language grammar found for #{language}. Searched in: #{parser_paths.join(', ')}" unless parser_path

  @parser_paths_cache[cache_key] = parser_path
  parser_path
end

Instance Method Details

#delete_type(file_contents:, type_name:) ⇒ Object

Deletes a type from the given file contents. Returns the resulting file contents, or nil when nothing but comments and imports remains after the deletion. TODO(telkins): Look into the tree-sitter query API to see if it simplifies this.



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/reaper/ast_parser.rb', line 85

# Removes the declarations (and extensions) of +type_name+ from +file_contents+.
#
# @param file_contents [String] source text to parse
# @param type_name [String] name compared against each declaration's fully qualified type name
# @return [String, nil] the edited source, keeping the original trailing-newline state;
#   nil when the edited source consists only of comments and imports
def delete_type(file_contents:, type_name:)
  @current_file_contents = file_contents
  original_tree = @parser.parse_string(nil, file_contents)
  root_cursor = TreeSitter::TreeCursor.new(original_tree.root_node)
  pending = [root_cursor.current_node]
  removal_ranges = []

  # Breadth-first walk over the named nodes, collecting line ranges to drop.
  while (current = pending.shift)
    Logger.debug "Processing node: #{current.type} #{node_text(current)}"

    if declaration_node_types.include?(current.type)
      name_node = find_type_identifier(current)
      remove_node(current, removal_ranges) if name_node && fully_qualified_type_name(name_node) == type_name
    end

    if extension?(current)
      extended_type = current.select { |n| n.type == :user_type }.first
      remove_node(current, removal_ranges) if extended_type && fully_qualified_type_name(extended_type) == type_name
    end

    current.each_named { |child| pending << child }
  end

  source_lines = file_contents.split("\n")
  removal_ranges.each do |span|
    first_line = span[:start]
    last_line = span[:end]
    Logger.debug "Removing lines #{first_line} to #{last_line}"
    first_line.upto(last_line) { |index| source_lines[index] = nil }

    # Remove extra newline after class declaration, but only if it's blank
    following = source_lines[last_line + 1]
    source_lines[last_line + 1] = nil if following&.match?(/^\s*$/)
  end

  stripped_source = source_lines.compact.join("\n")
  reparsed_tree = @parser.parse_string(nil, stripped_source)
  return nil if only_comments_and_imports?(TreeSitter::TreeCursor.new(reparsed_tree.root_node))

  # Preserve original newline state
  result = stripped_source.rstrip
  result += "\n" if file_contents.end_with?("\n")
  result
end

#delete_usage(file_contents:, type_name:) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/reaper/ast_parser.rb', line 167

# Scans +file_contents+ for identifier nodes whose text equals +type_name+ and
# removes the enclosing node chosen by find_removable_parent for each match.
#
# @param file_contents [String] source text to parse
# @param type_name [String] identifier text to look for
# @return [Object] +file_contents+ itself when nothing removable was found,
#   otherwise the result of remove_nodes_from_content
def delete_usage(file_contents:, type_name:)
  @current_file_contents = file_contents
  syntax_tree = @parser.parse_string(nil, file_contents)
  root_cursor = TreeSitter::TreeCursor.new(syntax_tree.root_node)
  pending = [root_cursor.current_node]
  removals = []

  Logger.debug "Starting to scan for usages of #{type_name}"

  # Breadth-first walk over every child node.
  while (current = pending.shift)
    if identifier_node_types.include?(current.type) && node_text(current) == type_name
      Logger.debug "Found usage of #{type_name} in node type: #{current.type}"
      target = find_removable_parent(current)
      if target
        Logger.debug "Will remove parent node of type: #{target.type}"
        Logger.debug "Node text to remove: #{node_text(target)}"
        removals << target
      else
        Logger.debug 'No suitable parent node found for removal'
      end
    end

    current.each { |child| pending << child }
  end

  if removals.empty?
    file_contents
  else
    Logger.debug "✅ Found #{removals.length} nodes to remove"
    remove_nodes_from_content(file_contents, removals)
  end
end

#find_usages(file_contents:, type_name:) ⇒ Object

Finds all usages of a given type in a file. TODO(telkins): Look into the tree-sitter query API to see if it simplifies this.



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/reaper/ast_parser.rb', line 135

# Finds all usages of a given type in a file.
#
# Each node of the parsed tree is visited breadth-first and classified as:
# - 'declaration' when its parent is a declaration node and its fully
#   qualified type name equals +type_name+;
# - 'identifier' when it is an identifier node whose text equals +type_name+;
# - 'declaration' (reported on the sibling's line) when it is an
#   +@implementation+ token followed by an identifier equal to +type_name+.
#
# @param file_contents [String] source text to parse
# @param type_name [String] type name to look for
# @return [Array<Hash>] one hash per usage with +:line+ (the node's start row as
#   reported by tree-sitter; presumably zero-based, confirm before displaying)
#   and +:usage_type+ ('declaration' or 'identifier')
def find_usages(file_contents:, type_name:)
  @current_file_contents = file_contents
  tree = @parser.parse_string(nil, file_contents)
  root_node = tree.root_node
  cursor = TreeSitter::TreeCursor.new(root_node)
  usages = []
  nodes_to_process = [cursor.current_node]

  while (node = nodes_to_process.shift)
    identifier_type = identifier_node_types.include?(node.type)
    Logger.debug "Processing node: #{node.type} #{node_text(node)}"
    # The root node is never treated as a declaration.
    declaration_type = node != root_node && declaration_node_types.include?(node.parent&.type)

    if declaration_type && fully_qualified_type_name(node) == type_name
      usages << { line: node.start_point.row, usage_type: 'declaration' }
    elsif identifier_type && node_text(node) == type_name
      usages << { line: node.start_point.row, usage_type: 'identifier' }
    elsif node.type == :@implementation
      next_sibling = node.next_named_sibling
      # A dangling @implementation token may have no named sibling at all.
      if next_sibling&.type == :identifier && node_text(next_sibling) == type_name
        usages << { line: next_sibling.start_point.row, usage_type: 'declaration' }
      end
    end

    node.each { |child| nodes_to_process.push(child) }
  end

  usages
end