Module: SrcML

Extended by:
Logging
Defined in:
lib/srcML/srcml.rb,
lib/exceptions/parse_error.rb,
lib/exceptions/unsupported_language.rb

Overview

Wrapper for the srcML command-line interface, with functions specifically directed at extracting method names and determining which methods differ between files.

Defined Under Namespace

Classes: ParseError, UnsupportedLanguageError

Constant Summary collapse

LANGUAGES =

Hash of supported languages.

The file extension must map to a known format for srcML

{'.java' => 'Java',
'.C'    => 'C++',
'.cc'   => 'C++',
'.cpp'  => 'C++',
'.CPP'  => 'C++',
'.c++'  => 'C++',
'.cp'   => 'C++',
'.cs'   => 'C#',
'.c'    => 'C'}
@@ignore_comments =

Whether to remove comments from the source

false
@@ignore_whitespace =

Whether to remove whitespace from the source

false
@@basename_qualify =

Whether to qualify method names with only the basename of the file (file.a) rather than with the path as given (/lib/file.a)

false
@@residuals =

Whether to consider changes that happen outside of methods

false

Class Method Summary collapse

Methods included from Logging

configure_logger_for, logger, logger_for, set_level, set_location

Class Method Details

.ast(path, revision: FALSE) ⇒ Nokogiri::XML::Document

Calculates the AST of the given file

Parameters:

  • path (String)

    the path to the file

  • revision (String)

    if specified, retrieves the file from the given revision

Returns:

  • (Nokogiri::XML::Document)

    an AST representation



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/srcML/srcml.rb', line 96

# Builds the srcML AST of a source file, read either from the working tree
# or from a specific git revision.
#
# When +revision+ is given, the language is looked up in LANGUAGES by file
# extension and the content is obtained with `git show <revision>:<path>`;
# otherwise srcML is run directly on +path+. Depending on the module settings,
# comments (ignore_comments?) and whitespace (ignore_whitespace?) are stripped
# from the resulting document before it is returned.
#
# @param path [String] the path to the file
# @param revision [String, false] if given, retrieves the file from this revision
# @return [Nokogiri::XML::Document] an AST representation
# @raise [SrcML::UnsupportedLanguageError] if a revision is given and the file
#   extension is not a key of LANGUAGES
# @raise [SrcML::ParseError] if the srcML command exits unsuccessfully
def self.ast(path, revision: false)
  if revision
    language = LANGUAGES[File.extname(path)]
    unless language
      raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
    end
    # bash is called explicitly to get support for process substitution.
    # The language and the git object name are handed over as positional
    # parameters ($1, $2) instead of being interpolated, so spaces or quotes
    # in a path/revision can neither break nor inject into the command line.
    script = "#{SRCML} --language \"$1\" <(git show \"$2\")"
    output, _stderr, status = Open3.capture3('bash', '-c', script, 'bash', language, "#{revision}:#{path}")
    unless status.success?
      raise SrcML::ParseError, "Failed to parse #{revision}:#{path} using the #{language} parser"
    end
  else
    # same positional-parameter technique; SRCML itself is still interpreted
    # by the shell exactly as before
    output, _stderr, status = Open3.capture3('sh', '-c', "#{SRCML} \"$1\"", 'sh', path)
    raise SrcML::ParseError, "Failed to parse #{path}" unless status.success?
  end

  # turn into structured xml
  xml = Nokogiri::XML(output)

  if ignore_comments?
    xml.search('comment').each do |comment|
      # The newline + indentation directly in front of a comment is considered
      # "part of" the comment and removed with it. Only text nodes are trimmed:
      # assigning content= on an element would replace all of its children
      # with flat text and destroy the structure of the preceding code.
      preceding = comment.previous_sibling
      if preceding && preceding.text?
        preceding.content = preceding.content.sub(/\n\s*\z/, "")
      end
      comment.remove
    end
  end

  if ignore_whitespace?
    xml.search("text()").each do |node|
      if node.content =~ /\S/
        # strip every run of whitespace inside text that carries content
        node.content = node.content.gsub(/[[:space:]]+/, "")
      else
        # whitespace-only text nodes are dropped entirely
        node.remove
      end
    end
  end

  xml
end

.basename_qualify=(bool) ⇒ Object



66
67
68
69
# File 'lib/srcML/srcml.rb', line 66

# Sets whether files are qualified by their basename only.
#
# @param bool [Boolean] the new setting
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.basename_qualify=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@basename_qualify = bool
end

.basename_qualify? ⇒ Boolean

Returns:

  • (Boolean)


71
72
73
# File 'lib/srcML/srcml.rb', line 71

# Reads the module-wide basename-qualify setting.
#
# @return [Boolean] the current value of @@basename_qualify
def self.basename_qualify?
  @@basename_qualify
end

.changed_methods(old, new) ⇒ Array<String>

Calculates the methods that differ between the old and the new version of a file

Parameters:

  • old (String)

    the path to the old file

  • new (String)

    the path to the new file

Returns:

  • (Array<String>)

    the changed methods



246
247
248
249
250
# File 'lib/srcML/srcml.rb', line 246

# Determines which methods differ between two files on disk.
#
# Both files are run through .methods and the resulting hashes are compared
# with .different_entries.
#
# @param old [String] the path to the old file
# @param new [String] the path to the new file
# @return [Array<String>] the changed methods
def self.changed_methods(old, new)
  before = methods(old)
  after = methods(new)
  different_entries(before, after)
end

.changed_methods_git(path, revision) ⇒ Array<String>

Like #changed_methods, but retrieves the file from a git revision: calculates the methods of the file at path that changed between the revision's parent (revision~1) and the given revision.

Parameters:

  • path (String)

    the path to the file

  • revision (String)

    the revision to retrieve the file from

Returns:

  • (Array<String>)

    the changed methods



259
260
261
262
263
# File 'lib/srcML/srcml.rb', line 259

# Determines which methods of a file changed in a git revision.
#
# The file is read at +revision+ and at its first parent (+revision+ with
# '~1' appended); the two method hashes are compared with .different_entries.
#
# @param path [String] the path to the file
# @param revision [String] the revision to retrieve the file from
# @return [Array<String>] the changed methods
def self.changed_methods_git(path, revision)
  # the requested revision is parsed first, then its parent
  after = methods(path, revision: revision)
  before = methods(path, revision: revision + '~1')
  different_entries(before, after)
end

.different_entries(old, new) ⇒ Array<String>

Given two Hashes, returns all the keys that either have different values in the two hashes or are not in both hashes.

Parameters:

  • old (Hash)

    the hash for the old version

  • new (Hash)

    the hash for the new version

Returns:

  • (Array<String>)


217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/srcML/srcml.rb', line 217

# Lists every key whose entry differs between two hashes.
#
# The result contains, in this order: the keys of +new+ that are missing from
# +old+ or map to a different value there (in +new+'s iteration order),
# followed by the keys that exist only in +old+.
#
# @param old [Hash] the earlier entries
# @param new [Hash] the later entries
# @return [Array<String>] the keys that were added, modified or removed
def self.different_entries(old, new)
  # keys that are new, or present in both hashes with different values
  added_or_modified = new.select { |key, value| !old.key?(key) || value != old[key] }.keys
  # keys that are only in old
  removed = old.keys - new.keys
  added_or_modified + removed
end

.ignore_comments=(bool) ⇒ Object



48
49
50
51
# File 'lib/srcML/srcml.rb', line 48

# Sets whether comments are removed from the source.
#
# @param bool [Boolean] the new setting
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.ignore_comments=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@ignore_comments = bool
end

.ignore_comments? ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/srcML/srcml.rb', line 53

# Reads the module-wide ignore-comments setting.
#
# @return [Boolean] the current value of @@ignore_comments
def self.ignore_comments?
  @@ignore_comments
end

.ignore_whitespace=(bool) ⇒ Object



57
58
59
60
# File 'lib/srcML/srcml.rb', line 57

# Sets whether whitespace is removed from the source.
#
# @param bool [Boolean] the new setting
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.ignore_whitespace=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@ignore_whitespace = bool
end

.ignore_whitespace? ⇒ Boolean

Returns:

  • (Boolean)


62
63
64
# File 'lib/srcML/srcml.rb', line 62

# Reads the module-wide ignore-whitespace setting.
#
# @return [Boolean] the current value of @@ignore_whitespace
def self.ignore_whitespace?
  @@ignore_whitespace
end

.methods(path, revision: FALSE) ⇒ Hash[method_name => method_hash]

Returns the methods of the given file

If the method has any parameters, the parameter types are also returned with the method name

Parameters:

  • path (String)

    the path to the file

  • revision (String)

    if specified, retrieves the file from the given revision

Returns:

  • (Hash[method_name => method_hash])

    a hash storing the methods as keys and the hashed method as values



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/srcML/srcml.rb', line 151

# Extracts the methods of the given file and hashes their bodies.
#
# The AST is partitioned by its <class> elements and the <function> elements
# inside each partition are collected; when the AST contains no <class>
# element, the whole document is searched instead. Every function is keyed as
# "<file>:<class>:<name>(<parameter types>)" - the class part is omitted when
# no class name is known, the parenthesised part when no parameters were
# found - and mapped to the String#hash of the content of its <block>.
# Functions without a name or without a <block> are skipped (and logged at
# debug level).
#
# With residuals? enabled, an additional "<file>:@residuals" entry holds the
# hash of what is left of the file once all functions are removed.
#
# NOTE(review): the values come from String#hash, which is presumably seeded
# per Ruby process - compare them only within a single run; confirm before
# persisting them.
#
# @param path [String] the path to the file
# @param revision [String, false] if given, retrieves the file from this revision
# @return [Hash{String => Integer}] the methods as keys and the hashed method
#   bodies as values
def self.methods(path, revision: false)
  tree = ast(path, revision: revision)
  # fully qualified method name => hash of the method body
  found = {}
  qualified_file = basename_qualify? ? File.basename(path) : path

  # split file based on class declarations; without any, use the full ast
  partitions = tree.search("class")
  partitions = [tree] if partitions.empty?

  partitions.each do |partition|
    class_name = ''
    # the whole-document fallback is not a class, so there is no class name
    # to look up (and nothing to report as missing)
    unless partition.document?
      if (class_node = partition.at_css("/name"))
        class_name = class_node.text
      else
        logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location '/name'. Context:\n###\n#{partition}\n###"
      end
    end

    partition.search("function").each do |function|
      name = function.at_css("/name")
      unless name
        logger.debug "(#{qualified_file}) Could not identify function name at location '/name'. Context:\n###\n#{function}\n###"
        next
      end

      # attempt to extract the parameter types, most specific structure first
      parameters = []
      if (parameter_list = function.at_css("/parameter_list"))
        parameter_list.search("parameter").each do |param|
          type = param.at_css("decl type name name") ||
                 param.at_css("decl type name") ||
                 param.at_css("decl type") ||
                 param.at_css("type") ||
                 param.at_css("name")
          if type
            parameters << type.text
          else
            logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{param}\n###"
          end
        end
      else
        logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
      end

      method_name = parameters.empty? ? name.text : "#{name.text}(#{parameters.join(',')})"
      qualifiers = class_name.empty? ? [qualified_file] : [qualified_file, class_name]
      fully_qualified_name = (qualifiers << method_name).join(':')

      if (block = function.at_css("block"))
        found[fully_qualified_name] = block.content.hash
      else
        logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
      end
    end
  end

  if residuals?
    # add residuals entry
    # i.e., what is left of the code when all methods are removed
    tree.search("function").remove
    found["#{qualified_file}:@residuals"] = tree.content.hash
  end

  found
end

.residuals=(bool) ⇒ Object



75
76
77
78
# File 'lib/srcML/srcml.rb', line 75

# Sets whether changes outside of methods are considered.
#
# @param bool [Boolean] the new setting
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.residuals=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@residuals = bool
end

.residuals? ⇒ Boolean

Returns:

  • (Boolean)


80
81
82
# File 'lib/srcML/srcml.rb', line 80

# Reads the module-wide residuals setting.
#
# @return [Boolean] the current value of @@residuals
def self.residuals?
  @@residuals
end