Module: SrcML

Extended by:
Logging
Defined in:
lib/srcML/srcml.rb,
lib/exceptions/parse_error.rb,
lib/exceptions/unsupported_language.rb

Overview

Wrapper for the srcML command-line interface, with functions for extracting method names and for calculating which methods differ between two versions of a file.

Defined Under Namespace

Classes: ParseError, UnsupportedLanguageError

Constant Summary collapse

LANGUAGES =

Hash of supported languages.

The file extension must map to a known format for srcML

{'.java' => 'Java',
'.C'    => 'C++',
'.cc'   => 'C++',
'.cpp'  => 'C++',
'.CPP'  => 'C++',
'.c++'  => 'C++',
'.cp'   => 'C++',
'.cs'   => 'C#',
'.c'    => 'C'}
SRCML =

name of srcml command

'srcml'
@@ignore_comments =

Whether to remove comments from the source

false
@@ignore_whitespace =

Whether to remove whitespace from the source

false
@@basename_qualify =

Whether to qualify files with their full path or just their basename i.e., /lib/file.a or just file.a

false
@@residuals =

Whether to consider changes that happen outside of methods

false

Class Method Summary collapse

Methods included from Logging

configure_logger_for, logger, logger_for, set_level, set_location

Class Method Details

.ast(path, revision: FALSE) ⇒ Nokogiri::XML::Document

Calculates the AST of the given file

Parameters:

  • path (String)

    the path to the file

  • revision (String)

    if specified, retrieves the file from the given revision

Returns:

  • (Nokogiri::XML::Document)

    an AST representation



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/srcML/srcml.rb', line 89

# Calculates the AST of the given file by running it through the srcml command.
#
# With a +revision+, the language is looked up in LANGUAGES from the file
# extension and the content is read via `git show <revision>:<path>`.
# Without one, srcml is run directly on +path+.
# The resulting XML is then optionally stripped of comments and/or
# whitespace, depending on ignore_comments? and ignore_whitespace?.
#
# @param path [String] the path to the file
# @param revision [String, false] if specified, retrieves the file from the given revision
# @return [Nokogiri::XML::Document] an AST representation
# @raise [SrcML::UnsupportedLanguageError] if a revision is given and the
#   file extension is not a key of LANGUAGES
# @raise [SrcML::ParseError] if the srcml invocation exits unsuccessfully
def self.ast(path, revision: false)
  if revision
    language = LANGUAGES[File.extname(path)]
    unless language
      raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
    end
    # bash is called explicitly to get support for process substitution.
    # The language and the git object name are handed over as positional
    # parameters ($1, $2) rather than interpolated into the script, so that
    # quotes, spaces or shell metacharacters in them are never evaluated.
    script = "#{SRCML} --language \"$1\" <(git show \"$2\")"
    source_xml, _stderr, status = Open3.capture3('bash', '-c', script, 'bash', language, "#{revision}:#{path}")
    unless status.success?
      raise SrcML::ParseError, "Failed to parse #{revision}:#{path} using the #{language} parser"
    end
  else
    # argument-list form: no shell involved, so any path is passed verbatim
    source_xml, _stderr, status = Open3.capture3(SRCML, path)
    unless status.success?
      raise SrcML::ParseError, "Failed to parse #{path}, is srcml installed?"
    end
  end
  # turn into structured xml
  xml = Nokogiri::XML(source_xml)
  if ignore_comments?
    xml.search('comment').each do |comment|
      # newline + any number of spaces are removed from the preceding text
      # this gives a more intuitive behaviour
      # i.e., the newline + spaces before the comment is considered "part of" the comment
      # Only text nodes are rewritten: assigning content to an element
      # would replace its children with plain text.
      previous_node = comment.previous_sibling
      if previous_node && previous_node.text?
        previous_node.content = previous_node.content.gsub(/\n(\s)*/, "")
      end
      # now remove the comment
      comment.remove
    end
  end
  if ignore_whitespace?
    xml.search("text()").each do |node|
      if node.content =~ /\S/
        # strip every whitespace character from text that has other content
        node.content = node.content.gsub(/[[:space:]]+/, "")
      else
        # whitespace-only text nodes are dropped entirely
        node.remove
      end
    end
  end
  xml
end

.basename_qualify=(bool) ⇒ Object



59
60
61
62
# File 'lib/srcML/srcml.rb', line 59

# Sets the @@basename_qualify setting.
#
# @param bool [Boolean] the new value
# @raise [ArgumentError] unless bool is true or false
def self.basename_qualify=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@basename_qualify = bool
end

.basename_qualify? ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
# File 'lib/srcML/srcml.rb', line 64

# Reads the @@basename_qualify setting.
#
# @return [Boolean] the current value of @@basename_qualify
def self.basename_qualify?
  @@basename_qualify
end

.changed_methods(old, new) ⇒ Array<String>

Calculates the methods that differ between the old and the new version of a file, each given by its path.

Parameters:

  • old (String)

    the path to the old file

  • new (String)

    the path to the new file

Returns:

  • (Array<String>)

    the changed methods



239
240
241
242
243
# File 'lib/srcML/srcml.rb', line 239

# Extracts the methods of both files and reports which of them differ.
#
# @param old [String] the path to the old file
# @param new [String] the path to the new file
# @return [Array<String>] the changed methods
def self.changed_methods(old,new)
  # arguments are evaluated left to right: old file first, then new file
  different_entries(methods(old), methods(new))
end

.changed_methods_git(path, revision) ⇒ Array<String>

Like #changed_methods, but retrieves the file from git: calculates the methods of the file at path that differ between the given revision and its parent (revision~1).

Parameters:

  • path (String)

    the path to the file

  • revision (String)

    the revision to retrieve the file from

Returns:

  • (Array<String>)

    the changed methods



252
253
254
255
256
# File 'lib/srcML/srcml.rb', line 252

# Compares the methods of +path+ at +revision+ with those at its parent
# revision (revision~1) and reports which of them differ.
#
# @param path [String] the path to the file
# @param revision [String] the revision to retrieve the file from
# @return [Array<String>] the changed methods
def self.changed_methods_git(path,revision)
  at_revision = methods(path, revision: revision)
  at_parent = methods(path, revision: revision + '~1')
  different_entries(at_parent, at_revision)
end

.different_entries(old, new) ⇒ Array<String>

Given two Hashes, returns all the keys that either have different values in the two hashes or are not in both hashes.

Parameters: old (Hash) is the hash to compare from; new (Hash) is the hash to compare against.

Returns:

  • (Array<String>)


210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/srcML/srcml.rb', line 210

# Lists every key whose entry differs between the two hashes.
#
# The result holds, in the iteration order of +new+, the keys that are
# absent from +old+ or mapped to a different value there, followed by the
# keys that exist only in +old+.
#
# @param old [Hash] the hash to compare from
# @param new [Hash] the hash to compare against
# @return [Array<String>] the keys that differ
def self.different_entries(old,new)
  # keys that are new, or that carry a different value than before
  added_or_changed = new.select { |key, value| !old.key?(key) || value != old[key] }.keys
  # keys that are only in old
  removed = old.keys - new.keys
  added_or_changed + removed
end

.ignore_comments=(bool) ⇒ Object



41
42
43
44
# File 'lib/srcML/srcml.rb', line 41

# Sets the @@ignore_comments setting.
#
# @param bool [Boolean] the new value
# @raise [ArgumentError] unless bool is true or false
def self.ignore_comments=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@ignore_comments = bool
end

.ignore_comments? ⇒ Boolean

Returns:

  • (Boolean)


46
47
48
# File 'lib/srcML/srcml.rb', line 46

# Reads the @@ignore_comments setting.
#
# @return [Boolean] the current value of @@ignore_comments
def self.ignore_comments?
  @@ignore_comments
end

.ignore_whitespace=(bool) ⇒ Object



50
51
52
53
# File 'lib/srcML/srcml.rb', line 50

# Sets the @@ignore_whitespace setting.
#
# @param bool [Boolean] the new value
# @raise [ArgumentError] unless bool is true or false
def self.ignore_whitespace=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@ignore_whitespace = bool
end

.ignore_whitespace? ⇒ Boolean

Returns:

  • (Boolean)


55
56
57
# File 'lib/srcML/srcml.rb', line 55

# Reads the @@ignore_whitespace setting.
#
# @return [Boolean] the current value of @@ignore_whitespace
def self.ignore_whitespace?
  @@ignore_whitespace
end

.methods(path, revision: FALSE) ⇒ Hash[method_name => method_hash]

Returns the methods of the given file

If the method has any parameters, the parameter types are also returned with the method name

Parameters:

  • path (String)

    the path to the file

  • revision (String)

    if specified, retrieves the file from the given revision

Returns:

  • (Hash[method_name => method_hash])

    a hash storing the methods as keys and the hashed method as values



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/srcML/srcml.rb', line 144

# Returns the methods of the given file, keyed by a qualified method name.
#
# Each key is built as "<file>:<class>:<method>" (the class part is left out
# when no class name was found). If the method has any parameters, the
# parameter types are appended to the method name as "(type1,type2)".
# Each value is the hash of the text content of the function's <block>.
# When residuals? is set, an extra "<file>:@residuals" entry holds the hash
# of whatever is left of the file once all functions are removed.
#
# @param path [String] the path to the file
# @param revision [String, false] if specified, retrieves the file from the given revision
# @return [Hash{String => Integer}] the qualified method names mapped to
#   the hash of their body
def self.methods(path, revision: false)
  tree = ast(path, revision: revision)
  # hash each method and store in hash map with function name as key
  method_hashes = {}
  qualified_file = basename_qualify? ? File.basename(path) : path
  # split file based on class declarations
  partitions = tree.search("class")
  # no classes, just use the full ast
  partitions = [tree] if partitions.empty?
  partitions.each do |partition|
    # if partitioned into classes, attempt to extract class name
    class_name = ''
    # the whole document is used as partition only when there are no
    # classes, in which case there is no class name to look for (or miss)
    unless partition.document?
      if (class_node = partition.at_css("/name"))
        class_name = class_node.text
      else
        logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location \\name'. Context:\n###\n#{partition}\n###"
      end
    end
    partition.search("function").each do |function|
      name = function.at_css("/name")
      unless name
        logger.debug "(#{qualified_file}) Could not identify function name at location '\\name'. Context:\n###\n#{function}\n###"
        next
      end
      # attempt to extract parameters
      parameters = []
      if (parameter_list = function.at_css("/parameter_list"))
        parameter_list.search("parameter").each do |p|
          # try the most specific location of the type first
          parameter = p.at_css("decl type name name") || p.at_css("decl type name") || p.at_css("decl type") || p.at_css("type") || p.at_css("name")
          if parameter
            parameters << parameter.text
          else
            logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{p}\n###"
          end
        end
      else
        logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
      end
      method_name = parameters.empty? ? name.text : "#{name.text}(#{parameters.join(',')})"
      name_parts = [qualified_file]
      name_parts << class_name unless class_name.empty?
      name_parts << method_name
      fully_qualified_name = name_parts.join(':')
      if (block = function.at_css("block"))
        method_hashes[fully_qualified_name] = block.content.hash
      else
        logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
      end
    end
  end
  if residuals?
    # add residuals entry
    # i.e., whats left of the code when all methods are removed
    tree.search("function").remove
    method_hashes[qualified_file+':'+'@residuals'] = tree.content.hash
  end

  method_hashes
end

.residuals=(bool) ⇒ Object



68
69
70
71
# File 'lib/srcML/srcml.rb', line 68

# Sets the @@residuals setting.
#
# @param bool [Boolean] the new value
# @raise [ArgumentError] unless bool is true or false
def self.residuals=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@residuals = bool
end

.residuals? ⇒ Boolean

Returns:

  • (Boolean)


73
74
75
# File 'lib/srcML/srcml.rb', line 73

# Reads the @@residuals setting.
#
# @return [Boolean] the current value of @@residuals
def self.residuals?
  @@residuals
end