Class: Gitlab::Git::Diff

Inherits:
Object
  • Object
show all
Includes:
EncodingHelper
Defined in:
lib/gitlab/git/diff.rb

Constant Summary collapse

TimeoutError =
Class.new(StandardError)
DEFAULT_MAX_PATCH_BYTES =

The default maximum content size to display a diff patch.

If this value ever changes, make sure to create a migration to update current records and the default of `ApplicationSettings#diff_max_patch_bytes`.

200.kilobytes
MAX_PATCH_BYTES_UPPER_BOUND =

This is a limitation applied on the source (Gitaly), therefore we don’t allow persisting limits over that.

500.kilobytes
SERIALIZE_KEYS =
%i[diff new_path old_path a_mode b_mode new_file renamed_file deleted_file too_large].freeze
BINARY_NOTICE_PATTERN =
%r{Binary files (.*) and (.*) differ}

Constants included from EncodingHelper

EncodingHelper::BOM_UTF8, EncodingHelper::ENCODING_CONFIDENCE_THRESHOLD, EncodingHelper::ESCAPED_CHARS, EncodingHelper::UNICODE_REPLACEMENT_CHARACTER

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from EncodingHelper

#binary_io, #detect_binary?, #detect_encoding, #detect_libgit2_binary?, #encode!, #encode_binary, #encode_utf8, #encode_utf8_no_detect, #encode_utf8_with_escaping!, #encode_utf8_with_replacement_character, #strip_bom, #unquote_path

Constructor Details

#initialize(raw_diff, expanded: true, replace_invalid_utf8_chars: true) ⇒ Diff

Returns a new instance of Diff.



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/gitlab/git/diff.rb', line 147

# Builds a Diff from one of the supported raw representations and then
# re-encodes it via encode_diff_to_utf8.
#
# Hash and Gitlab::GitalyClient::Diff inputs are pruned when eligible;
# Gitaly::CommitDelta inputs are not.
#
# @param raw_diff [Hash, Gitlab::GitalyClient::Diff, Gitaly::CommitDelta]
# @param expanded [Boolean] stored as-is in @expanded
# @param replace_invalid_utf8_chars [Boolean] forwarded to encode_diff_to_utf8
# @raise [RuntimeError] when raw_diff is nil or of an unsupported type
def initialize(raw_diff, expanded: true, replace_invalid_utf8_chars: true)
  @expanded = expanded

  raise "Nil as raw diff passed" if raw_diff.nil?

  # Each branch loads the data and reports whether pruning applies to it.
  prunable =
    case raw_diff
    when Hash
      init_from_hash(raw_diff)
      true
    when Gitlab::GitalyClient::Diff
      init_from_gitaly(raw_diff)
      true
    when Gitaly::CommitDelta
      init_from_gitaly(raw_diff)
      false
    else
      raise "Invalid raw diff type: #{raw_diff.class}"
    end

  prune_diff_if_eligible if prunable
  encode_diff_to_utf8(replace_invalid_utf8_chars)
end

Instance Attribute Details

#a_modeObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the a_mode attribute (one of the "Diff properties").
#
# @return [Object] the current value of @a_mode
def a_mode
  @a_mode
end

#b_modeObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the b_mode attribute (one of the "Diff properties").
#
# @return [Object] the current value of @b_mode
def b_mode
  @b_mode
end

#deleted_fileObject Also known as: deleted_file?

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the deleted_file attribute (one of the "Stats properties").
# Also available as deleted_file?.
#
# @return [Object] the current value of @deleted_file
def deleted_file
  @deleted_file
end

#diffObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the diff attribute (one of the "Diff properties").
#
# @return [Object] the current value of @diff
def diff
  @diff
end

#expandedObject Also known as: expanded?

Returns the value of attribute expanded.



19
20
21
# File 'lib/gitlab/git/diff.rb', line 19

# Reader for the expanded attribute. Also available as expanded?.
#
# @return [Object] the current value of @expanded
def expanded
  @expanded
end

#new_fileObject Also known as: new_file?

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the new_file attribute (one of the "Stats properties").
# Also available as new_file?.
#
# @return [Object] the current value of @new_file
def new_file
  @new_file
end

#new_pathObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the new_path attribute (one of the "Diff properties").
#
# @return [Object] the current value of @new_path
def new_path
  @new_path
end

#old_pathObject

Diff properties



10
11
12
# File 'lib/gitlab/git/diff.rb', line 10

# Reader for the old_path attribute (one of the "Diff properties").
#
# @return [Object] the current value of @old_path
def old_path
  @old_path
end

#renamed_fileObject Also known as: renamed_file?

Stats properties



13
14
15
# File 'lib/gitlab/git/diff.rb', line 13

# Reader for the renamed_file attribute (one of the "Stats properties").
# Also available as renamed_file?.
#
# @return [Object] the current value of @renamed_file
def renamed_file
  @renamed_file
end

#too_large=(value) ⇒ Object (writeonly)

Sets the attribute too_large

Parameters:

  • value

    the value to set the attribute too_large to.



20
21
22
# File 'lib/gitlab/git/diff.rb', line 20

# Write-only setter for the too_large attribute.
#
# @param value the value to store in @too_large
def too_large=(value)
  @too_large = value
end

Class Method Details

.between(repo, head, base, options = {}, *paths) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/gitlab/git/diff.rb', line 39

# Diffs +head+ against +base+ in +repo+.
#
# Unless options[:straight] is truthy, only what is new in the source
# branch compared to the target branch is shown, not the other way around:
# the comparison starts from the merge base of +head+ and +base+, which is
# equivalent to diff with three dots (git diff branch1...branch2). From the
# git documentation: "git diff A...B" is equivalent to
# "git diff $(git-merge-base A B) B".
#
# @param repo an object responding to merge_base(head, base) and
#   diff(from, to, options, *paths)
# @param head the revision being compared
# @param base the revision compared against
# @param options [Hash, nil] :straight is read here; the rest is passed
#   through filter_diff_options before reaching repo.diff. nil is treated
#   as an empty hash, and the hash is never modified.
# @param paths optional paths forwarded to repo.diff
# @return whatever repo.diff returns
def between(repo, head, base, options = {}, *paths)
  # Normalise first so an explicit nil cannot blow up on the lookup below.
  options ||= {}

  # Read rather than delete: the caller's hash (possibly frozen or reused)
  # stays untouched, and filter_diff_options discards :straight anyway
  # because it is not one of the recognised keys.
  straight = options[:straight]

  common_commit = straight ? base : repo.merge_base(head, base)

  repo.diff(common_commit, head, filter_diff_options(options), *paths)
end

.binary_message(old_path, new_path) ⇒ Object

Return a binary diff message like:

“Binary files a/file/path and b/file/path differ\n”. This is used when we detect that a diff is binary using CharlockHolmes.



113
114
115
# File 'lib/gitlab/git/diff.rb', line 113

# Builds the one-line notice used in place of a binary patch, e.g.
# "Binary files a/file/path and b/file/path differ\n".
#
# @param old_path the path shown first in the notice
# @param new_path the path shown second in the notice
# @return [String] the notice, terminated by a newline
def binary_message(old_path, new_path)
  format("Binary files %s and %s differ\n", old_path, new_path)
end

.collect_patch_overage?Boolean

Returns:

  • (Boolean)


129
130
131
# File 'lib/gitlab/git/diff.rb', line 129

# Reports whether the :collect_all_diff_paths feature flag is enabled,
# coerced to a strict true/false.
#
# @return [Boolean]
def collect_patch_overage?
  Feature.enabled?(:collect_all_diff_paths) ? true : false
end

.filter_diff_options(options, default_options = {}) ⇒ Object

Return a copy of the options hash containing only recognized keys. Allowed options are:

:ignore_whitespace_change ::
  If true, changes in amount of whitespace will be ignored.

:max_files ::
  Limit how many files patches will be allowed for before collapsing

:max_lines ::
  Limit how many patch lines (across all files) will be allowed for
  before collapsing

:limits ::
  A hash with additional limits to check before collapsing patches.
  Allowed keys are: `max_bytes`, `safe_max_files`, `safe_max_lines`
  and `safe_max_bytes`

:expanded ::
  If false, patch raw data will not be included in the diff after
  `max_files`, `max_lines` or any of the limits in `limits` are
  exceeded


82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/gitlab/git/diff.rb', line 82

# Returns a copy of +options+ restricted to the recognised diff option
# keys, layered on top of an equally restricted copy of +default_options+.
# Neither argument is modified.
#
# @param options [Hash, nil] caller-supplied options; when falsy only the
#   filtered defaults are returned
# @param default_options [Hash, nil] fallback values; when falsy treated as
#   empty
# @return [Hash] the filtered defaults overridden by the filtered options
def filter_diff_options(options, default_options = {})
  permitted = [:ignore_whitespace_change, :max_files, :max_lines,
               :limits, :expanded, :collect_all_paths]

  # Copies the candidate and drops every key that is not permitted;
  # a falsy candidate yields an empty hash.
  only_permitted = lambda do |candidate|
    candidate ? candidate.dup.keep_if { |key| permitted.include?(key) } : {}
  end

  defaults = only_permitted.call(default_options)
  return defaults unless options

  defaults.merge(only_permitted.call(options))
end

.has_binary_notice?(text) ⇒ Boolean

Returns:

  • (Boolean)


141
142
143
144
145
# File 'lib/gitlab/git/diff.rb', line 141

# Tells whether +text+ begins with a match for BINARY_NOTICE_PATTERN.
#
# @param text [String, nil] the text to inspect
# @return [Boolean] false when text is not present
def has_binary_notice?(text)
  text.present? && text.start_with?(BINARY_NOTICE_PATTERN)
end

.patch_hard_limit_bytesObject

Returns the limit for a single diff file (patch).

Patches surpassing this limit shouldn’t be persisted in the database and will be presented as ‘too large’ for end-users.



137
138
139
# File 'lib/gitlab/git/diff.rb', line 137

# Returns the limit for a single diff file (patch), read from
# Gitlab::CurrentSettings.diff_max_patch_bytes on every call.
def patch_hard_limit_bytes
  Gitlab::CurrentSettings.diff_max_patch_bytes
end

.patch_safe_limit_bytes(limit = patch_hard_limit_bytes) ⇒ Object

Returns the limit of bytes a single diff file can reach before it appears as ‘collapsed’ for end-users. By convention, it’s 10% of the persisted `diff_max_patch_bytes`.

Example: If we have 100k for the `diff_max_patch_bytes`, it will be 10k by default.

Patches surpassing this limit should still be persisted in the database.



125
126
127
# File 'lib/gitlab/git/diff.rb', line 125

# Returns one tenth of +limit+.
#
# @param limit the hard limit to derive from; defaults to
#   patch_hard_limit_bytes
# @return the result of limit / 10
def patch_safe_limit_bytes(limit = patch_hard_limit_bytes)
  limit / 10
end

Instance Method Details

#collapse!Object



231
232
233
234
# File 'lib/gitlab/git/diff.rb', line 231

# Prunes the diff via prune! and then marks it as collapsed by setting
# @collapsed to true.
#
# @return [true]
def collapse!
  prune!
  @collapsed = true
end

#collapsed?Boolean

Returns:

  • (Boolean)


225
226
227
228
229
# File 'lib/gitlab/git/diff.rb', line 225

# Tells whether this diff counts as collapsed. The answer is computed once
# and kept in @collapsed: a diff is collapsed when it is not expanded and
# its byte size has reached the class-level safe patch limit.
#
# @return [Boolean]
def collapsed?
  unless defined?(@collapsed)
    @collapsed = !expanded && diff_bytesize >= self.class.patch_safe_limit_bytes
  end

  @collapsed
end

#diff_bytesizeObject



200
201
202
# File 'lib/gitlab/git/diff.rb', line 200

# Returns the byte size of @diff, memoized in @diff_bytesize after the
# first call.
def diff_bytesize
  @diff_bytesize ||= @diff.bytesize
end

#has_binary_notice?Boolean

Returns:

  • (Boolean)


252
253
254
# File 'lib/gitlab/git/diff.rb', line 252

# Instance-level convenience: asks the class-level has_binary_notice?
# about this diff's @diff text.
#
# @return [Boolean]
def has_binary_notice?
  self.class.has_binary_notice?(@diff)
end

#json_safe_diffObject



245
246
247
248
249
250
# File 'lib/gitlab/git/diff.rb', line 245

# Returns @diff unchanged unless detect_binary? flags it as binary, in
# which case a binary notice built from @old_path and @new_path is
# returned instead.
def json_safe_diff
  if detect_binary?(@diff)
    # the diff is binary, let's make a message for it
    Diff.binary_message(@old_path, @new_path)
  else
    @diff
  end
end

#line_countObject



196
197
198
# File 'lib/gitlab/git/diff.rb', line 196

# Returns Util.count_lines(@diff), memoized in @line_count after the first
# call.
def line_count
  @line_count ||= Util.count_lines(@diff)
end

#mode_changed?Boolean

Returns:

  • (Boolean)


178
179
180
# File 'lib/gitlab/git/diff.rb', line 178

# Truthy only when both a_mode and b_mode are set and differ from each
# other. When either mode is nil/false, that falsy value itself is
# returned rather than a strict false.
def mode_changed?
  a_mode && b_mode && a_mode != b_mode
end

#overflow?Boolean

Returns:

  • (Boolean)


236
237
238
239
240
241
242
243
# File 'lib/gitlab/git/diff.rb', line 236

# Returns the stored @overflow value when one has been assigned.
#
# @return [Boolean]
def overflow?
  # If overflow is not defined, we're
  # not receiving a diff from Gitaly
  # and overflow has no meaning
  defined?(@overflow) ? @overflow : false
end

#prune!Object



215
216
217
218
# File 'lib/gitlab/git/diff.rb', line 215

# Discards the patch content: @diff becomes an empty string and the
# memoized @line_count is reset to 0.
#
# NOTE(review): the memoized @diff_bytesize is not reset here, so
# diff_bytesize keeps any previously cached size — confirm this is intended.
def prune!
  @diff = ''
  @line_count = 0
end

#submodule?Boolean

Returns:

  • (Boolean)


182
183
184
# File 'lib/gitlab/git/diff.rb', line 182

# True when either a_mode or b_mode equals the string '160000', the mode
# value this class treats as marking a submodule entry.
#
# @return [Boolean]
def submodule?
  a_mode == '160000' || b_mode == '160000'
end

#to_hashObject



168
169
170
171
172
173
174
175
176
# File 'lib/gitlab/git/diff.rb', line 168

# Serializes this diff into a Hash with one entry per SERIALIZE_KEYS
# member, each value obtained by calling the method of the same name.
#
# @return [Hash{Symbol => Object}]
def to_hash
  SERIALIZE_KEYS.each_with_object({}) do |attribute, serialized|
    serialized[attribute] = send(attribute) # rubocop:disable GitlabSecurity/PublicSend
  end
end

#too_large!Object



220
221
222
223
# File 'lib/gitlab/git/diff.rb', line 220

# Prunes the diff via prune! and then flags it as too large by setting
# @too_large to true.
#
# @return [true]
def too_large!
  prune!
  @too_large = true
end

#too_large?Boolean Also known as: too_large

Returns:

  • (Boolean)


204
205
206
207
208
209
210
# File 'lib/gitlab/git/diff.rb', line 204

# Tells whether the diff has reached the class-level hard patch limit.
# The comparison runs only while @too_large is nil; once a true/false value
# is stored (here or through the setter) it is returned as-is.
#
# @return [Boolean]
def too_large?
  @too_large = diff_bytesize >= self.class.patch_hard_limit_bytes if @too_large.nil?

  @too_large
end

#unidiffObject



186
187
188
189
190
191
192
193
194
# File 'lib/gitlab/git/diff.rb', line 186

# Renders the diff with "---" / "+++" file header lines prepended.
#
# A blank diff is returned untouched, and a binary diff (detected from
# content or from a binary notice) is delegated to json_safe_diff. The old
# side is '/dev/null' for new files and the new side is '/dev/null' for
# deleted files.
#
# @return [String]
def unidiff
  return diff if diff.blank?

  binary = detect_binary?(@diff) || has_binary_notice?
  return json_safe_diff if binary

  from_side = if new_file?
                '/dev/null'
              else
                "a/#{old_path}"
              end

  to_side = if deleted_file?
              '/dev/null'
            else
              "b/#{new_path}"
            end

  header = "--- #{from_side}\n+++ #{to_side}\n"
  header + diff
end