Module: Gitlab::BlobHelper

Includes:
Utils::StrongMemoize
Included in:
BlobLike, Git::Blob
Defined in:
lib/gitlab/blob_helper.rb

Constant Summary

MEGABYTE =
1024 * 1024

Instance Method Summary

Methods included from Utils::StrongMemoize

#clear_memoization, #strong_memoize, #strong_memoized?

Instance Method Details

#_mime_type ⇒ Object

Internal: Lookup mime type for extension.

Returns a MIME::Type, or nil when no type is registered for the extension.


57
58
59
60
61
62
63
64
65
66
# File 'lib/gitlab/blob_helper.rb', line 57

# Internal: Look up the MIME type for this blob's file extension.
#
# The result is cached in @_mime_type. The cache is keyed on whether the
# variable is defined, so a nil result (no candidates) is remembered too.
#
# Returns the first candidate that reports ascii?, otherwise the first
# candidate, otherwise nil.
def _mime_type
  return @_mime_type if defined?(@_mime_type)

  candidates = ::MIME::Types.type_for(extname.to_s)

  # Text types win over binary ones when an extension maps to several.
  @_mime_type = candidates.find(&:ascii?) || candidates.first
end

#binary_in_repo? ⇒ Boolean

Returns:

  • (Boolean)

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/gitlab/blob_helper.rb', line 26

# Public: Decide whether the blob content should be treated as binary.
#
# Returns true when there is no data or no detected encoding, false for an
# empty string, and otherwise whether the detector classified the content
# as :binary.
def binary_in_repo?
  # Large blobs aren't even loaded into memory, so there is nothing to inspect.
  return true if data.nil?

  # Blank files count as text.
  return false if data == ""

  # The detector could not settle on an encoding.
  return true if encoding.nil?

  # Otherwise defer to the detector's own classification.
  detect_encoding[:type] == :binary
end

#binary_mime_type? ⇒ Boolean

Returns:

  • (Boolean)

81
82
83
# File 'lib/gitlab/blob_helper.rb', line 81

# Public: Report whether the extension-derived MIME type is binary.
#
# Returns false when no MIME type was found, otherwise whatever the type's
# binary? reports.
def binary_mime_type?
  return false unless _mime_type

  _mime_type.binary?
end

#content_type ⇒ Object


115
116
117
118
119
120
121
122
# File 'lib/gitlab/blob_helper.rb', line 115

# Public: Build the content type string for this blob (memoized).
#
# Binary blobs (by MIME type or by content) report their mime_type. Text
# blobs report "text/plain", with a lower-cased charset appended when an
# encoding was detected.
#
# Returns a String.
def content_type
  @content_type ||=
    if binary_mime_type? || binary_in_repo?
      mime_type
    elsif encoding
      "text/plain; charset=#{encoding.downcase}"
    else
      "text/plain"
    end
end

#detect_encoding ⇒ Object


151
152
153
# File 'lib/gitlab/blob_helper.rb', line 151

# Public: Run CharlockHolmes encoding detection over the blob data.
#
# The detector result is cached in @detect_encoding, but the data check
# happens on every call, so nil is returned whenever data is absent.
#
# Returns the detector's result, or nil when there is no data.
def detect_encoding
  return unless data

  @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end

#empty? ⇒ Boolean

Returns:

  • (Boolean)

155
156
157
# File 'lib/gitlab/blob_helper.rb', line 155

# Public: Report whether the blob has no content.
#
# Returns true when data is nil or an empty string.
def empty?
  return true if data.nil?

  data == ""
end

#encoded_newlines_re ⇒ Object


124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/gitlab/blob_helper.rb', line 124

# Public: Build (and memoize) a Regexp matching any newline sequence as it
# would appear in the blob's bytes.
#
# When both a detected Ruby encoding and the data's own encoding are known,
# each newline sequence is transcoded to the detected encoding and then
# re-tagged with the data's encoding so it can be matched against data
# directly.
#
# Returns a Regexp.
def encoded_newlines_re
  strong_memoize(:encoded_newlines_re) do
    source_encoding = data&.encoding
    separators = ["\r\n", "\r", "\n"]

    if ruby_encoding && source_encoding
      separators = separators.map do |separator|
        separator.encode(ruby_encoding, "ASCII-8BIT").force_encoding(source_encoding)
      end
    end

    Regexp.union(separators)
  end
end

#encoding ⇒ Object


145
146
147
148
149
# File 'lib/gitlab/blob_helper.rb', line 145

# Public: The :encoding entry of the detection result.
#
# Returns the entry's value, or nil when detection produced nothing.
def encoding
  detected = detect_encoding
  detected[:encoding] if detected
end

#extname ⇒ Object


8
9
10
# File 'lib/gitlab/blob_helper.rb', line 8

# Public: The file extension of the blob's name, including the leading dot.
#
# Returns a String; empty when the name is nil or has no extension.
def extname
  filename = name.to_s
  File.extname(filename)
end

#image? ⇒ Boolean

Returns:

  • (Boolean)

49
50
51
# File 'lib/gitlab/blob_helper.rb', line 49

# Public: Report whether the extension is one of the recognised image
# extensions (.png, .jpg, .jpeg, .gif, .svg), ignoring case.
#
# Returns a Boolean.
def image?
  image_extensions = %w[.png .jpg .jpeg .gif .svg]
  image_extensions.include?(extname.downcase)
end

#known_extension? ⇒ Boolean

Returns:

  • (Boolean)

12
13
14
# File 'lib/gitlab/blob_helper.rb', line 12

# Public: Report whether the blob's extension appears in
# LanguageData.extensions. The comparison uses extname as-is (no case
# folding).
#
# Returns a Boolean.
def known_extension?
  recognised = LanguageData.extensions
  recognised.include?(extname)
end

#large? ⇒ Boolean

Returns:

  • (Boolean)

22
23
24
# File 'lib/gitlab/blob_helper.rb', line 22

# Public: Report whether the blob is strictly larger than MEGABYTE.
#
# size is coerced with to_i, so a nil size counts as 0.
#
# Returns a Boolean.
def large?
  byte_count = size.to_i
  byte_count > MEGABYTE
end

#lines ⇒ Object


85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/gitlab/blob_helper.rb', line 85

# Public: Split the blob data into lines (memoized in @lines).
#
# Returns an empty Array when the blob is not viewable or has no data.
# Otherwise returns data split on encoded_newlines_re, keeping trailing
# empty entries. When the split raises Encoding::ConverterNotFoundError,
# the whole data is returned as a single line.
def lines
  return @lines if @lines
  return @lines = [] unless viewable? && data

  # `data` is usually tagged ASCII-8BIT even when a different encoding was
  # detected, and it must stay that way: every entry in `lines` is implicitly
  # guaranteed to carry the same encoding as `data`.
  #
  # So rather than re-encoding `data`, the newline sequences themselves are
  # encoded in the detected encoding and then forced back to the encoding of
  # `data`. Their bytes then match how newlines are likely stored in the
  # file, and the content can be split without changing the encoding of
  # `data` and -- importantly -- without duplicating potentially large
  # strings.
  @lines =
    begin
      data.split(encoded_newlines_re, -1)
    rescue Encoding::ConverterNotFoundError
      # Not splittable in the detected encoding; treat it as one big line.
      [data]
    end
end

#mime_type ⇒ Object

Public: Get the actual blob mime type

Examples

# => 'text/plain'
# => 'text/html'

Returns a mime type String.


77
78
79
# File 'lib/gitlab/blob_helper.rb', line 77

# Public: Get the blob's MIME type as a string.
#
# Returns the String form of the extension-derived MIME type, or
# 'text/plain' when none was found.
def mime_type
  return 'text/plain' unless _mime_type

  _mime_type.to_s
end

#ruby_encoding ⇒ Object


139
140
141
142
143
# File 'lib/gitlab/blob_helper.rb', line 139

# Public: The :ruby_encoding entry of the detection result.
#
# Returns the entry's value, or nil when detection produced nothing.
def ruby_encoding
  detected = detect_encoding
  detected[:ruby_encoding] if detected
end

#text_in_repo? ⇒ Boolean

Returns:

  • (Boolean)

45
46
47
# File 'lib/gitlab/blob_helper.rb', line 45

# Public: Report whether the blob content is text, i.e. not binary
# according to binary_in_repo?.
#
# Returns a Boolean.
def text_in_repo?
  return false if binary_in_repo?

  true
end

#viewable? ⇒ Boolean

Returns:

  • (Boolean)

16
17
18
# File 'lib/gitlab/blob_helper.rb', line 16

# Public: Report whether the blob can be shown as text: it must not be
# large, and its content must be text.
#
# Returns false for large blobs, otherwise the result of text_in_repo?.
def viewable?
  return false if large?

  text_in_repo?
end