Module: Gitlab::BlobHelper

Includes:: Utils::StrongMemoize

Included in:: BlobLike, Git::Blob

Defined in:: lib/gitlab/blob_helper.rb

Constant Summary collapse

MEGABYTE =

1024 * 1024

Instance Method Summary collapse

#_mime_type ⇒ Object

Internal: Lookup mime type for extension.
#binary_in_repo? ⇒ Boolean
#binary_mime_type? ⇒ Boolean
#content_type ⇒ Object
#empty? ⇒ Boolean
#encoded_newlines_re ⇒ Object
#encoding ⇒ Object
#extname ⇒ Object
#image? ⇒ Boolean
#known_extension? ⇒ Boolean
#large? ⇒ Boolean
#lines ⇒ Object
#mime_type ⇒ Object

Public: Get the actual blob mime type.
#ruby_encoding ⇒ Object
#text_in_repo? ⇒ Boolean
#viewable? ⇒ Boolean

Instance Method Details

#_mime_type ⇒ `Object`

Internal: Lookup mime type for extension.

Returns a MIME::Type, or nil when no type is found for the extension.

(Source annotation: rubocop:disable Gitlab/ModuleWithInstanceVariables)

# File 'lib/gitlab/blob_helper.rb', line 57

# Internal: Guess the MIME type from the blob's file extension.
#
# Every candidate that MIME::Types reports for `extname` is considered. The
# first candidate answering true to `ascii?` wins; otherwise the first
# candidate is used. The outcome (nil included) is cached in @_mime_type, so
# the lookup runs at most once per object.
#
# Returns the chosen candidate, or nil when the candidate list is empty.
def _mime_type
  return @_mime_type if defined?(@_mime_type)

  candidates = ::MIME::Types.type_for(extname.to_s)

  # Text types take priority over binary ones.
  @_mime_type = candidates.find(&:ascii?) || candidates.first
end

#binary_in_repo? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/gitlab/blob_helper.rb', line 26

# Decides whether the stored content should be treated as binary.
#
# Returns true when `data` is nil or when no encoding was detected, false
# when `data` is the empty string, and otherwise whether the detected type
# is :binary.
def binary_in_repo?
  # Large blobs are never loaded into memory, so there is nothing to inspect.
  return true if data.nil?

  # A blank file counts as text.
  return false if data == ""

  # No detected encoding (Charlock could not decide): assume binary.
  return true if encoding.nil?

  # Otherwise go with what Charlock reported.
  find_encoding[:type] == :binary
end

#binary_mime_type? ⇒ `Boolean`

Returns:

(Boolean)



81
82
83

# File 'lib/gitlab/blob_helper.rb', line 81

# Reports whether the MIME type guessed from the extension is binary.
#
# Returns false when no MIME type could be guessed; otherwise the result of
# `binary?` on the guessed type.
def binary_mime_type?
  guessed = _mime_type
  return false unless guessed

  guessed.binary?
end

#content_type ⇒ `Object`

# File 'lib/gitlab/blob_helper.rb', line 115

# Builds the content type string for the blob and caches it in
# @content_type.
#
# A blob that is binary (either by its MIME type or by its content) reports
# `mime_type`. Anything else is reported as text/plain, with a lower-cased
# charset parameter when an encoding is known.
#
# Returns a String.
def content_type
  @content_type ||=
    if binary_mime_type? || binary_in_repo?
      mime_type
    elsif encoding
      "text/plain; charset=#{encoding.downcase}"
    else
      "text/plain"
    end
end

#empty? ⇒ `Boolean`

Returns:

(Boolean)



151
152
153

# File 'lib/gitlab/blob_helper.rb', line 151

# Reports whether the blob has no content.
#
# Returns true when `data` is nil or equal to the empty string.
def empty?
  content = data
  return true if content.nil?

  content == ""
end

#encoded_newlines_re ⇒ `Object`

# File 'lib/gitlab/blob_helper.rb', line 124

# Builds (and memoizes via strong_memoize) a Regexp matching any of the
# newline sequences "\r\n", "\r" and "\n".
#
# When both a detected `ruby_encoding` and the encoding of `data` are
# available, each sequence is first transcoded to the detected encoding and
# then relabelled with the encoding of `data`, so its bytes line up with how
# newlines are stored in `data`.
#
# Returns a Regexp.
def encoded_newlines_re
  strong_memoize(:encoded_newlines_re) do
    blob_encoding = data&.encoding
    separators = ["\r\n", "\r", "\n"]

    if ruby_encoding && blob_encoding
      separators = separators.map do |separator|
        separator.encode(ruby_encoding, "ASCII-8BIT").force_encoding(blob_encoding)
      end
    end

    Regexp.union(separators)
  end
end

#encoding ⇒ `Object`

# File 'lib/gitlab/blob_helper.rb', line 145

# Looks up the :encoding entry of the detection result from `find_encoding`.
#
# Returns that entry, or nil when `find_encoding` yields nothing.
def encoding
  detected = find_encoding
  return unless detected

  detected[:encoding]
end

#extname ⇒ `Object`



8
9
10

# File 'lib/gitlab/blob_helper.rb', line 8

# Extracts the file extension from the blob's name.
#
# The name is converted with `to_s` first, so a nil name gives "".
#
# Returns the extension String as computed by File.extname.
def extname
  filename = name.to_s
  File.extname(filename)
end

#image? ⇒ `Boolean`

Returns:

(Boolean)



49
50
51

# File 'lib/gitlab/blob_helper.rb', line 49

# Reports whether the file extension is one of the recognised image
# extensions. The comparison is case-insensitive.
#
# Returns a Boolean.
def image?
  extension = extname.downcase
  %w[.png .jpg .jpeg .gif .svg .webp].include?(extension)
end

#known_extension? ⇒ `Boolean`

Returns:

(Boolean)



12
13
14

# File 'lib/gitlab/blob_helper.rb', line 12

# Reports whether the file extension appears in LanguageData.extensions.
# The extension is compared exactly as `extname` returns it.
#
# Returns the result of `include?` on that collection.
def known_extension?
  registered = LanguageData.extensions
  registered.include?(extname)
end

#large? ⇒ `Boolean`

Returns:

(Boolean)



22
23
24

# File 'lib/gitlab/blob_helper.rb', line 22

# Reports whether the blob is strictly larger than MEGABYTE.
#
# `size` is converted with `to_i`, so a nil size counts as 0.
#
# Returns a Boolean.
def large?
  byte_count = size.to_i
  byte_count > MEGABYTE
end

#lines ⇒ `Object`

# File 'lib/gitlab/blob_helper.rb', line 85

# Splits the blob content into lines, caching the result in @lines.
#
# Returns an Array of line Strings, or an empty Array when the blob is not
# viewable or has no data.
def lines
  return @lines if @lines

  @lines =
    if viewable? && data
      # `data` is typically tagged ASCII-8BIT even if a different encoding
      # was detected for its content. Its encoding must stay untouched,
      # because every entry of `lines` is implicitly promised to carry the
      # same encoding as `data`.
      #
      # So rather than re-encoding `data`, the newline sequences are
      # re-encoded: each one is converted to the detected encoding and then
      # forced back to the encoding of `data` (see `encoded_newlines_re`).
      # The resulting bytes match how newlines are most likely stored in the
      # file, and the split works without changing the encoding of `data`
      # and, importantly, without copying many potentially large strings.
      begin
        data.split(encoded_newlines_re, -1)
      rescue Encoding::ConverterNotFoundError
        # The content cannot be split in the detected encoding; treat it as
        # one single line.
        [data]
      end
    else
      []
    end
end

#mime_type ⇒ `Object`

Public: Get the actual blob mime type

Examples

# => 'text/plain'
# => 'text/html'

Returns a mime type String.



77
78
79

# File 'lib/gitlab/blob_helper.rb', line 77

# Public: Get the blob's MIME type as a String.
#
# Uses the type guessed by `_mime_type` and falls back to 'text/plain' when
# no type could be guessed.
#
# Returns a mime type String.
def mime_type
  guessed = _mime_type
  return 'text/plain' unless guessed

  guessed.to_s
end

#ruby_encoding ⇒ `Object`

# File 'lib/gitlab/blob_helper.rb', line 139

# Looks up the :ruby_encoding entry of the detection result from
# `find_encoding`.
#
# Returns that entry, or nil when `find_encoding` yields nothing.
def ruby_encoding
  detected = find_encoding
  return unless detected

  detected[:ruby_encoding]
end

#text_in_repo? ⇒ `Boolean`

Returns:

(Boolean)



45
46
47

# File 'lib/gitlab/blob_helper.rb', line 45

# Reports whether the stored content is text, i.e. the exact opposite of
# `binary_in_repo?`.
#
# Returns a Boolean.
def text_in_repo?
  return false if binary_in_repo?

  true
end

#viewable? ⇒ `Boolean`

Returns:

(Boolean)



16
17
18

# File 'lib/gitlab/blob_helper.rb', line 16

# Reports whether the blob can be shown as text: it must not be large and
# its content must be text.
#
# `text_in_repo?` is only consulted when the blob is not large.
#
# Returns a Boolean.
def viewable?
  return false if large?

  text_in_repo?
end

Module: Gitlab::BlobHelper

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#_mime_type ⇒ Object

#binary_in_repo? ⇒ Boolean

#binary_mime_type? ⇒ Boolean

#content_type ⇒ Object

#empty? ⇒ Boolean

#encoded_newlines_re ⇒ Object

#encoding ⇒ Object

#extname ⇒ Object

#image? ⇒ Boolean

#known_extension? ⇒ Boolean

#large? ⇒ Boolean

#lines ⇒ Object

#mime_type ⇒ Object

#ruby_encoding ⇒ Object

#text_in_repo? ⇒ Boolean

#viewable? ⇒ Boolean

#_mime_type ⇒ `Object`

#binary_in_repo? ⇒ `Boolean`

#binary_mime_type? ⇒ `Boolean`

#content_type ⇒ `Object`

#empty? ⇒ `Boolean`

#encoded_newlines_re ⇒ `Object`

#encoding ⇒ `Object`

#extname ⇒ `Object`

#image? ⇒ `Boolean`

#known_extension? ⇒ `Boolean`

#large? ⇒ `Boolean`

#lines ⇒ `Object`

#mime_type ⇒ `Object`

#ruby_encoding ⇒ `Object`

#text_in_repo? ⇒ `Boolean`

#viewable? ⇒ `Boolean`