Module: LineDetector

Defined in:
lib/version.rb,
lib/line-detector.rb

Overview

LineDetector - line ending detector

Constant Summary collapse

# Library version string.
VERSION = '0.2'.freeze

# Individual characters that are treated as line-ending characters.
# Any other character is considered ordinary line content.
EOL_CHARACTERS = [
  "\r",     # carriage return
  "\n",     # line feed
  "\v",     # vertical tab
  "\f",     # form feed
  "\u2028", # Unicode line separator
  "\u2029", # Unicode paragraph separator
  "\u0085"  # next line (NEL)
].freeze

# Maps each recognized line-ending sequence to its symbolic name.
# The two-character sequences are listed first.
EOL2NAME = {
  "\r\n" => :crlf,
  "\n\r" => :lfcr,
  "\n" => :lf,
  "\r" => :cr,
  "\v" => :vt,
  "\f" => :ff,
  "\u2028" => :ls,
  "\u2029" => :ps,
  "\u0085" => :nel
}.freeze

# Reverse lookup: symbolic name => line-ending sequence.
NAME2EOL = EOL2NAME.invert.freeze

Class Method Summary collapse

Class Method Details

.detect_line_ending_of_file(filename) ⇒ Object

Detect line ending format of a file

Assumes file is a text file.



61
62
63
# File 'lib/line-detector.rb', line 61

# Detect the line ending format of a file.
#
# Reads the whole file into memory and delegates to
# detect_line_ending_of_text.
#
# File.read is used instead of Kernel#open so that the file handle is
# closed as soon as the content has been read, and so that a filename
# beginning with "|" is never interpreted as a command to execute.
#
# @param filename [String] path of the file to inspect
# @return [Object] whatever detect_line_ending_of_text returns for the
#   file's content
def self.detect_line_ending_of_file(filename)
  detect_line_ending_of_text(File.read(filename))
end

.detect_line_ending_of_text(text) ⇒ Object

Detect line ending format of arbitrary text

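If text uses multiple line ending formats, returns :mix. If text contains no line endings, returns :none; if its line ending is not a recognized format, returns :unknown.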



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/line-detector.rb', line 39

# Detect the line ending format of arbitrary text.
#
# @param text [String] text to inspect
# @return [Symbol] :none when the text contains no line-ending characters,
#   the name from EOL2NAME when exactly one distinct ending is found
#   (or :unknown when that ending is not in EOL2NAME), and :mix when more
#   than one distinct ending is found
def self.detect_line_ending_of_text(text)
  eol_chars = EOL_CHARACTERS.join('')

  # Splitting on every non-EOL character leaves only the runs of
  # consecutive EOL characters (plus empty strings, which are dropped).
  eol_runs = text.split(/[^#{eol_chars}]/).reject(&:empty?)

  # Collapse immediate repetitions inside a run (e.g. blank lines:
  # "\n\n\n" becomes "\n") so repeated endings count as a single format.
  distinct = eol_runs.map { |run| run.gsub(/(.+?)(\1)+/m, '\1') }.uniq

  case distinct.length
  when 0 then :none
  when 1 then EOL2NAME[distinct.first] || :unknown
  else :mix
  end
end

.lines(text) ⇒ Object

A more capable version of String#lines that handles some of the more obscure line ending formats.

If the line ending format cannot be determined, returns :unknown.



72
73
74
75
76
77
78
79
80
81
82
# File 'lib/line-detector.rb', line 72

# Split text into lines, honoring the line ending format detected by
# detect_line_ending_of_text.
#
# Like String#split, the separators are not included in the returned
# lines and trailing empty lines are dropped.
#
# @param text [String] text to split
# @return [Array<String>, Symbol] the lines of the text; [text] when the
#   text contains no line endings; :unknown when the line ending format
#   is not recognized
def self.lines(text)
  line_ending = detect_line_ending_of_text(text)

  case line_ending
  when :unknown
    line_ending
  when :none
    [text]
  when :mix
    # :mix has no entry in NAME2EOL; splitting on the resulting nil would
    # fall back to whitespace splitting and break lines apart at spaces.
    # Split on any known ending instead, trying the longest sequences
    # first so that "\r\n" is not treated as two separate endings.
    separators = NAME2EOL.values.sort_by { |eol| -eol.length }
    text.split(Regexp.union(separators))
  else
    text.split(NAME2EOL[line_ending])
  end
end