Class: Transpotter

Inherits:
Object
  • Object
show all
Defined in:
lib/transpotter.rb

Constant Summary collapse

DEFAULT_SAMPLE_SIZE =

Default sample size: 1 MB (1024 * 1024 bytes).

1024 * 1024
# Encoding objects, with a hand-picked list of common encodings first,
# followed by every other encoding name known to this Ruby.
#
# Names are resolved with Encoding.find, which returns nil for special
# aliases that are currently unset (e.g. "internal" when
# Encoding.default_internal is nil). Several names are also aliases of the
# same Encoding. The resolved list is therefore compacted and de-duplicated;
# uniq keeps the first occurrence, so the common-first ordering is preserved.
MOST_COMMON_ENCODINGS =
(%w[
  UTF-8
  ISO-8859-1
  Windows-1251
  GB2312
  SJIS
  Windows-1252
  GBK
  ISO-8859-2
  EUC-JP
  Windows-1256
  ISO-8859-15
  ISO-8859-9
  EUC-KR
  Windows-1250
  Windows-1254
  Big5
  Windows-874
  US-ASCII
  TIS-620
] + Encoding.name_list).map { |name| Encoding.find(name) }.compact.uniq.freeze
# Exception classes from Ruby's Encoding namespace that signal a failed
# conversion, collected in one list (presumably for use as
# `rescue *ENCODING_ERRORS` — confirm against callers). Frozen so the shared
# constant cannot be mutated by accident.
ENCODING_ERRORS =
[
  Encoding::InvalidByteSequenceError,
  Encoding::UndefinedConversionError,
  Encoding::ConverterNotFoundError
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename, data = nil, samplesize = nil) ⇒ Transpotter

Returns a new instance of Transpotter.



38
39
40
41
42
# File 'lib/transpotter.rb', line 38

# Stores the constructor arguments; no other work is performed here.
#
# @param filename [Object] kept in @filename (exposed through #filename)
# @param data [Object, nil] kept in @data
# @param samplesize [Integer, nil] kept in @samplesize; DEFAULT_SAMPLE_SIZE
#   is used when nil (or false) is given
def initialize(filename, data = nil, samplesize = nil)
  @filename, @data = filename, data
  @samplesize = samplesize || DEFAULT_SAMPLE_SIZE
end

Instance Attribute Details

#filename ⇒ Object (readonly)

Returns the value of attribute filename.



4
5
6
# File 'lib/transpotter.rb', line 4

# Reader for @filename, the first argument given to the constructor.
#
# @return [Object] the stored filename
def filename
  @filename
end

Instance Method Details

#each_line ⇒ Object



59
60
61
62
63
64
65
66
67
68
# File 'lib/transpotter.rb', line 59

# Yields each line of the input after passing it through #convert.
#
# When @filename is set, lines are read from the IO provided by
# open_encoded_file, using line_endings (encoded to @encoding) as the
# separator. Otherwise, when @data is set, the converted data is split on
# line_endings. Nothing is yielded if no sample is available.
#
# NOTE(review): IO#each keeps the separator on each line while String#split
# drops it, so the two branches appear to yield differently terminated
# lines — confirm whether that is intended.
#
# @yieldparam line [Object] one converted line
# @return [Enumerator] when called without a block
def each_line
  # Without a block, `yield` would raise LocalJumpError; hand back an
  # Enumerator instead, as Ruby's own each-style methods do.
  return enum_for(:each_line) unless block_given?
  return unless sample # don't do anything if we can't grab sample

  if @filename
    open_encoded_file do |io|
      io.each(line_endings.encode(@encoding)) { |line| yield convert(line) }
    end
  elsif @data
    convert(@data).split(line_endings).each { |line| yield line }
  end
end

#encoding ⇒ Object



44
45
46
# File 'lib/transpotter.rb', line 44

# Returns the encoding cached in @encoding, computing it on first use.
# charlock is tried first; brute_force is only called when charlock
# returns nil or false.
def encoding
  return @encoding if @encoding

  @encoding = charlock || brute_force
end

#read ⇒ Object



48
49
50
51
52
53
54
55
56
57
# File 'lib/transpotter.rb', line 48

# Returns the entire input passed through #convert.
#
# With @filename set, the content is read from the IO provided by
# open_encoded_file; otherwise @data is converted when present. Returns nil
# when no sample is available or when neither source is set.
def read
  return unless sample # nothing to do if a sample could not be taken

  # The inner `return` leaves #read with the converted file contents; the
  # outer one covers the case where the block is never invoked.
  return open_encoded_file { |io| return convert(io.read) } if @filename

  convert(@data) if @data
end