Class: IOStreams::Encode::Reader

Inherits:
Object
  • Object
show all
Defined in:
lib/io_streams/encode/reader.rb

Constant Summary collapse

# Matches any character that is neither printable nor a carriage return / line feed.
# Inside a bracket expression `|` is a literal pipe, not alternation, so it is omitted:
# the pipe is already covered by [:print:], and listing it twice triggers Ruby's
# "character class has duplicated range" warning.
NOT_PRINTABLE = /[^[:print:]\r\n]/.freeze
CLEANSE_RULES =

Built-in cleansing rules that can be applied to the read data after it has been encoded.

{
  # Removes every character matched by NOT_PRINTABLE from the string in place
  # and hands back that same string object.
  printable: ->(data) { data.tap { |text| text.gsub!(NOT_PRINTABLE, '') } }
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input_stream, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil) ⇒ Reader

Apply encoding conversion when reading a stream.

Parameters

input_stream
  The input stream that implements #read

encoding: [String|Encoding]
  Encode returned data with this encoding.
  'US-ASCII':   Original 7 bit ASCII Format
  'ASCII-8BIT': 8-bit ASCII Format
  'UTF-8':      UTF-8 Format
  Etc.
  Default: 'UTF-8'

encode_replace: [String]
  The character to replace with when a character cannot be converted to the target encoding.
  nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
  Default: nil

encode_cleaner: [nil|Symbol|Proc]
  Cleanse data read from the input stream.
  nil:           No cleansing
  :printable     Cleanse all non-printable characters except \r and \n
  Proc/lambda    Proc to call after every read to cleanse the data
  Default: nil


47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/io_streams/encode/reader.rb', line 47

# Set up encoding conversion for data read from `input_stream`.
#
# Parameters
#   input_stream
#     Object whose #read supplies the raw data.
#   encoding: [String|Encoding|nil]
#     An Encoding (or nil) is stored as-is; anything else is resolved with Encoding.find.
#   encode_cleaner:
#     Handed to the class-level `extract_cleaner`; its result is stored as the cleaner.
#   encode_replace: [String|nil]
#     When not nil, invalid and undefined characters are replaced with this value
#     during conversion.
def initialize(input_stream, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil)
  @input_stream = input_stream
  @cleaner      = self.class.extract_cleaner(encode_cleaner)

  @encoding =
    if encoding.nil? || encoding.is_a?(Encoding)
      encoding
    else
      Encoding.find(encoding)
    end

  @encoding_options =
    if encode_replace.nil?
      {}
    else
      {invalid: :replace, undef: :replace, replace: encode_replace}
    end

  # A reusable read buffer is only set up when no replacement character was requested
  # and the stream's #read arity is outside 0..1.
  reuse_buffer       = encode_replace.nil? && !@input_stream.method(:read).arity.between?(0, 1)
  @read_cache_buffer = reuse_buffer ? ''.encode(@encoding) : nil
end

Instance Attribute Details

#cleanerObject (readonly)

Returns the value of attribute cleaner.



4
5
6
# File 'lib/io_streams/encode/reader.rb', line 4

# Reader for @cleaner, which #initialize assigns from `extract_cleaner(encode_cleaner)`.
# When it is truthy, #read invokes `call` on it with each block that was read.
def cleaner
  @cleaner
end

#encodingObject (readonly)

Returns the value of attribute encoding.



4
5
6
# File 'lib/io_streams/encode/reader.rb', line 4

# Reader for @encoding, the target encoding set by #initialize: the supplied value
# as-is when it was nil or an Encoding, otherwise the result of Encoding.find.
def encoding
  @encoding
end

Class Method Details

.open(file_name_or_io, **args) ⇒ Object

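Wrap a file or stream in an encoding Reader and yield that reader to the supplied block. A String argument is treated as a file name and opened with IOStreams::File::Reader; anything else is used directly as the input stream.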



14
15
16
17
18
19
20
# File 'lib/io_streams/encode/reader.rb', line 14

# Build a reader and yield it to the caller's block.
#
# A String is treated as a file name and opened through IOStreams::File::Reader;
# any other object is wrapped directly as the input stream.
# `args` are forwarded unchanged to `new`.
def self.open(file_name_or_io, **args)
  # Already a stream: wrap it and hand it straight to the block.
  return yield(new(file_name_or_io, **args)) unless file_name_or_io.is_a?(String)

  IOStreams::File::Reader.open(file_name_or_io) do |io|
    yield new(io, **args)
  end
end

Instance Method Details

#read(size = nil) ⇒ Object

Returns [String] data returned from the input stream. Returns [nil] if end of file and no further data was read.



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/io_streams/encode/reader.rb', line 64

# Reads the next block from the input stream and returns it in the target encoding.
#
# Parameters
#   size: [Integer|nil]
#     Passed unchanged to the input stream's #read.
#
# Returns [String] the block, re-encoded to @encoding when its encoding differs,
#   and then passed through the cleaner when one is set.
# Returns [nil] when the input stream's #read returns nil (or false).
def read(size = nil)
  block =
    if @read_cache_buffer
      begin
        # Ask the stream to fill the reusable buffer.
        @input_stream.read(size, @read_cache_buffer)
      rescue ArgumentError
        # Handle arity of -1 when just 0..1: stop offering the buffer from now on.
        @read_cache_buffer = nil
        @input_stream.read(size)
      end
    else
      @input_stream.read(size)
    end

  # EOF reached?
  return unless block

  # The options must be splatted as keywords: on Ruby 3+ a positional Hash is taken
  # as String#encode's source-encoding argument and raises TypeError.
  block = block.encode(@encoding, **@encoding_options) unless block.encoding == @encoding
  block = @cleaner.call(block) if @cleaner
  block
end