Class: IOStreams::Encode::Reader

Inherits:
Reader
  • Object
show all
Defined in:
lib/io_streams/encode/reader.rb

Constant Summary collapse

NOT_PRINTABLE =
Regexp.compile(/[^[:print:]|\r|\n]/).freeze
CLEANSE_RULES =

Builtin strip options to apply after encoding the read data.

{
  # Strips all non printable characters
  printable:             ->(data, _) { data.gsub!(NOT_PRINTABLE, "") || data },
  # Replaces non printable characters with the value specified in the `replace` option.
  replace_non_printable: ->(data, replace) { data.gsub!(NOT_PRINTABLE, replace || "") || data }
}.freeze

Instance Attribute Summary collapse

Attributes inherited from Reader

#input_stream

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Reader

file, open

Constructor Details

#initialize(input_stream, encoding: "UTF-8", cleaner: nil, replace: nil) ⇒ Reader

Apply encoding conversion when reading a stream.

Parameters

input_stream
  The input stream that implements #read

encoding: [String|Encoding]
  Encode returned data with this encoding.
  'US-ASCII':   Original 7 bit ASCII Format
  'ASCII-8BIT': 8-bit ASCII Format
  'UTF-8':      UTF-8 Format
  Etc.
  Default: 'UTF-8'

replace: [String]
  The character to replace with when a character cannot be converted to the target encoding.
  nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
  Default: nil

cleaner: [nil|symbol|Proc]
  Cleanse data read from the input stream.
  nil:           No cleansing
  :printable Cleanse all non-printable characters except \r and \n
  Proc/lambda    Proc to call after every read to cleanse the data
  Default: nil


45
46
47
48
49
50
51
52
53
54
55
# File 'lib/io_streams/encode/reader.rb', line 45

def initialize(input_stream, encoding: "UTF-8", cleaner: nil, replace: nil)
  super(input_stream)

  @cleaner          = self.class.extract_cleaner(cleaner)
  @encoding         = encoding.nil? || encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding)
  @encoding_options = replace.nil? ? {} : {invalid: :replace, undef: :replace, replace: replace}
  @replace          = replace

  # More efficient read buffering only supported when the input stream `#read` method supports it.
  @read_cache_buffer = ("".encode(@encoding) if replace.nil? && !@input_stream.method(:read).arity.between?(0, 1))
end

Instance Attribute Details

#cleanerObject (readonly)

Returns the value of attribute cleaner.



4
5
6
# File 'lib/io_streams/encode/reader.rb', line 4

def cleaner
  @cleaner
end

#encodingObject (readonly)

Returns the value of attribute encoding.



4
5
6
# File 'lib/io_streams/encode/reader.rb', line 4

def encoding
  @encoding
end

Class Method Details

.extract_cleaner(cleaner) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/io_streams/encode/reader.rb', line 81

def self.extract_cleaner(cleaner)
  return if cleaner.nil?

  case cleaner
  when Symbol
    proc = CLEANSE_RULES[cleaner]
    raise(ArgumentError, "Invalid cleansing rule #{cleaner.inspect}") unless proc

    proc
  when Proc
    cleaner
  end
end

.stream(input_stream, original_file_name: nil, **args) {|new(input_stream, **args)| ... } ⇒ Object

Read a line at a time from a file or stream

Yields:



16
17
18
# File 'lib/io_streams/encode/reader.rb', line 16

def self.stream(input_stream, original_file_name: nil, **args)
  yield new(input_stream, **args)
end

Instance Method Details

#read(size = nil) ⇒ Object

Returns [String] data returned from the input stream. Returns [nil] if end of file and no further data was read.



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/io_streams/encode/reader.rb', line 59

def read(size = nil)
  block =
    if @read_cache_buffer
      begin
        @input_stream.read(size, @read_cache_buffer)
      rescue ArgumentError
        # Handle arity of -1 when just 0..1
        @read_cache_buffer = nil
        @input_stream.read(size)
      end
    else
      @input_stream.read(size)
    end

  # EOF reached?
  return unless block

  block = block.encode(@encoding, **@encoding_options) unless block.encoding == @encoding
  block = @cleaner.call(block, @replace) if @cleaner
  block
end