Class: Nokogiri::HTML::Document::EncodingReader

Inherits:
Object
  • Object
show all
Defined in:
lib/nokogiri/html/document.rb

Overview

:nodoc:

Defined Under Namespace

Classes: SAXHandler

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(io) ⇒ EncodingReader

Returns a new instance of EncodingReader.



180
181
182
183
# File 'lib/nokogiri/html/document.rb', line 180

# Wraps +io+ so that reads can be routed through this reader.
#
# @param io the underlying source; #read calls +io.read(len)+ on it
def initialize(io)
  # @firstchunk stays nil until #read pulls the first chunk from the source.
  @io, @firstchunk = io, nil
end

Class Method Details

.detect_encoding(chunk) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/nokogiri/html/document.rb', line 161

# Tries to work out the encoding declared in +chunk+.
#
# Strategies, in order:
# 1. If +chunk+ starts with an XML declaration, parse just that declaration
#    with Nokogiri.XML and return the resulting document's encoding.
# 2. On JRuby only, look for a <meta ...charset=...> tag with a regexp and
#    return the charset value.
# 3. Otherwise run the HTML SAX parser over +chunk+ with a SAXHandler and
#    return whatever the handler reports as its encoding.
#
# @param chunk [String] leading portion of the document
# @return the detected encoding, or nil when nothing was detected or any
#   StandardError was raised along the way
def self.detect_encoding(chunk)
  xml_decl = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)
  return Nokogiri.XML(xml_decl[1]).encoding if xml_decl

  if Nokogiri.jruby?
    meta_tag = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i)
    return meta_tag[4] if meta_tag
  end

  handler = SAXHandler.new
  sax_parser = Nokogiri::HTML::SAX::Parser.new(handler)
  # NOTE(review): presumably the handler throws :found once it has seen an
  # encoding, cutting the parse short -- confirm against SAXHandler.
  catch(:found) do
    sax_parser.parse(chunk)
  end
  handler.encoding
rescue StandardError
  # Detection is best-effort: any failure simply means "unknown".
  nil
end

Instance Method Details

#read(len) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/nokogiri/html/document.rb', line 185

# Reads up to +len+ bytes, running encoding detection on the first chunk.
#
# Calling this without +len+ is not supported.
#
# @param len [Integer] maximum amount of data to return
# @return [String, nil] the data read, or nil when nothing is left
# @raise [EncodingFoundException] on the first call, when an encoding is
#   detected in the first chunk; the message is the detected encoding
def read(len)
  unless @firstchunk
    @firstchunk = @io.read(len)
    return nil unless @firstchunk

    # The first call from htmlReadIO() is expected to ask for enough data
    # (~1KB) for advanced encoding detection to work.
    detected = EncodingReader.detect_encoding(@firstchunk)
    # @firstchunk is kept, so the read performed on retry starts from the
    # very same data.
    raise EncodingFoundException, detected if detected
  end

  # Serve buffered data first, then top up from the underlying IO.
  buffered = @firstchunk.slice!(0, len)
  remaining = len - buffered.length
  if remaining > 0
    tail = @io.read(remaining)
    buffered << tail if tail
  end

  buffered.empty? ? nil : buffered
end