Method: Webclient::Response#text

Defined in:
lib/webclient/webclient.rb

#text(encoding: 'UTF-8') ⇒ Object

TODO/check: consider renaming the `encoding` keyword to `charset` (the term used by HTML/HTTP) - why? why not?



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/webclient/webclient.rb', line 12

# Returns the response body as a UTF-8 string.
#
# note: Net::HTTP will NOT set encoding UTF-8 etc.; the body
#   will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
#   thus, the encoding gets set/forced here.
#
# The body is copied first, so the string held by the response is never
# re-tagged (or otherwise modified) by this method.
#
# @param encoding [String, Encoding] the encoding the body bytes are in;
#   names are matched case-insensitively (e.g. 'utf-8', 'Windows-1252')
# @return [String] a new string tagged as (or converted to) UTF-8
# @raise [ArgumentError] if the encoding name is unknown
# @raise [Encoding::UndefinedConversionError] if a byte has no UTF-8 mapping
#   in the given source encoding
def text( encoding: 'UTF-8' )
  # String.new always yields a fresh, unfrozen copy - this also covers a nil
  #   body, where nil.to_s may hand back a frozen empty string.
  text = String.new( @response.body.to_s )

  source = encoding.is_a?( Encoding ) ? encoding : Encoding.find( encoding.to_s )

  # bytes are already utf-8; only the tag is missing - no conversion needed
  return text.force_encoding( Encoding::UTF_8 )  if source == Encoding::UTF_8

  ## [debug] GET=http://www.football-data.co.uk/mmz4281/0405/SC0.csv
  ##    Encoding::UndefinedConversionError: "\xA0" from ASCII-8BIT to UTF-8
  ##     note:  0xA0 (160) is NBSP (non-breaking space) in Windows-1252
  ##
  ## note: assume windows encoding (for football-data.uk)
  ##   use "Windows-1252" for input and convert to utf-8
  ##
  ##    see https://www.justinweiss.com/articles/3-steps-to-fix-encoding-problems-in-ruby/
  ##    see https://en.wikipedia.org/wiki/Windows-1252
  puts " [debug] converting response.text encoding from >#{encoding}< to >UTF-8<"

  # step 1: tag the raw bytes with their real encoding;
  # step 2: transcode to utf-8
  text.force_encoding( source ).encode( Encoding::UTF_8 )
end