Class: TMail::Unquoter

Inherits:
Object show all
Defined in:
lib/tmail/quoting.rb

Class Method Summary collapse

Class Method Details

.convert_to_with_fallback_on_iso_8859_1(text, to, from) ⇒ Object Also known as: convert_to


104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/tmail/quoting.rb', line 104

# Converts +text+ to the +to+ charset, retrying once with ISO-8859-1 as the
# source charset when the first conversion attempt fails.
#
# text - the String to convert.
# to   - name of the target charset.
# from - name of the source charset. When it is blank and the text is not
#        binary data, the charset is guessed with CharDet.
#
# Returns +text+ untouched when +to+ is 'utf-8' and +text.isutf8+ is truthy.
# Otherwise returns the result of convert_to_without_fallback_on_iso_8859_1.
# When that call raises Iconv::InvalidCharacter even with 'iso-8859-1' as the
# source, the unconverted +text+ is returned instead of nil, so callers that
# concatenate the result keep working.
def convert_to_with_fallback_on_iso_8859_1(text, to, from)
  return text if to == 'utf-8' && text.isutf8

  if from.blank? && !text.is_binary_data?
    from = CharDet.detect(text)['encoding']

    # Chardet usually reports iso-8859-2 (close to windows-1250) for text that
    # is really iso-8859-1 (Latin1, close to windows-1252).
    # See http://en.wikipedia.org/wiki/ISO/IEC_8859-2
    # Trusting that guess yields unwanted characters, like ŕ instead of à.
    # (I know, could be a very bad decision...)
    from = 'iso-8859-1' if from =~ /iso-8859-2/i
  end

  begin
    convert_to_without_fallback_on_iso_8859_1(text, to, from)
  rescue Iconv::InvalidCharacter
    if from == 'iso-8859-1'
      # The fallback charset failed too: hand back the original text rather
      # than letting the method silently evaluate to nil.
      text
    else
      # Bounded retry: after this assignment the branch above ends the loop.
      from = 'iso-8859-1'
      retry
    end
  end
end

.unquote_and_convert_to(text, to_charset, from_charset = "iso-8859-1", preserve_underscores = false) ⇒ Object


79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/tmail/quoting.rb', line 79

# Decodes every MIME encoded-word (=?charset?Q|B?payload?=) found in +text+
# and converts both the decoded payloads and the surrounding plain text to
# +to_charset+.
#
# text                 - the String to decode; nil yields "". The string is
#                        not modified.
# to_charset           - name of the target charset.
# from_charset         - kept for interface compatibility. The charset passed
#                        on is always the one named by the encoded-word being
#                        processed (nil for text that is not followed by an
#                        encoded-word on the same line), never this argument.
# preserve_underscores - forwarded to
#                        unquote_quoted_printable_and_convert_to.
#
# Returns the decoded String.
# Raises RuntimeError when an encoded-word uses a quoting method other than
# Q/q or B/b.
def unquote_and_convert_to(text, to_charset, from_charset = "iso-8859-1", preserve_underscores=false)
  return "" if text.nil?

  # Remove whitespace between adjacent encoded-words. A non-destructive gsub
  # keeps the caller's string intact and also works on frozen strings.
  joined = text.gsub(/\?=(\s*)=\?/, '?==?')

  joined.gsub(/(.*?)(?:(?:=\?(.*?)\?(.)\?(.*?)\?=)|$)/) do
    # Grab the captures right away, before any further call is made.
    before, charset, quoting_method, encoded = $~.captures

    before = convert_to(before, to_charset, charset) unless before.empty?
    before + case quoting_method
        when "q", "Q" then
          unquote_quoted_printable_and_convert_to(encoded, to_charset, charset, preserve_underscores)
        when "b", "B" then
          unquote_base64_and_convert_to(encoded, to_charset, charset)
        when nil then
          # nil when the match ended on the end-of-line alternative of the
          # regex instead of on an encoded-word.
          ""
        else
          raise "unknown quoting method #{quoting_method.inspect}"
      end
  end
end

.unquote_base64_and_convert_to(text, to, from) ⇒ Object


133
134
135
# File 'lib/tmail/quoting.rb', line 133

# Base64-decodes +text+ and hands the decoded data to convert_to together
# with the +to+ and +from+ charset names.
#
# NOTE(review): Ruby's standard Base64 module offers decode64, not decode, so
# the Base64 constant resolved here is presumably a project-provided module.
# Confirm against the enclosing namespace.
#
# Returns whatever convert_to returns.
def unquote_base64_and_convert_to(text, to, from)
  decoded = Base64.decode(text)
  convert_to(decoded, to, from)
end

.unquote_quoted_printable_and_convert_to(text, to, from, preserve_underscores = false) ⇒ Object


127
128
129
130
131
# File 'lib/tmail/quoting.rb', line 127

# Decodes quoted-printable +text+ and passes the result to convert_to with
# the +to+ and +from+ charset names.
#
# Unless +preserve_underscores+ is truthy, every "_" is first replaced by a
# space. "\r\n" and lone "\r" are then normalized to "\n" before the
# quoted-printable decoding (String#unpack with the "M*" directive) runs.
#
# Returns whatever convert_to returns.
def unquote_quoted_printable_and_convert_to(text, to, from, preserve_underscores=false)
  spaced = preserve_underscores ? text : text.gsub(/_/, " ")
  normalized = spaced.gsub(/\r\n|\r/, "\n") # normalize newlines
  decoded, = normalized.unpack("M*")
  convert_to(decoded, to, from)
end