Module: TMail::TextUtils

Included in:
TMail, Address, Attachment, Decoder, Encoder, HeaderField, Mail
Defined in:
lib/tmail/utils.rb

Overview

Text Utils provides a namespace to define TOKENs, ATOMs, PHRASEs and CONTROL characters that are OK per RFC 2822.

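It also provides methods you can call to determine whether a string is safe to use unquoted as an ATOM or a TOKEN (for example atom_safe? and token_safe?), and to quote it when it is not.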

Constant Summary collapse

# Matches a single character from the +control+ set. (+control+, +aspecial+,
# +tspecial+ and +lwsp+ are interpolated here but defined outside this
# summary.)
CONTROL_CHAR =
/[#{control}]/n
# Matches any character that prevents a string from being used as a bare
# ATOM: one of +aspecial+ (regexp-quoted), +control+ or +lwsp+.
# Used by atom_safe? and quote_atom.
ATOM_UNSAFE =
/[#{Regexp.quote aspecial}#{control}#{lwsp}]/n
# Like ATOM_UNSAFE but without +lwsp+: one of +aspecial+ or +control+.
# Used by quote_phrase.
PHRASE_UNSAFE =
/[#{Regexp.quote aspecial}#{control}]/n
# Matches any character that prevents a string from being used as a bare
# TOKEN: one of +tspecial+ (regexp-quoted), +control+ or +lwsp+.
# Used by token_safe? and quote_token.
TOKEN_UNSAFE =
/[#{Regexp.quote tspecial}#{control}#{lwsp}]/n
# Maps lowercase time zone abbreviations to their offset from UTC in MINUTES.
# timezone_string_to_unixtime looks a zone up with str.downcase and multiplies
# the value by 60 to get seconds.
#
# NOTE(review): the single-letter military zones below are negative for
# 'a'..'m' and positive for 'n'..'y', which is the RFC 822 sign convention.
# RFC 2822 (section 4.3) notes that RFC 822 defined these in a non-standard
# way -- confirm that keeping the RFC 822 signs is intentional.
ZONESTR_TABLE =

:stopdoc:

{
  'jst' =>   9 * 60,
  'eet' =>   2 * 60,
  'bst' =>   1 * 60,
  'met' =>   1 * 60,
  'gmt' =>   0,
  'utc' =>   0,
  'ut'  =>   0,
  'nst' => -(3 * 60 + 30),
  'ast' =>  -4 * 60,
  'edt' =>  -4 * 60,
  'est' =>  -5 * 60,
  'cdt' =>  -5 * 60,
  'cst' =>  -6 * 60,
  'mdt' =>  -6 * 60,
  'mst' =>  -7 * 60,
  'pdt' =>  -7 * 60,
  'pst' =>  -8 * 60,
  # Single-letter (military) zones.
  'a'   =>  -1 * 60,
  'b'   =>  -2 * 60,
  'c'   =>  -3 * 60,
  'd'   =>  -4 * 60,
  'e'   =>  -5 * 60,
  'f'   =>  -6 * 60,
  'g'   =>  -7 * 60,
  'h'   =>  -8 * 60,
  'i'   =>  -9 * 60,
  # 'j' is not used
  'k'   => -10 * 60,
  'l'   => -11 * 60,
  'm'   => -12 * 60,
  'n'   =>   1 * 60,
  'o'   =>   2 * 60,
  'p'   =>   3 * 60,
  'q'   =>   4 * 60,
  'r'   =>   5 * 60,
  's'   =>   6 * 60,
  't'   =>   7 * 60,
  'u'   =>   8 * 60,
  'v'   =>   9 * 60,
  'w'   =>  10 * 60,
  'x'   =>  11 * 60,
  'y'   =>  12 * 60,
  'z'   =>   0 * 60
}
# Day-of-week abbreviations indexed by Time#wday (0 = Sun); time2str reads
# WDAY[tm.wday]. The trailing 'TMailBUG' entry is presumably a sentinel that
# makes an out-of-range index visible in the output -- TODO confirm.
WDAY =

:stopdoc:

%w( Sun Mon Tue Wed Thu Fri Sat TMailBUG )
# Month abbreviations indexed directly by month number (time2str reads
# MONTH[tm.month]), so 'Jan' sits at index 1. Indexes 0 and 13 hold
# 'TMailBUG' placeholders, presumably as out-of-range sentinels -- TODO confirm.
MONTH =
%w( TMailBUG Jan Feb Mar Apr May Jun
Jul Aug Sep Oct Nov Dec TMailBUG )
# Matches an angle-bracketed message id anywhere in a string: "<", one or
# more characters other than "@" and ">", an "@", one or more characters
# other than ">", then ">". Used by message_id?.
MESSAGE_ID =
/<[^\@>]+\@[^>]+>/
# Matches a MIME encoded-word of the form =?charset?Q?text?= or
# =?charset?B?text?= (case-insensitive) anywhere in a string.
# Used by mime_encoded?.
MIME_ENCODED =
/=\?[^\s?=]+\?[QB]\?[^\s?=]+\?=/i
# NKF option strings keyed by KCODE name; to_kcode looks up
# NKF_FLAGS[TMail.KCODE] and passes the result to NKF.nkf.
NKF_FLAGS =
{
  'EUC'  => '-e -m',
  'SJIS' => '-s -m'
}
# Matches the leading charset'language' prefix of an RFC 2231 extended
# parameter value (case-insensitive). The charset is optional and limited to
# the four names listed; the language part accepts only letters.
# Used by decode_RFC2231.
RFC2231_ENCODED =
/\A(?:iso-2022-jp|euc-jp|shift_jis|us-ascii)?'[a-z]*'/in

Instance Method Summary collapse

Instance Method Details

#atom_safe?(str) ⇒ Boolean

Returns true if the string supplied is free from characters not allowed as an ATOM

Returns:

  • (Boolean)


123
124
125
# File 'lib/tmail/utils.rb', line 123

# Reports whether +str+ can be used as an ATOM without quoting.
#
# Returns false when ATOM_UNSAFE matches +str+, true otherwise.
def atom_safe?( str )
  !(ATOM_UNSAFE === str)
end

#decode_params(hash) ⇒ Object



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/tmail/utils.rb', line 272

# Builds a new parameter hash from +hash+, reassembling RFC 2231 style
# parameters.
#
# Keys ending in "*" or "*N*" are treated as segments of one extended
# parameter: the suffix is stripped, segments are ordered by N (0 when
# absent), concatenated, and passed through decode_RFC2231. Every other
# value is passed through to_kcode under its original key.
#
# Returns the new Hash; +hash+ itself is not modified.
def decode_params( hash )
  decoded  = {}
  segments = {}

  hash.each do |key, value|
    continuation = /\*(?:(\d+)\*)?\z/.match(key)
    if continuation
      position = (continuation[1] || 0).to_i
      (segments[continuation.pre_match] ||= [])[position] = value
    else
      decoded[key] = to_kcode(value)
    end
  end

  # Missing segment positions are nil and join as empty strings.
  segments.each do |name, pieces|
    decoded[name] = decode_RFC2231(pieces.join(''))
  end

  decoded
end

#decode_RFC2231(str) ⇒ Object



303
304
305
306
307
308
309
310
# File 'lib/tmail/utils.rb', line 303

# Decodes an RFC 2231 extended parameter value.
#
# When +str+ does not start with a prefix matched by RFC2231_ENCODED it is
# returned unchanged. Otherwise the prefix is dropped, every %XX escape in
# the remainder is replaced by the corresponding byte, and the result is
# passed through to_kcode. If that raises, the remainder is returned with
# its %XX escapes removed instead.
def decode_RFC2231( str )
  prefix = RFC2231_ENCODED.match(str)
  return str unless prefix

  payload = prefix.post_match
  begin
    unescaped = payload.gsub(/%[\da-f]{2}/in) { |escape| escape[1, 2].hex.chr }
    to_kcode(unescaped)
  rescue
    # Best effort: fall back to the text with the escapes stripped.
    payload.gsub(/%[\da-f]{2}/in, "")
  end
end

#join_domain(arr) ⇒ Object

Provides a method to join a domain name from its parts, quoting each part as needed to make it ATOM safe. Parts that are bracketed domain literals (such as [127.0.0.1]) are left as they are.



169
170
171
172
173
174
175
176
177
# File 'lib/tmail/utils.rb', line 169

# Joins the domain parts in +arr+ with "." into a single string.
#
# A part wrapped in square brackets (a domain literal) is used as is; every
# other part goes through quote_atom first.
def join_domain( arr )
  labels = arr.map do |label|
    case label
    when /\A\[.*\]\z/ then label
    else quote_atom(label)
    end
  end
  labels.join('.')
end

#message_id?(str) ⇒ Boolean

Returns:

  • (Boolean)


260
261
262
# File 'lib/tmail/utils.rb', line 260

# Reports whether +str+ contains something matched by MESSAGE_ID.
#
# Returns true or false.
def message_id?( str )
  case str
  when MESSAGE_ID then true
  else false
  end
end

#mime_encoded?(str) ⇒ Boolean

Returns:

  • (Boolean)


267
268
269
# File 'lib/tmail/utils.rb', line 267

# Reports whether +str+ contains something matched by MIME_ENCODED.
#
# Returns true or false.
def mime_encoded?( str )
  case str
  when MIME_ENCODED then true
  else false
  end
end

#quote_atom(str) ⇒ Object

If the string supplied has ATOM unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified



129
130
131
# File 'lib/tmail/utils.rb', line 129

# Returns +str+ wrapped by dquote when ATOM_UNSAFE matches it; otherwise
# returns +str+ itself, unmodified.
def quote_atom( str )
  return str unless ATOM_UNSAFE === str

  dquote(str)
end

#quote_boundaryObject



312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# File 'lib/tmail/utils.rb', line 312

# Rewrites @body so that the value of its boundary= parameter is wrapped in
# double quotes when it contains "/", "?" or "=" (for example the "=" that
# MS Outlook puts in its boundary text), so those characters do not confuse
# the parser.
#
# Only the last "boundary=" in @body is considered. The boundary value runs
# up to the first ";" after it, or to the end of @body when there is none.
# A value already wrapped in double quotes is not quoted again. @body is
# left untouched when there is no boundary= parameter or the value needs no
# quoting.
#
# Returns the new @body when it was rewritten, nil otherwise.
def quote_boundary
  return unless @body =~ /^(.*)boundary=(.*)$/m

  preamble, remainder = Regexp.last_match.captures

  # Split the value from whatever follows the first ";" (kept verbatim,
  # minus one trailing line break).
  pieces = /^(.*?)(;.*)$/m.match(remainder)
  if pieces
    boundary_text = pieces[1]
    post = pieces[2].chomp
  else
    boundary_text = remainder.chomp
    post = nil
  end

  return unless boundary_text =~ /[\/\?\=]/

  boundary_text = "\"#{boundary_text}\"" unless boundary_text =~ /^".*?"$/
  @body = "#{preamble}boundary=#{boundary_text}#{post}"
end

#quote_phrase(str) ⇒ Object

If the string supplied has PHRASE unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified



135
136
137
# File 'lib/tmail/utils.rb', line 135

# Returns +str+ wrapped by dquote when PHRASE_UNSAFE matches it; otherwise
# returns +str+ itself, unmodified.
def quote_phrase( str )
  return str unless PHRASE_UNSAFE === str

  dquote(str)
end

#quote_token(str) ⇒ Object

If the string supplied has TOKEN unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified



146
147
148
# File 'lib/tmail/utils.rb', line 146

# Returns +str+ wrapped by dquote when TOKEN_UNSAFE matches it; otherwise
# returns +str+ itself, unmodified.
def quote_token( str )
  return str unless TOKEN_UNSAFE === str

  dquote(str)
end

#quote_unquoted_bencodeObject

AppleMail generates Content-Type parameters whose unquoted values contain illegal characters, such as:

name==?ISO-2022-JP?B?...=?=

This method quotes such values. (It only handles the case where the value fits on one line.)



336
337
338
339
340
341
342
343
344
# File 'lib/tmail/utils.rb', line 336

# Rewrites @body so that parameter values starting with "=?" (an unquoted
# encoded word, e.g. name==?ISO-2022-JP?B?...?=) go through quote_token.
#
# A value is recognised only after ";", whitespace and a lowercase
# parameter name, and it ends at the first ";", CR, LF, space or end of
# string.
#
# Returns the new @body.
def quote_unquoted_bencode
  pattern = %r"(;\s+[-a-z]+=)(=\?.+?)([;\r\n ]|\z)"m
  @body = @body.gsub(pattern) do
    # prefix:       e.g. "; name="
    # encoded_word: e.g. "=?ISO-2022-JP?B?...?="
    # terminator:   the delimiter that ended the value (may be empty)
    prefix, encoded_word, terminator = Regexp.last_match.captures

    prefix + quote_token(encoded_word) + terminator
  end
end

#quote_unquoted_nameObject

AppleMail generates name=filename attributes in the content type whose values contain spaces but are not quoted. This method quotes such values so that the TMail parser can handle them.



348
349
350
351
352
353
354
355
# File 'lib/tmail/utils.rb', line 348

# Rewrites @body so that the value following "name=" goes through
# quote_token when it is a run of word characters, whitespace and dots
# (e.g. an unquoted file name containing spaces).
#
# Because the trailing group consumes the rest of @body, only the first
# "name=" occurrence is rewritten.
#
# Returns the new @body.
def quote_unquoted_name
  @body = @body.gsub(%r|(name=)([\w\s.]+)(.*)|m) do
    # prefix:   "name="
    # filename: e.g. "my file.pdf"
    # rest:     everything after the value
    prefix, filename, rest = Regexp.last_match.captures

    prefix + quote_token(filename) + rest
  end
end

#time2str(tm) ⇒ Object



244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/tmail/utils.rb', line 244

# Formats +tm+ as an RFC 2822 style date-time string, for example
# "Wed, 1 Jan 2020 12:00:00 -0330".
#
# tm:: a Time. Both the calendar fields and the zone offset are taken from
#      +tm+ itself, so the printed offset always agrees with the printed
#      clock time, whatever the system's local zone is.
#
# Returns the formatted String.
def time2str( tm )
  offset = tm.utc_offset

  # Split the magnitude, not the signed value: Integer#divmod floors, so
  # -210 minutes would otherwise become [-4, 30] and print as "-0430".
  # Carrying the sign separately also keeps it for offsets such as -00:30.
  zone_sign = offset < 0 ? '-' : '+'
  zone_hours, zone_minutes = (offset.abs / 60).divmod(60)

  # DO NOT USE strftime: setlocale() breaks it
  sprintf '%s, %s %s %d %02d:%02d:%02d %s%02d%02d',
          WDAY[tm.wday], tm.mday, MONTH[tm.month],
          tm.year, tm.hour, tm.min, tm.sec,
          zone_sign, zone_hours, zone_minutes
end

#timezone_string_to_unixtime(str) ⇒ Object

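Takes a time zone string from an email (a numeric offset such as "+0900" or a zone abbreviation such as "JST") and converts it to an offset from UTC in seconds. Raises SyntaxError when the string is neither a numeric offset nor a known abbreviation.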



228
229
230
231
232
233
234
235
236
237
# File 'lib/tmail/utils.rb', line 228

# Converts a time zone string into a signed offset from UTC in seconds.
#
# str:: either a numeric offset containing a sign, one or two hour digits
#       and two minute digits (e.g. "+0900", "-330"), or a zone abbreviation
#       found (case-insensitively) in ZONESTR_TABLE.
#
# Returns the offset as an Integer number of seconds.
# Raises SyntaxError when +str+ is neither form.
def timezone_string_to_unixtime( str )
  numeric = /([\+\-])(\d\d?)(\d\d)/.match(str)

  unless numeric
    # ZONESTR_TABLE stores minutes.
    minutes = ZONESTR_TABLE[str.downcase]
    raise SyntaxError, "wrong timezone format '#{str}'" unless minutes
    return minutes * 60
  end

  sign, hours, mins = numeric.captures
  seconds = (hours.to_i * 60 + mins.to_i) * 60
  sign == '-' ? -seconds : seconds
end

#to_kcode(str) ⇒ Object



296
297
298
299
# File 'lib/tmail/utils.rb', line 296

# Converts +str+ with NKF using the flags registered in NKF_FLAGS for
# TMail.KCODE.
#
# Returns +str+ itself, unconverted, when NKF_FLAGS has no entry for the
# current TMail.KCODE; otherwise returns the result of NKF.nkf.
def to_kcode( str )
  nkf_options = NKF_FLAGS[TMail.KCODE]
  return str unless nkf_options

  NKF.nkf(nkf_options, str)
end

#token_safe?(str) ⇒ Boolean

Returns true if the string supplied is free from characters not allowed as a TOKEN

Returns:

  • (Boolean)


140
141
142
# File 'lib/tmail/utils.rb', line 140

# Reports whether +str+ can be used as a TOKEN without quoting.
#
# Returns false when TOKEN_UNSAFE matches +str+, true otherwise.
def token_safe?( str )
  !(TOKEN_UNSAFE === str)
end

#unquote(str) ⇒ Object

Unwraps the supplied string from inside double quotes. Returns the unquoted string, or the string unchanged if it is not wrapped in double quotes.



163
164
165
# File 'lib/tmail/utils.rb', line 163

# Strips one pair of enclosing double quotes from +str+.
#
# Returns the text between the quotes when +str+ matches /^"(.*?)"$/m,
# otherwise returns +str+ itself.
def unquote( str )
  return str unless str =~ /^"(.*?)"$/m

  Regexp.last_match(1)
end