Module: Virginity::EncodingDecoding

Extended by:
Encodings
Defined in:
lib/virginity/encoding_decoding.rb

Constant Summary collapse

WSP =

VALUE-CHAR = WSP / VCHAR / NON-ASCII

[0x20, 0x09]
VCHAR =

VCHAR = %x21-7E ; visible (printing) characters

0x21..0x7E
NONASCII =

NON-ASCII = %x80-FF

0x80..0xFF
CR_AND_LF =
/\r\n/
CR =
"\r"
LF =
"\n"
QP_ALSO_ENCODE =
"\x0A\x20"
QPCHAR =
/[^=]|=[\dABCDEF]{2}/
QPFOLD =

Only vCard 2.1 uses encode_quoted_printable, so we always use Windows (CRLF) line endings for the soft line break.

"=\r\n"
ENCODED_LF =

“text”: The “text” value type should be used to identify values that contain human-readable text. The character set and language in which the text is represented is controlled by the charset content-header and the language type parameter and content-header.

A formatted text line break in a text value type MUST be represented as the character sequence backslash (ASCII decimal 92) followed by a Latin small letter n (ASCII decimal 110) or a Latin capital letter N (ASCII decimal 78), that is “\n” or “\N”.

TODO options for saving to ascii (convert to quoted printable) or storing plain utf-8

"\\n"
CRLF =
CR + LF
BACKSLASH =
"\\"
COMMA =
","
SEMICOLON =
";"
STUFF_TO_ENCODE =
/[\n\\\,\;]/
STUFF_NOT_TO_ENCODE =
%r{[^\n\\\,\;]*}
NON_ESCAPE_OR_SEPARATOR_REGEXP =
{}

Class Method Summary collapse

Methods included from Encodings

binary?, to_ascii, to_binary, to_default, to_default!, verify_utf8ness

Class Method Details

.decode_quoted_printable(text) ⇒ Object



17
18
19
# File 'lib/virginity/encoding_decoding.rb', line 17

# Decodes a quoted-printable string.
#
# CRLF pairs are first turned into LF. Then any "=" (optionally followed by
# one hex digit) that is separated from the next hex digit by a newline and
# whitespace is glued back together, and the result is decoded with
# String#unpack's "M" directive.
#
# @param text [String] quoted-printable encoded text
# @return [String] the decoded string as produced by unpack('M*')
def self.decode_quoted_printable(text)
  unix_text = text.gsub(CR_AND_LF, LF)
  rejoined = unix_text.gsub(/\=([0-9a-fA-F])?\n\s+([0-9a-fA-F])/) do
    "=#{Regexp.last_match(1)}#{Regexp.last_match(2)}"
  end
  rejoined.unpack('M*').first
end

.decode_structured_text(value, size, separator = SEMICOLON) ⇒ Object

Compound type values are delimited by a field delimiter, specified by the SEMI-COLON character (ASCII decimal 59). A SEMI-COLON in a component of a compound property value MUST be escaped with a BACKSLASH character (ASCII decimal 92).

Lists of values are delimited by a list delimiter, specified by the COMMA character (ASCII decimal 44). A COMMA character in a value MUST be escaped with a BACKSLASH character (ASCII decimal 92).

This profile supports the type grouping mechanism defined in [MIME-DIR]. Grouping of related types is a useful technique to communicate common semantics concerning the properties of a vCard.



165
166
167
168
169
170
# File 'lib/virginity/encoding_decoding.rb', line 165

# Decodes a compound (structured) value into exactly +size+ components.
#
# The value is split with decode_text_list; missing trailing components are
# filled in with empty strings and surplus components are dropped from the end.
#
# @param value [String] the encoded compound value
# @param size [Integer] the number of components the result must have
# @param separator [String] the component delimiter (SEMICOLON by default)
# @return [Array<String>] the decoded components, +size+ entries long
def self.decode_structured_text(value, size, separator = SEMICOLON)
  components = decode_text_list(value, separator)
  components.push("") until components.size >= size
  components.pop until components.size <= size
  components
end

.decode_text(text) ⇒ Object



94
95
96
# File 'lib/virginity/encoding_decoding.rb', line 94

# Removes backslash escaping from a text value.
#
# A backslash followed by "n" or "N" becomes LF; a backslash followed by any
# other character (except a raw newline, which "." does not match) is replaced
# by that character alone.
#
# @param text [String] the escaped text
# @return [String] a new string with the escapes resolved
def self.decode_text(text)
  text.gsub(/\\(.)/) do
    escaped = Regexp.last_match(1)
    escaped.casecmp('n') == 0 ? LF : escaped
  end
end

.decode_text_list(text_list, separator = COMMA) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/virginity/encoding_decoding.rb', line 135

# Splits an encoded list into its decoded elements.
#
# The input is scanned left to right. Runs of ordinary characters are copied
# verbatim; a backslash escape contributes the escaped character (or LF for
# "n"/"N"); an unescaped separator ends the current element.
#
# @param text_list [String] the encoded list
# @param separator [String] the element delimiter (COMMA by default)
# @return [Array<String>] the decoded elements; always at least one entry
# @raise [InvalidEncoding] if the input ends right after a backslash, or if a
#   character is read that is neither a backslash nor the separator
def self.decode_text_list(text_list, separator = COMMA)
  plain_run = non_escape_or_separator_regexp(separator)
  scanner = StringScanner.new(text_list)
  items = []
  current = ""
  until scanner.eos?
    # The pattern is star-quantified, so this appends "" when nothing matches.
    current << scanner.scan(plain_run)
    break if scanner.eos?
    case scanner.getch
    when BACKSLASH
      escaped = scanner.getch
      # A trailing backslash has nothing to escape; treat that as malformed input.
      if escaped.nil?
        raise InvalidEncoding, "text list \"#{text_list}\" ends after escape char"
      end
      current << (escaped.casecmp('n') == 0 ? LF : escaped)
    when separator
      items.push(current)
      current = ""
    else
      raise InvalidEncoding, "read #{scanner.matched.inspect} at #{scanner.pos} in #{scanner.string.inspect} (#{scanner.string.size}) using #{plain_run.inspect}"
    end
  end
  items.push(current)
  items
end

.encode_quoted_printable(text, options = {}) ⇒ Object



22
23
24
25
26
27
28
29
30
# File 'lib/virginity/encoding_decoding.rb', line 22

# Encodes +text+ as quoted-printable and folds the result.
#
# The text is converted with to_binary and every byte matched by the
# "special" character class is replaced by "=XX" (two uppercase hex digits).
# That class covers "=", the control bytes 0x00-0x08 and 0x0B-0x1F, the bytes
# 0x7F-0xFF, and whatever characters are listed in +:also_encode+.
#
# The +options+ hash is only read, never modified, so callers may pass a
# shared or frozen hash.
#
# @param text [String] the text to encode
# @param options [Hash] optional settings
# @option options [String] :also_encode extra characters to encode; they are
#   interpolated verbatim into a regexp character class (default QP_ALSO_ENCODE)
# @option options [Integer] :width line width handed to fold_quoted_printable (default 76)
# @option options [Integer, nil] :initial_position starting column handed to
#   fold_quoted_printable
# @return [String] the folded quoted-printable text
def self.encode_quoted_printable(text, options = {})
  # Read the option into a local instead of writing the default back into the
  # caller's hash.
  also_encode = options[:also_encode] || QP_ALSO_ENCODE
  # An earlier variant also matched trailing whitespace:
  # /[\t ](?:[\v\t ]|$)|[=\x00-\x08\x0B-\x1F\x7F-\xFF#{also_encode}]/
  special_chars = /[=\x00-\x08\x0B-\x1F\x7F-\xFF#{also_encode}]/n
  # The pattern is a single character class, so each match is exactly one byte.
  encoded = to_binary(text).gsub(special_chars) { |char| "=%02X" % char.ord }
  fold_quoted_printable(encoded, options[:width] || 76, options[:initial_position])
end

.encode_structured_text(list, separator = SEMICOLON) ⇒ Object



172
173
174
# File 'lib/virginity/encoding_decoding.rb', line 172

# Encodes the components of a compound (structured) value.
#
# Delegates to encode_text_list; the only difference is that the separator
# defaults to SEMICOLON here.
#
# @param list [Array<String>] the components to encode
# @param separator [String] the component delimiter (SEMICOLON by default)
# @return [String] whatever encode_text_list returns for the same arguments
def self.encode_structured_text(list, separator = SEMICOLON)
  encode_text_list(list, separator)
end

.encode_text(text) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/virginity/encoding_decoding.rb', line 75

# Escapes +text+ for use as a text value.
#
# Line endings are normalized to LF on a private copy, so the caller's string
# is left untouched and frozen strings are accepted. Every newline is then
# written as ENCODED_LF, and every backslash, comma, and semicolon is prefixed
# with a backslash.
#
# @param text [String] the raw text to escape
# @return [String] a new string with the escapes applied
# @raise [RuntimeError] if +text+ is not a String
def self.encode_text(text)
  raise "#{text.inspect} must be a String" unless text.is_a? String
  # normalize_newlines! edits its argument in place; keep that away from the
  # caller's object.
  normalized = text.dup
  normalize_newlines!(normalized)
  encoded = ""
  s = StringScanner.new(normalized)
  until s.eos?
    # Copy the run of characters that need no escaping (may be empty).
    encoded << s.scan(STUFF_NOT_TO_ENCODE)
    # 5.8.4 Backslashes, newlines, and commas must be encoded.
    case x = s.scan(STUFF_TO_ENCODE)
    when LF
      encoded << ENCODED_LF
    when BACKSLASH, COMMA, SEMICOLON
      # Semicolons are escaped as well: they delimit the components of
      # structured text fields.
      encoded << BACKSLASH << x
    end
  end
  encoded
end

.encode_text_list(list, separator = COMMA) ⇒ Object



98
99
100
# File 'lib/virginity/encoding_decoding.rb', line 98

# Encodes each element with encode_text and joins the results.
#
# @param list [Array<String>] the values to encode
# @param separator [String] the delimiter placed between encoded values
#   (COMMA by default)
# @return [String] the joined, encoded list
def self.encode_text_list(list, separator = COMMA)
  escaped_items = list.map { |item| encode_text(item) }
  escaped_items.join(separator)
end

.fold_quoted_printable(qp_text, width = 76, initial_position = 0) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/virginity/encoding_decoding.rb', line 34

# Inserts soft line breaks (QPFOLD) into already-encoded quoted-printable text.
#
# The text is consumed one QPCHAR token at a time (a single non-"=" character
# or a complete "=XX" escape), so an escape is never split across lines. A
# break is inserted before any token that would push the current line past
# +width+ - 3 characters.
#
# @param qp_text [String] quoted-printable text
# @param width [Integer] the line width; folding is skipped unless it is > 5
# @param initial_position [Integer, nil] characters already on the first line
#   (converted with to_i, so nil counts as 0)
# @return [String] the folded text, or +qp_text+ itself when width <= 5
# @raise [InvalidEncoding] if a "=" is not followed by two uppercase hex digits
def self.fold_quoted_printable(qp_text, width = 76, initial_position = 0)
  return qp_text unless width > 5
  pos = initial_position.to_i
  scanner = StringScanner.new(qp_text)
  folded = ""
  until scanner.eos?
    char = scanner.scan(QPCHAR)
    # scan returns nil on a malformed escape; report it instead of failing
    # later with NoMethodError on nil.
    if char.nil?
      raise InvalidEncoding, "malformed quoted-printable escape at #{scanner.pos} in #{qp_text.inspect}"
    end
    charsize = char.size
    if pos + charsize > width - 3
      folded << QPFOLD
      pos = 0
    end
    folded << char
    pos += charsize
  end
  folded
end

.non_escape_or_separator_regexp(separator) ⇒ Object



131
132
133
# File 'lib/virginity/encoding_decoding.rb', line 131

# Returns a regexp matching a (possibly empty) run of characters that are
# neither a backslash nor one of the characters in +separator+.
#
# The separator is passed through Regexp.escape before being placed in the
# character class, so separators such as "-", "]" or "^" are taken literally.
# Compiled regexps are memoized per separator in NON_ESCAPE_OR_SEPARATOR_REGEXP.
#
# @param separator [String] the delimiter; a multi-character string is treated
#   as a set of individual characters
# @return [Regexp] the memoized regexp for this separator
def self.non_escape_or_separator_regexp(separator)
  NON_ESCAPE_OR_SEPARATOR_REGEXP[separator] ||= %r{[^\\#{Regexp.escape(separator)}]*}
end

.normalize_newlines!(text) ⇒ Object



53
54
55
# File 'lib/virginity/encoding_decoding.rb', line 53

# Rewrites every line ending in +text+ (CRLF, bare LF, or bare CR) to a single
# LF, modifying +text+ in place.
#
# @param text [String] the string to normalize; it is mutated
# @return [String, nil] the result of String#gsub!: +text+ when at least one
#   line ending was matched (a bare LF counts, even though it is replaced by
#   itself), nil when the string contains no line endings
def self.normalize_newlines!(text)
  text.gsub!(/\r?\n|\r/, "\n")
end