Module: Virginity::FieldCleaning

Included in:
BaseField
Defined in:
lib/virginity/vcard/cleaning.rb

Constant Summary collapse

LIST_NAMES =

remove QUOTED-PRINTABLE-encoding

According to vcard21.doc QUOTED-PRINTABLE cannot occur in structured text and separated text … but from experience we know it does.

Note: re-encoding can fail when the decoded characters cannot be encoded as text.

%w(CATEGORIES)
QUOTED_PRINTABLE =
/^quoted-printable$/i
ENCODING =
/^ENCODING$/i
CHARSET =
"CHARSET"
BOM_UTF8 =

Why are there two BOM constants? Because the string being cleaned may be tagged as either UTF-8 or binary, and the BOM has to be matched in the same encoding as the string.

[65279].pack('U')
BOM_BINARY =
BOM_UTF8.dup.force_encoding(Encoding::BINARY)
CASE_SENSITIVE_TYPES =
/^(DOM|INTL|POSTAL|PARCEL|HOME|WORK|OTHER|PREF|VOICE|FAX|MSG|CELL|PAGER|BBS|MODEM|CAR|ISDN|VIDEO|AOL|APPLELINK|ATTMAIL|CIS|EWORLD|INTERNET|IBMMAIL|MCIMAIL|POWERSHARE|PRODIGY|TLX|X400|GIF|CGM|WMF|BMP|MET|PMB|DIB|PICT|TIFF|PDF|PS|JPEG|QTIME|MPEG|MPEG2|AVI|WAVE|AIFF|PCM|X509|PGP)$/i
TYPE =
"TYPE"
X_SYNTHESIS_REF =
/^X-Synthesis-Ref\d*$/i

Instance Method Summary collapse

Instance Method Details

#clean!Object



5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/virginity/vcard/cleaning.rb', line 5

# Runs every cleaning step on this field, one after another, in a fixed order.
#
# Returns whatever the last step (uniq_params!) returns.
def clean!
  # Order matters: the steps are executed exactly as listed.
  pipeline = [
    :clean_quoted_printable_encoding!,
    :clean_base64!,
    :clean_binary_data!,
    :clean_charsets!,
    :guess_latin!,
    :remove_encoding_8bit!,
    :remove_x_synthesis_ref_params!,
    :remove_bom!,
    :clean_types!,
    :uniq_params!
  ]
  outcome = nil
  pipeline.each { |step| outcome = __send__(step) }
  outcome
end

#clean_base64!Object

Convert the ENCODING parameter value BASE64 to its short form, b.



44
45
46
47
48
49
50
# File 'lib/virginity/vcard/cleaning.rb', line 44

# Rewrites the value of every param whose key matches ENCODING and whose
# value is "base64" (in any letter case) to the short form "b".
#
# Returns self.
def clean_base64!
  @params.each do |entry|
    entry.value = "b" if entry.key =~ ENCODING && entry.value =~ /^base64$/i
  end
  self
end

#clean_binary_data!Object



52
53
54
55
56
# File 'lib/virginity/vcard/cleaning.rb', line 52

# Strips all whitespace from the value when the field carries an ENCODING
# param whose value is "b" (in any letter case).
#
# Returns self when such a param exists, nil otherwise (the value is then
# left untouched).
def clean_binary_data!
  b_encoded = @params.any? { |entry| entry.key =~ ENCODING && entry.value =~ /^b$/i }
  return unless b_encoded

  @value.gsub!(/\s/, '')
  self
end

#clean_charsets!Object



64
65
66
67
68
69
70
# File 'lib/virginity/vcard/cleaning.rb', line 64

# Converts the value to UTF-8 according to the field's CHARSET param, then
# drops that param.
#
# The first param whose key equals CHARSET (compared case-insensitively) is
# used. When its value is anything other than the exact string "UTF-8" the
# value is transcoded from that charset; in every case the value is then
# tagged as UTF-8.
#
# Returns self when a CHARSET param was found, nil otherwise.
def clean_charsets!
  declared = @params.find { |entry| entry.key.casecmp(CHARSET) == 0 }
  return unless declared

  @value.encode!(Encoding::UTF_8, declared.value) if declared.value != "UTF-8"
  @value = @value.force_encoding(Encoding::UTF_8)
  @params.delete declared
  self
end

#clean_quoted_printable_encoding!Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/virginity/vcard/cleaning.rb', line 27

# Removes QUOTED-PRINTABLE encoding from the field's value.
#
# Nothing happens unless some param has a key matching ENCODING and a value
# matching QUOTED_PRINTABLE. Otherwise the raw value is split on its
# separator *before* decoding, each piece is decoded, the result is
# re-encoded through EncodingDecoding, and the matching params are removed.
#
# Separator selection:
# * ";" when the raw value contains one (a list or a structured value),
# * "," when the field name is in LIST_NAMES or the raw value contains one
#   (a kludge carried over from the original implementation),
# * none otherwise: the whole value is decoded as a single text.
#
# Returns self when a conversion happened, nil otherwise.
def clean_quoted_printable_encoding!
  return unless @params.any? { |p| p.key =~ ENCODING && p.value =~ QUOTED_PRINTABLE }

  separator =
    if @value.include?(";")
      ";"
    elsif LIST_NAMES.include?(@name) || @value.include?(",")
      ","
    end

  if separator
    # A limit of -1 keeps trailing empty components; a plain split would
    # silently turn "Doe;John;;;" into "Doe;John".
    pieces = @value.split(separator, -1).map { |e| EncodingDecoding::decode_quoted_printable(e) }
    @value = EncodingDecoding::encode_text_list(pieces, separator)
  else
    decoded = EncodingDecoding::decode_quoted_printable(@value)
    @value = EncodingDecoding::encode_text(decoded)
  end

  @params.delete_if { |p| p.key =~ ENCODING && p.value =~ QUOTED_PRINTABLE }
  self
end

#clean_types!Object



87
88
89
90
91
92
# File 'lib/virginity/vcard/cleaning.rb', line 87

# Upcases, in place, the value of every TYPE param whose value matches
# CASE_SENSITIVE_TYPES (a case-insensitive pattern of known type names).
# Values that do not match are left as they are.
#
# Returns self.
def clean_types!
  params(TYPE).each do |type_param|
    next unless type_param.value =~ CASE_SENSITIVE_TYPES

    type_param.value.upcase!
  end
  self
end

#guess_latin!Object



105
106
107
108
# File 'lib/virginity/vcard/cleaning.rb', line 105

# If the value's bytes are not valid in its current encoding, treat them as
# ISO-8859-1 and transcode the value to UTF-8 in place.
#
# Returns nil when the value was already valid, otherwise the result of
# String#encode! (note: not self, unlike most other cleaners here).
def guess_latin!
  @value.encode!(Encoding::UTF_8, "ISO-8859-1") unless @value.valid_encoding?
end

#remove_bom!Object



75
76
77
78
79
80
81
82
83
# File 'lib/virginity/vcard/cleaning.rb', line 75

# Deletes every byte-order mark from the value, in place.
#
# The BOM constant is picked to match the value's encoding: BOM_BINARY for a
# binary-tagged value, BOM_UTF8 for anything else. A value that is neither
# binary nor UTF-8 is considered the caller's fault.
#
# Returns self.
def remove_bom!
  bom = @value.encoding == Encoding::BINARY ? BOM_BINARY : BOM_UTF8
  @value.gsub!(bom, '')
  self
end

#remove_encoding_8bit!Object

since it’s already implicitly encoded in 8 bits…



58
59
60
61
# File 'lib/virginity/vcard/cleaning.rb', line 58

# Drops every param whose key matches ENCODING and whose value is "8BIT",
# since the value is already implicitly encoded in 8 bits.
#
# The value is matched case-insensitively, like the other encoding checks in
# this module (QUOTED_PRINTABLE, base64, b), so "ENCODING=8bit" is removed
# as well.
#
# Returns self.
def remove_encoding_8bit!
  @params.delete_if { |param| param.key =~ ENCODING && param.value =~ /^8BIT$/i }
  self
end

#remove_x_synthesis_ref_params!Object



95
96
97
98
# File 'lib/virginity/vcard/cleaning.rb', line 95

# Drops every param whose key or value matches X_SYNTHESIS_REF.
#
# Returns self.
def remove_x_synthesis_ref_params!
  @params.delete_if do |entry|
    [entry.key, entry.value].any? { |text| text =~ X_SYNTHESIS_REF }
  end
  self
end

#uniq_params!Object



100
101
102
103
# File 'lib/virginity/vcard/cleaning.rb', line 100

# Removes duplicate entries, in place, from the collection returned by
# +params+.
#
# Returns self.
def uniq_params!
  current = params
  current.uniq!
  self
end