Module: Virginity::FieldCleaning
- Included in:
- BaseField
- Defined in:
- lib/virginity/vcard/cleaning.rb
Constant Summary collapse
- LIST_NAMES =
Remove QUOTED-PRINTABLE encoding.
According to vcard21.doc, QUOTED-PRINTABLE cannot occur in structured text and separated text, but from experience we know it does.
Note: re-encoding could fail because the characters are not encodable as text.
%w(CATEGORIES)
- QUOTED_PRINTABLE =
/^quoted-printable$/i
- ENCODING =
/^ENCODING$/i
- CHARSET =
"CHARSET"
- BOM_UTF8 =
Why are there two BOMs? Because the string could be in either of those encodings (UTF-8 or binary).
[65279].pack('U')
- BOM_BINARY =
BOM_UTF8.dup.force_encoding(Encoding::BINARY)
- CASE_SENSITIVE_TYPES =
/^(DOM|INTL|POSTAL|PARCEL|HOME|WORK|OTHER|PREF|VOICE|FAX|MSG|CELL|PAGER|BBS|MODEM|CAR|ISDN|VIDEO|AOL|APPLELINK|ATTMAIL|CIS|EWORLD|INTERNET|IBMMAIL|MCIMAIL|POWERSHARE|PRODIGY|TLX|X400|GIF|CGM|WMF|BMP|MET|PMB|DIB|PICT|TIFF|PDF|PS|JPEG|QTIME|MPEG|MPEG2|AVI|WAVE|AIFF|PCM|X509|PGP)$/i
- TYPE =
"TYPE"
- X_SYNTHESIS_REF =
/^X-Synthesis-Ref\d*$/i
Instance Method Summary collapse
- #clean! ⇒ Object
-
#clean_base64! ⇒ Object
convert BASE64 to b.
- #clean_binary_data! ⇒ Object
- #clean_charsets! ⇒ Object
- #clean_quoted_printable_encoding! ⇒ Object
- #clean_types! ⇒ Object
- #guess_latin! ⇒ Object
- #remove_bom! ⇒ Object
-
#remove_encoding_8bit! ⇒ Object
since it’s already implicitly encoded in 8 bits…
- #remove_x_synthesis_ref_params! ⇒ Object
- #uniq_params! ⇒ Object
Instance Method Details
#clean! ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/virginity/vcard/cleaning.rb', line 5
#
# Runs every cleaning step on this field, one after another, in the order
# listed below. The value returned is whatever the last step (uniq_params!)
# returns.
def clean!
  steps = %i[
    clean_quoted_printable_encoding!
    clean_base64!
    clean_binary_data!
    clean_charsets!
    guess_latin!
    remove_encoding_8bit!
    remove_x_synthesis_ref_params!
    remove_bom!
    clean_types!
    uniq_params!
  ]
  # reduce keeps only the most recent result, so the final step's return
  # value becomes the return value of clean!.
  steps.reduce(nil) { |_previous, step| __send__(step) }
end
#clean_base64! ⇒ Object
convert BASE64 to b
44 45 46 47 48 49 50 |
# File 'lib/virginity/vcard/cleaning.rb', line 44
#
# Rewrites the value of every ENCODING parameter that reads "base64"
# (any letter case) to "b". Other parameters are left untouched.
#
# Returns self.
def clean_base64!
  base64 = /^base64$/i
  @params.each do |param|
    param.value = "b" if param.key =~ ENCODING && param.value =~ base64
  end
  self
end
#clean_binary_data! ⇒ Object
52 53 54 55 56 |
# File 'lib/virginity/vcard/cleaning.rb', line 52
#
# Strips all whitespace from the value, but only when the field carries an
# ENCODING parameter whose value is "b" (any letter case).
#
# Returns self when the value was processed, nil when the field has no such
# parameter.
def clean_binary_data!
  b_encoded = @params.any? { |param| param.key =~ ENCODING && param.value =~ /^b$/i }
  return unless b_encoded

  @value.gsub!(/\s/, '')
  self
end
#clean_charsets! ⇒ Object
64 65 66 67 68 69 70 |
# File 'lib/virginity/vcard/cleaning.rb', line 64
#
# Converts the value to UTF-8 using the charset named by the field's CHARSET
# parameter (the key is matched case-insensitively), then removes that
# parameter.
#
# Returns self when a CHARSET parameter was found, nil otherwise.
# Whatever String#encode! raises for an unknown or unconvertible charset
# propagates to the caller.
def clean_charsets!
  charset = @params.find { |param| param.key.casecmp(CHARSET) == 0 }
  return unless charset

  source = charset.value
  # Transcode only when the declared charset is not already UTF-8 ...
  @value.encode!(Encoding::UTF_8, source) if source != "UTF-8"
  # ... but always tag the resulting bytes as UTF-8.
  @value = @value.force_encoding(Encoding::UTF_8)
  @params.delete charset
  self
end
#clean_quoted_printable_encoding! ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/virginity/vcard/cleaning.rb', line 27
#
# Removes QUOTED-PRINTABLE encoding from the field: decodes the value,
# re-encodes it as text and drops the ENCODING=QUOTED-PRINTABLE parameter.
#
# The value is handled in one of three ways:
# * it contains ";"  -> treated as a list / structured value split on ";"
# * the field name is in LIST_NAMES, or the value contains "," (kludge)
#                    -> treated as a list split on ","
# * otherwise        -> treated as a single text value
#
# Returns self when the field was quoted-printable encoded, nil otherwise.
def clean_quoted_printable_encoding!
  quoted_printable = lambda do |param|
    param.key =~ ENCODING && param.value =~ QUOTED_PRINTABLE
  end
  return unless @params.any? { |param| quoted_printable.call(param) }

  # Decodes each component separately and joins them back together.
  # The -1 limit matters: a bare String#split drops trailing empty fields,
  # which would turn "Doe;John;;;" into "Doe;John" and lose components.
  decode_list = lambda do |separator|
    items = @value.split(separator, -1).map do |item|
      EncodingDecoding.decode_quoted_printable(item)
    end
    EncodingDecoding.encode_text_list(items, separator)
  end

  @value =
    if @value.include?(";")
      # if the unencoded value contains ";" it's a list (or a structured value)
      decode_list.call(";")
    elsif LIST_NAMES.include?(@name) || @value.include?(",") # kludge
      decode_list.call(",")
    else
      EncodingDecoding.encode_text(EncodingDecoding.decode_quoted_printable(@value))
    end

  @params.delete_if { |param| quoted_printable.call(param) }
  self
end
#clean_types! ⇒ Object
87 88 89 90 91 92 |
# File 'lib/virginity/vcard/cleaning.rb', line 87
#
# Upcases, in place, the value of every TYPE parameter whose value matches
# CASE_SENSITIVE_TYPES. Values that do not match are left as they are.
#
# Returns self.
def clean_types!
  known = params(TYPE).select { |type| type.value =~ CASE_SENSITIVE_TYPES }
  known.each { |type| type.value.upcase! }
  self
end
#guess_latin! ⇒ Object
105 106 107 108 |
# File 'lib/virginity/vcard/cleaning.rb', line 105
#
# When the value's bytes are not valid in its current encoding, reinterprets
# them as ISO-8859-1 and transcodes the value to UTF-8 in place.
#
# Returns nil when the value was already valid, otherwise the transcoded
# value (the result of String#encode!).
def guess_latin!
  @value.encode!(Encoding::UTF_8, "ISO-8859-1") unless @value.valid_encoding?
end
#remove_bom! ⇒ Object
75 76 77 78 79 80 81 82 83 |
# File 'lib/virginity/vcard/cleaning.rb', line 75
#
# Deletes every occurrence of the byte order mark from the value, in place.
# BOM_BINARY is used when the value's encoding is BINARY; BOM_UTF8 is used
# for any other encoding (if the value is not UTF-8 in that case, that is
# the caller's fault).
#
# Returns self.
def remove_bom!
  bom = @value.encoding == Encoding::BINARY ? BOM_BINARY : BOM_UTF8
  @value.gsub!(bom, '') # remove the BOM
  self
end
#remove_encoding_8bit! ⇒ Object
since it’s already implicitly encoded in 8 bits…
58 59 60 61 |
# File 'lib/virginity/vcard/cleaning.rb', line 58
#
# Deletes every ENCODING parameter whose value is "8BIT", since the value is
# already implicitly encoded in 8 bits.
#
# The value is matched case-insensitively ("8BIT", "8bit", ...), in line with
# how this module matches the other ENCODING values (base64, b,
# quoted-printable).
#
# Returns self.
def remove_encoding_8bit!
  @params.delete_if { |param| param.key =~ ENCODING && param.value =~ /^8BIT$/i }
  self
end
#remove_x_synthesis_ref_params! ⇒ Object
95 96 97 98 |
# File 'lib/virginity/vcard/cleaning.rb', line 95
#
# Deletes every parameter whose key or whose value matches X_SYNTHESIS_REF.
#
# Returns self.
def remove_x_synthesis_ref_params!
  @params.delete_if do |param|
    [param.key, param.value].any? { |text| text =~ X_SYNTHESIS_REF }
  end
  self
end
#uniq_params! ⇒ Object
100 101 102 103 |
# File 'lib/virginity/vcard/cleaning.rb', line 100
#
# Removes duplicate entries from the collection returned by +params+,
# in place.
#
# Returns self.
def uniq_params!
  tap { params.uniq! }
end