Class: Virginity::Vcard21::Parser
- Inherits:
-
Object
- Object
- Virginity::Vcard21::Parser
- Includes:
- Encodings
- Defined in:
- lib/virginity/vcard21/parser.rb
Constant Summary collapse
# The process of moving from the folded multiple-line representation of a
# property definition to its single-line representation is called "unfolding".
# Unfolding is accomplished by regarding CRLF immediately followed by a
# LWSP-char (space or tab) as equivalent to the LWSP-char.
#
# The character class is exactly space and tab; a "|" inside a bracket
# expression is a literal pipe, not an alternation, so it must not appear here.
CRLF_LWSP = /\r?\n[ \t]/
- CRLF =
/\r?\n/
- ONE_OR_MORE_CRLF =
/(\r?\n)+/
- LINE_7BIT =
everything up to the beginning of CRLF_LWSP or CRLF
/[^\r\n]*/
- EQUALS =
"="
- QP_LINE_CONTINUATION =
/(.*)=\r?\n$/
- EMPTY_LINE =
base64 = <MIME RFC 1521 base64 text>
; the end of the text is marked with two CRLF sequences ; this results in one blank line before the start of the next property
if this vcard has one broken base64 field and a correct one, our nice fallback will fail. but well…
/\r?\n\s*\r?\n/
- WORD =
word := char [word]
/[^\[\]\=\:\.\,]+/
- XWORD =
/[w-]+/ ???
/[^\[\]\=\:\.\,\;]+/
- X_XWORD =
/X-[^\[\]\=\:\.\,\;]+/i
- KNOWNTYPES_LITERALS =
Regexp.union(*KNOWNTYPES)
- COMMA =
/\,/
- SEMICOLON =
/\;/
- EQUALS_REGEXP =
/=/
# [ws] '=' [ws] -- an equals sign with optional spaces/tabs on either side.
# The whitespace class is exactly space and tab (a "|" inside brackets would
# be a literal pipe character).
WS_EQUALS_WS = /[ \t]*=[ \t]*/
- PARAM_KEY =
/(TYPE|VALUE|ENCODING|CHARSET|LANGUAGE)/i
- OPTIONAL_WSLS =
/(\ |\t|\r\n|\n)*/
# [ws] -- zero or more spaces/tabs. The class is exactly space and tab
# (a "|" inside brackets would be a literal pipe character).
OPTIONAL_WS = /[ \t]*/
- COLON =
/:/
# 'BEGIN' [ws] ':' [ws]
#
# Note: "[:space:]" is only a POSIX class when written inside a bracket
# expression ("[[:space:]]"); on its own it is the character set
# {':', 's', 'p', 'a', 'c', 'e'}. The grammar's [ws] is spelled out here as
# space/tab, matching OPTIONAL_WS.
BEGIN_WS_COLON_WS = /BEGIN[ \t]*:[ \t]*/
- VCARD =
/VCARD/i
# 'END' [ws] ':' [ws]
#
# Note: "[:space:]" is only a POSIX class when written inside a bracket
# expression ("[[:space:]]"); on its own it is the character set
# {':', 's', 'p', 'a', 'c', 'e'}. The grammar's [ws] is spelled out here as
# space/tab, matching OPTIONAL_WS.
END_WS_COLON_WS = /END[ \t]*:[ \t]*/
- DOT =
/\./
- BEGIN_END =
name := ‘LOGO’ / ‘PHOTO’ / ‘LABEL’ / ‘FN’ / ‘TITLE’ / ‘SOUND’ / ‘VERSION’ / ‘TEL’ / ‘EMAIL’ / ‘TZ’ / ‘GEO’ / ‘NOTE’ / ‘URL’ / ‘BDAY’ / ‘ROLE’ / ‘REV’ / ‘UID’ / ‘KEY’ / ‘MAILER’ / ‘X-’ word #; these may be “folded” name := xword # any word except begin or end, those are ‘special’
/^(BEGIN|END)$/i
Instance Method Summary collapse
-
#initialize(input, options = {}) ⇒ Parser
constructor
A new instance of Parser.
- #one_star(method) ⇒ Object
- #parse! ⇒ Object
- #parse_base64 ⇒ Object
-
#parse_beginvcard ⇒ Object
‘BEGIN’ [ws] ‘:’ [ws] ‘VCARD’ [ws] 1*crlf.
- #parse_broken_base64 ⇒ Object
- #parse_crlf ⇒ Object
-
#parse_endvcard ⇒ Object
‘END’ [ws] ‘:’ [ws] ‘VCARD’.
-
#parse_folded_literal(literal) ⇒ Object
parsing differently encoded and folded strings.
-
#parse_group ⇒ Object
group := word .
-
#parse_groups ⇒ Object
groups := groups . word / word
-
#parse_item ⇒ Object
item := [groups] name [params] ‘:’ value crlf.
-
#parse_items ⇒ Object
Original grammar: items := ( items *crlf item ) / item (left recursion!). Rewritten as (item *crlf) items / item (right recursion, better for this parser), which simplifies to 1*(item *crlf).
- #parse_knowntype ⇒ Object
- #parse_name ⇒ Object
- #parse_param ⇒ Object
- #parse_param_key ⇒ Object
- #parse_param_key_value ⇒ Object
-
#parse_params ⇒ Object
params := 1*(‘;’ [ws] param [ws]).
-
#parse_quoted_printable ⇒ Object
FIXME: this could be much faster in inline C, since now, we’re creating 2 objects per crlf and scanning a line at least twice.
- #parse_sevenbitascii ⇒ Object
-
#parse_start ⇒ Object
produces an array of hashes start := [wsls] vcard [wsls].
-
#parse_value ⇒ Object
value := sevenbitascii / quotedprintable / base64.
-
#parse_vcard ⇒ Object
‘BEGIN’ [ws] ‘:’ [ws] ‘VCARD’ [ws] 1*CRLF items *CRLF ‘END’ [ws] ‘:’ [ws] ‘VCARD’ vcard := beginvcard items *crlf endvcard.
- #parse_xword ⇒ Object
- #pr(s) ⇒ Object
-
#rollback(pos) ⇒ Object
helpers.
- #star(method) ⇒ Object
- #try(method) ⇒ Object
Methods included from Encodings
#binary?, #to_ascii, #to_binary, #to_default, #to_default!, #verify_utf8ness
Constructor Details
#initialize(input, options = {}) ⇒ Parser
Returns a new instance of Parser.
8 9 10 11 |
# File 'lib/virginity/vcard21/parser.rb', line 8
# Builds a parser over +input+.
#
# @param input [String] the text to parse; wrapped in a StringScanner
# @param options [Hash] parser options; parse_sevenbitascii reads
#   :vcard21_line_folding_with_extra_space from it
def initialize(input, options = {})
  @input = StringScanner.new(input)
  @options = options
end
Instance Method Details
#one_star(method) ⇒ Object
46 47 48 49 |
# File 'lib/virginity/vcard21/parser.rb', line 46
# "One or more" combinator: applies +method+ through #try once, then as
# often as it keeps succeeding through #star.
#
# @param method [#call] the parse step to repeat
# @return [Array, nil] the collected results, or nil when the first
#   attempt fails
def one_star(method)
  first = try(method)
  return nil unless first
  [first, *star(method)]
end
#parse! ⇒ Object
17 18 19 |
# File 'lib/virginity/vcard21/parser.rb', line 17
# Runs the parser from the start rule.
#
# @return [Object] whatever parse_start returns on success
# @raise [ParseError] when parse_start returns nil/false
def parse!
  result = parse_start
  raise ParseError, 'error parsing vCard2.1' unless result
  result
end
#parse_base64 ⇒ Object
169 170 171 172 173 174 175 |
# File 'lib/virginity/vcard21/parser.rb', line 169
# Reads a base64 value, which runs up to and including the next empty line.
#
# @return [Object, nil] to_ascii of the value with all whitespace removed,
#   or nil when no empty line follows
def parse_base64
  # scan until an empty line occurs
  chunk = @input.scan_until(EMPTY_LINE)
  return nil unless chunk
  # Un-consume the last byte of the match so that a line terminator is still
  # available to the caller (parse_item skips CRLFs after the value).
  @input.pos = @input.pos - 1
  chunk.gsub!(/\s/, '')
  to_ascii(chunk)
end
#parse_beginvcard ⇒ Object
‘BEGIN’ [ws] ‘:’ [ws] ‘VCARD’ [ws] 1*crlf
281 282 283 284 285 286 287 |
# File 'lib/virginity/vcard21/parser.rb', line 281
# 'BEGIN' [ws] ':' [ws] 'VCARD' [ws] 1*crlf
#
# Does not restore the scanner position itself when a later part fails.
#
# @return [Hash, nil] the BEGIN pseudo-item, or nil when the input does not
#   match
def parse_beginvcard
  return nil unless @input.skip(BEGIN_WS_COLON_WS)
  return nil unless @input.skip(VCARD)
  @input.skip(OPTIONAL_WS)
  return nil unless @input.skip(ONE_OR_MORE_CRLF)
  { :name => "BEGIN", :value => "VCARD" }
end
#parse_broken_base64 ⇒ Object
177 178 179 180 181 182 183 |
# File 'lib/virginity/vcard21/parser.rb', line 177
# Fallback for base64 values that are not followed by an empty line: the
# value is taken to run until a newline that is followed by a
# non-whitespace character.
#
# @return [Object, nil] to_ascii of the value with all whitespace removed,
#   or nil when no such newline is found
def parse_broken_base64
  # scan until an unindented line is encountered
  chunk = @input.scan_until(/\n(?=[^\s])/)
  return nil unless chunk
  # Un-consume the newline so the caller still finds a line terminator.
  @input.pos = @input.pos - 1
  chunk.gsub!(/\s/, '')
  to_ascii(chunk)
end
#parse_crlf ⇒ Object
185 186 187 |
# File 'lib/virginity/vcard21/parser.rb', line 185
# Consumes one line terminator at the current position.
#
# @return [String, nil] the matched terminator, or nil when the input does
#   not continue with one
def parse_crlf
  terminator = @input.scan(CRLF)
  terminator
end
#parse_endvcard ⇒ Object
‘END’ [ws] ‘:’ [ws] ‘VCARD’
291 292 293 294 295 |
# File 'lib/virginity/vcard21/parser.rb', line 291
# 'END' [ws] ':' [ws] 'VCARD'
#
# Does not restore the scanner position itself when 'VCARD' is missing.
#
# @return [Hash, nil] the END pseudo-item, or nil when the input does not
#   match
def parse_endvcard
  return nil unless @input.skip(END_WS_COLON_WS)
  return nil unless @input.skip(VCARD)
  { :name => "END", :value => "VCARD" }
end
#parse_folded_literal(literal) ⇒ Object
parsing differently encoded and folded strings
55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/virginity/vcard21/parser.rb', line 55 def parse_folded_literal(literal) # pr "parse_literal(#{literal.inspect})@#{@input.pos}".white success = true literal = unescape_literal(literal) literal.split(//).each do |ch| input = @input.get_byte input = parse("'\n' one_ws") if input == "\r" # CRLF followed by LWSP needs to be regarded as LWSP == Linear White Space # pr "#{ch.inspect}==#{input.inspect}?".white literal << input success &= (ch == input) end success ? literal : nil end |
#parse_group ⇒ Object
group := word .
329 330 331 332 333 |
# File 'lib/virginity/vcard21/parser.rb', line 329
# group := word .
#
# @return [String, nil] the group name (without the dot), or nil with the
#   scanner position restored when no "word." is present
def parse_group
  start = @input.pos
  word = @input.scan(WORD)
  return rollback(start) unless word && @input.skip(DOT)
  word
end
#parse_groups ⇒ Object
groups := groups . word / word group := group*
319 320 321 322 323 324 325 |
# File 'lib/virginity/vcard21/parser.rb', line 319
# Collects consecutive groups by calling parse_group until it fails.
#
# @return [Array] the group names in order; empty when there are none
def parse_groups
  collected = []
  loop do
    group = parse_group
    break unless group
    collected << group
  end
  collected
end
#parse_item ⇒ Object
item := [groups] name [params] ‘:’ value crlf
305 306 307 308 309 310 311 312 313 314 315 |
# File 'lib/virginity/vcard21/parser.rb', line 305
# item := [groups] name [params] ':' value crlf
#
# Resets @encoding before the params are read; parse_param sets it again
# when an ENCODING param is present, and parse_value dispatches on it.
#
# @return [Hash, nil] a hash with :groups, :name, :params and :value, or nil
#   with the scanner position restored when any mandatory part is missing
def parse_item
  start = @input.pos
  groups = parse_groups
  name = parse_name
  return rollback(start) unless name
  @encoding = nil
  params = parse_params
  return rollback(start) unless @input.skip(COLON)
  value = parse_value
  return rollback(start) unless value
  return rollback(start) unless @input.skip(ONE_OR_MORE_CRLF)
  { :groups => groups, :name => name, :params => params, :value => value }
end
#parse_items ⇒ Object
Original grammar: items := ( items *crlf item ) / item (left recursion!). Rewritten as (item *crlf) items / item (right recursion, better for this parser), which simplifies to 1*(item *crlf)
300 301 302 |
# File 'lib/virginity/vcard21/parser.rb', line 300
# items := 1*(item *crlf)
#
# @return [Array, nil] one or more parsed items, or nil when not even one
#   item could be parsed
def parse_items
  item_parser = method(:parse_item)
  one_star(item_parser)
end
#parse_knowntype ⇒ Object
200 201 202 203 |
# File 'lib/virginity/vcard21/parser.rb', line 200
# Reads a bare parameter value (no "key=") and treats it as a TYPE.
# Known type literals are tried first, then any XWORD.
#
# @return [Param, nil] a TYPE param, or nil when neither pattern matches
def parse_knowntype
  value = @input.scan(KNOWNTYPES_LITERALS) || @input.scan(XWORD)
  return nil unless value
  Param.new("TYPE", value)
end
#parse_name ⇒ Object
338 339 340 341 342 |
# File 'lib/virginity/vcard21/parser.rb', line 338
# name := xword, except BEGIN and END, which are special.
#
# The scanner position is not restored here when the word is rejected.
#
# @return [String, nil] the property name, or nil when there is no word or
#   the word is BEGIN/END
def parse_name
  word = @input.scan(XWORD)
  return nil if word.nil? || word =~ BEGIN_END
  word
end
#parse_param ⇒ Object
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
# File 'lib/virginity/vcard21/parser.rb', line 222
# Parses one ';'-introduced parameter and records the value encoding.
#
# Side effect: when the param key matches ENCODING, @encoding becomes
# :base64, :quoted_printable, or nil for any other encoding value.
# The scanner position is not restored here on failure.
#
# @return [Param, nil] the parsed param, or nil when there is no ';' or no
#   param follows it
def parse_param
  # param := ('TYPE' / 'VALUE' / 'ENCODING' / 'CHARSET' / 'LANGUAGE' / 'X-' xword) [ws] '=' [ws] xword / knowntype
  return nil unless @input.skip(SEMICOLON)
  @input.skip(OPTIONAL_WS)
  param = parse_param_key_value || parse_knowntype
  return nil unless param
  @input.skip(OPTIONAL_WS)
  if param.key =~ ENCODING
    @encoding =
      case param.value
      when BASE64 then :base64
      when QUOTED_PRINTABLE then :quoted_printable
      end
  end
  param
end
#parse_param_key ⇒ Object
251 252 253 |
# File 'lib/virginity/vcard21/parser.rb', line 251
# Reads a parameter key: one of the PARAM_KEY names, or else an X- word.
#
# @return [String, nil] the key as written in the input, or nil
def parse_param_key
  @input.scan(PARAM_KEY) || @input.scan(X_XWORD)
end
#parse_param_key_value ⇒ Object
241 242 243 244 245 246 247 248 |
# File 'lib/virginity/vcard21/parser.rb', line 241
# Parses "key [ws] '=' [ws] value". The key is upcased; a missing value
# becomes the empty string.
#
# @return [Param, nil] the param, or nil when there is no key or no '='
#   (in the latter case the scanner position is restored)
def parse_param_key_value
  start = @input.pos
  key = parse_param_key
  return nil unless key
  key.upcase!
  return rollback(start) unless @input.skip(WS_EQUALS_WS)
  Param.new(key, @input.scan(XWORD) || "")
end
#parse_params ⇒ Object
params := 1*(‘;’ [ws] param [ws])
207 208 209 210 211 212 213 214 215 216 217 218 219 |
# File 'lib/virginity/vcard21/parser.rb', line 207
# params := 1*(';' [ws] param [ws])
#
# @return [Array<Param>] the params in input order; empty when there are none
def parse_params
  params = []
  while (param = parse_param)
    params << param
    # some programs send us 2.1 cards with params in the 3.0-shorthand version "TYPE=fax,work"
    # I added support for that although it is not according to the specs.
    # Every comma-separated value becomes its own param with the same key,
    # so lists of any length ("TYPE=fax,work,home") are accepted.
    while @input.scan(COMMA)
      params << Param.new(to_ascii(param.key), @input.scan(XWORD) || "")
    end
  end
  params
end
#parse_quoted_printable ⇒ Object
FIXME: this could be much faster in inline C, since now, we’re creating 2 objects per crlf and scanning a line at least twice.
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/virginity/vcard21/parser.rb', line 140
# Reads a quoted-printable value up to (not including) the line terminator
# that ends it. Soft line breaks ("=" directly followed by CRLF) are removed;
# "=XX" escapes are left as they are.
#
# The end of the value is checked before a byte is consumed, so an empty
# value returns "" with the terminator untouched, and hitting the end of the
# input returns what was read so far instead of raising.
#
# FIXME: this could be much faster in inline C, since now, we're creating 2 objects per crlf and scanning a line at least twice.
#
# @return [String] the value with soft line breaks removed
def parse_quoted_printable
  buffer = ""
  loop do
    return buffer if @input.eos? || @input.match?(CRLF)
    byte = @input.get_byte
    if byte == EQUALS && @input.match?(CRLF)
      parse_crlf # soft line break: drop both the "=" and the CRLF
    else
      buffer << byte
    end
  end
end
#parse_sevenbitascii ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/virginity/vcard21/parser.rb', line 89
# Reads an unencoded value up to (not including) the line terminator that
# ends it, unfolding folded lines on the way.
#
# A fold (CRLF followed by a space or tab) is replaced by that whitespace
# character, or dropped entirely when the
# :vcard21_line_folding_with_extra_space option is set. Reaching the end of
# the input ends the value instead of raising.
#
# @return [Object] to_ascii of the unfolded value
def parse_sevenbitascii
  buffer = ""
  until @input.eos?
    if (fold = @input.scan(CRLF_LWSP))
      buffer << fold[-1, 1] unless @options[:vcard21_line_folding_with_extra_space]
    elsif @input.check(CRLF)
      break # the terminator itself is left for the caller
    else
      buffer << @input.get_byte
    end
  end
  to_ascii buffer
end
#parse_start ⇒ Object
produces an array of hashes start := [wsls] vcard [wsls]
260 261 262 263 264 265 |
# File 'lib/virginity/vcard21/parser.rb', line 260
# start := [wsls] vcard [wsls]
#
# @return [Object, nil] the result of parse_vcard, or nil when no vcard
#   could be parsed
def parse_start
  @input.skip(OPTIONAL_WSLS)
  card = parse_vcard
  return nil unless card
  @input.skip(OPTIONAL_WSLS)
  card
end
#parse_value ⇒ Object
value := sevenbitascii / quotedprintable / base64
345 346 347 348 349 350 351 352 353 354 |
# File 'lib/virginity/vcard21/parser.rb', line 345
# value := sevenbitascii / quotedprintable / base64
#
# Dispatches on @encoding, which parse_param sets from the ENCODING param.
# For base64 the strict reader is tried first, then the lenient fallback.
#
# @return [Object, nil] the parsed value
def parse_value
  if @encoding == :quoted_printable
    parse_quoted_printable
  elsif @encoding == :base64
    parse_base64 || parse_broken_base64
  else
    parse_sevenbitascii
  end
end
#parse_vcard ⇒ Object
‘BEGIN’ [ws] ‘:’ [ws] ‘VCARD’ [ws] 1*CRLF items *CRLF ‘END’ [ws] ‘:’ [ws] ‘VCARD’ vcard := beginvcard items *crlf endvcard
269 270 271 272 273 274 275 |
# File 'lib/virginity/vcard21/parser.rb', line 269
# vcard := beginvcard items *crlf endvcard
#
# @return [Array, nil] the BEGIN pseudo-item, all items and the END
#   pseudo-item, in that order; nil when any of the three parts is missing
def parse_vcard
  head = parse_beginvcard
  return nil unless head
  body = parse_items
  return nil unless body
  @input.skip(ONE_OR_MORE_CRLF) # and ignore it if there are none
  tail = parse_endvcard
  return nil unless tail
  [head, *body, tail]
end
#parse_xword ⇒ Object
195 196 197 |
# File 'lib/virginity/vcard21/parser.rb', line 195
# Consumes one XWORD at the current position.
#
# @return [String, nil] the matched word, or nil when none is present
def parse_xword
  word = @input.scan(XWORD)
  word
end
#pr(s) ⇒ Object
13 14 15 |
# File 'lib/virginity/vcard21/parser.rb', line 13
# Debug helper: prints the scanner position, a preview of the next 40
# bytes, and the message +s+, separated by tabs.
#
# @param s [Object] the message to print
def pr(s)
  position = @input.pos
  preview = @input.peek(40).inspect
  puts "#{position}:\t#{preview}\t#{s}"
end
#rollback(pos) ⇒ Object
helpers
25 26 27 28 |
# File 'lib/virginity/vcard21/parser.rb', line 25
# Helper: moves the scanner back to +pos+.
#
# Always returns nil, so a failing rule can write `return rollback(pos)`.
#
# @param pos [Integer] a position previously read from @input.pos
# @return [nil]
def rollback(pos)
  @input.pos = pos
  return nil
end
#star(method) ⇒ Object
38 39 40 41 42 43 44 |
# File 'lib/virginity/vcard21/parser.rb', line 38
# "Zero or more" combinator: applies +method+ through #try until it
# returns nil.
#
# @param method [#call] the parse step to repeat
# @return [Array] the collected results; empty when the first attempt fails
def star(method)
  matches = []
  loop do
    match = try(method)
    break if match.nil?
    matches << match
  end
  matches
end
#try(method) ⇒ Object
30 31 32 33 34 35 36 |
# File 'lib/virginity/vcard21/parser.rb', line 30
# Attempts a parse step and restores the scanner position when it fails.
#
# The step fails when it returns nil/false or throws :rollback.
#
# @param method [#call] the parse step to attempt
# @return [Object, nil] the step's result, or nil after rolling back
def try(method)
  start = @input.pos
  catch(:rollback) do
    outcome = method.call
    throw :rollback unless outcome
    return outcome
  end
  rollback(start)
end