Class: Virginity::Vcard21::Parser

Inherits:
Object
  • Object
show all
Includes:
Encodings
Defined in:
lib/virginity/vcard21/parser.rb

Constant Summary collapse

CRLF_LWSP =

The process of moving from this folded multiple-line representation of a property definition to its single line representation is called “unfolding”. Unfolding is accomplished by regarding CRLF immediately followed by a LWSP-char as equivalent to the LWSP-char.

/\r?\n[\ |\t]/
CRLF =
/\r?\n/
ONE_OR_MORE_CRLF =
/(\r?\n)+/
LINE_7BIT =

everything up to the beginning of CRLF_LWSP or CRLF

/[^\r\n]*/
EQUALS =
"="
QP_LINE_CONTINUATION =
/(.*)=\r?\n$/
EMPTY_LINE =

base64 = <MIME RFC 1521 base64 text>

; the end of the text is marked with two CRLF sequences
; this results in one blank line before the start of the next property

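If a vCard contains a broken base64 field (one that is not terminated by a blank line) followed by a correct one, our nice fallback will fail, because the scan for the blank line runs past the broken field and into the next one. But well…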

/\r?\n\s*\r?\n/
WORD =

word := char [word]

/[^\[\]\=\:\.\,]+/
XWORD =

/[w-]+/ ???

/[^\[\]\=\:\.\,\;]+/
X_XWORD =
/X-[^\[\]\=\:\.\,\;]+/i
KNOWNTYPES_LITERALS =
Regexp.union(*KNOWNTYPES)
COMMA =
/\,/
SEMICOLON =
/\;/
EQUALS_REGEXP =
/=/
WS_EQUALS_WS =
/[\ |\t]*\=[\ |\t]*/
PARAM_KEY =
/(TYPE|VALUE|ENCODING|CHARSET|LANGUAGE)/i
OPTIONAL_WSLS =
/(\ |\t|\r\n|\n)*/
OPTIONAL_WS =
/[\ |\t]*/
COLON =
/:/
BEGIN_WS_COLON_WS =
/BEGIN[:space:]*:[:space:]*/
VCARD =
/VCARD/i
END_WS_COLON_WS =
/END[:space:]*:[:space:]*/
DOT =
/\./
BEGIN_END =

name := ‘LOGO’ / ‘PHOTO’ / ‘LABEL’ / ‘FN’ / ‘TITLE’ / ‘SOUND’ / ‘VERSION’ / ‘TEL’ / ‘EMAIL’ / ‘TZ’ / ‘GEO’ / ‘NOTE’ / ‘URL’ / ‘BDAY’ / ‘ROLE’ / ‘REV’ / ‘UID’ / ‘KEY’ / ‘MAILER’ / ‘X-’ word ; these may be “folded”

Simplified in this parser to: name := xword, i.e. any word except BEGIN or END, which are ‘special’.

/^(BEGIN|END)$/i

Instance Method Summary collapse

Methods included from Encodings

#binary?, #to_ascii, #to_binary, #to_default, #to_default!, #verify_utf8ness

Constructor Details

#initialize(input, options = {}) ⇒ Parser

Returns a new instance of Parser.



8
9
10
11
# File 'lib/virginity/vcard21/parser.rb', line 8

# Creates a parser for one piece of vCard 2.1 text.
#
# input   - the text to parse; it is wrapped in a StringScanner so the parse
#           methods can consume it incrementally.
# options - Hash of parser options, stored as given.
def initialize(input, options = {})
  @input = StringScanner.new(input)
  @options = options
end

Instance Method Details

#one_star(method) ⇒ Object



46
47
48
49
# File 'lib/virginity/vcard21/parser.rb', line 46

# Applies +method+ one or more times (the "1*" repetition of the grammar).
#
# method - a callable that parses one element and returns it, or nil.
#
# Returns an Array with every parsed element, or nil when not even the first
# attempt succeeds.
def one_star(method)
  first = try(method)
  return nil unless first

  [first, *star(method)]
end

#parse!Object



17
18
19
# File 'lib/virginity/vcard21/parser.rb', line 17

# Parses the whole input.
#
# Returns whatever parse_start produced.
# Raises ParseError when parse_start yields nothing.
def parse!
  result = parse_start
  raise ParseError, 'error parsing vCard2.1' unless result

  result
end

#parse_base64Object



169
170
171
172
173
174
175
# File 'lib/virginity/vcard21/parser.rb', line 169

# Reads a base64 value, which is terminated by an empty line.
#
# Returns the value with all whitespace removed (passed through to_ascii), or
# nil when no empty line follows.
def parse_base64
  raw = @input.scan_until(EMPTY_LINE)
  return nil unless raw

  # step back over the final newline so it is still there for the caller
  @input.pos -= 1
  to_ascii(raw.gsub(/\s/, ''))
end

#parse_beginvcardObject

‘BEGIN’ [ws] ‘:’ [ws] ‘VCARD’ [ws] 1*crlf



281
282
283
284
285
286
287
# File 'lib/virginity/vcard21/parser.rb', line 281

# Matches the opening line of a vCard:
#   'BEGIN' [ws] ':' [ws] 'VCARD' [ws] 1*crlf
#
# Returns the BEGIN property as a Hash, or nil when the input does not match.
# The scanner position is not restored on a partial match.
def parse_beginvcard
  return nil unless @input.skip(BEGIN_WS_COLON_WS)
  return nil unless @input.skip(VCARD)

  @input.skip(OPTIONAL_WS) # trailing whitespace is optional, result ignored
  return nil unless @input.skip(ONE_OR_MORE_CRLF)

  { :name => "BEGIN", :value => "VCARD" }
end

#parse_broken_base64Object



177
178
179
180
181
182
183
# File 'lib/virginity/vcard21/parser.rb', line 177

# Fallback for base64 values that are not terminated by an empty line: reads
# up to the first newline that is followed by a non-whitespace character,
# i.e. until an unindented line starts.
#
# Returns the value with all whitespace removed (passed through to_ascii), or
# nil when no such newline exists.
def parse_broken_base64
  raw = @input.scan_until(/\n(?=[^\s])/)
  return nil unless raw

  # step back over the newline so it is still there for the caller
  @input.pos -= 1
  to_ascii(raw.gsub(/\s/, ''))
end

#parse_crlfObject



185
186
187
# File 'lib/virginity/vcard21/parser.rb', line 185

# Consumes one line break (the CRLF pattern) at the current position.
#
# Returns the matched text, or nil when the input does not continue with a
# line break.
def parse_crlf
  @input.scan(CRLF)
end

#parse_endvcardObject

‘END’ [ws] ‘:’ [ws] ‘VCARD’



291
292
293
294
295
# File 'lib/virginity/vcard21/parser.rb', line 291

# Matches the closing line of a vCard:
#   'END' [ws] ':' [ws] 'VCARD'
#
# Returns the END property as a Hash, or nil when the input does not match.
# The scanner position is not restored on a partial match.
def parse_endvcard
  matched = @input.skip(END_WS_COLON_WS) && @input.skip(VCARD)
  matched ? { :name => "END", :value => "VCARD" } : nil
end

#parse_folded_literal(literal) ⇒ Object

parsing differently encoded and folded strings



55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/virginity/vcard21/parser.rb', line 55

# Tries to match +literal+ against the input, one character at a time.
#
# The literal is first passed through unescape_literal. For each of its
# characters one byte is read from the scanner and compared with it; every
# byte read is also appended to the (unescaped) literal, so on success the
# returned string is the literal followed by what was read.
#
# Returns that string when every character matched, nil otherwise. The
# scanner position is not restored on failure.
#
# NOTE(review): `parse` and `unescape_literal` are not defined in the part of
# the file visible here — confirm they exist before relying on this method.
# NOTE(review): StringScanner#get_byte returns nil at end of input, which
# `literal << input` cannot append — confirm callers never reach EOF here.
def parse_folded_literal(literal)
  # pr "parse_literal(#{literal.inspect})@#{@input.pos}".white
  success = true
  literal = unescape_literal(literal)
  # split first: the characters to compare are fixed before literal grows below
  literal.split(//).each do |ch|
    input = @input.get_byte
    input = parse("'\n' one_ws") if input == "\r" # CRLF followed by LWSP needs to be regarded as LWSP == Linear White Space
    # pr "#{ch.inspect}==#{input.inspect}?".white
    literal << input
    success &= (ch == input)
  end
  success ? literal : nil
end

#parse_groupObject

group := word .



329
330
331
332
333
# File 'lib/virginity/vcard21/parser.rb', line 329

# Matches one group prefix:
#   group := word '.'
#
# Returns the word (without the dot), or nil — with the scanner position
# restored — when no word followed by a dot is found.
def parse_group
  start = @input.pos
  group = @input.scan(WORD)
  return group if group && @input.skip(DOT)

  rollback(start)
end

#parse_groupsObject

groups := groups . word / word group := group*



319
320
321
322
323
324
325
# File 'lib/virginity/vcard21/parser.rb', line 319

# Collects consecutive group prefixes by calling parse_group until it stops
# matching.
#
# Returns an Array of group names; empty when the item has no group prefix.
def parse_groups
  found = []
  group = parse_group
  while group
    found << group
    group = parse_group
  end
  found
end

#parse_itemObject

item := [groups] name [params] ‘:’ value crlf



305
306
307
308
309
310
311
312
313
314
315
# File 'lib/virginity/vcard21/parser.rb', line 305

# Parses one property line:
#   item := [groups] name [params] ':' value crlf
#
# Returns a Hash with :groups, :name, :params and :value, or nil — with the
# scanner position restored — when any mandatory part is missing.
def parse_item
  start = @input.pos
  groups = parse_groups

  name = parse_name
  return rollback(start) unless name

  # forget the previous item's encoding before this item's params are parsed
  @encoding = nil
  params = parse_params
  return rollback(start) unless @input.skip(COLON)

  value = parse_value
  return rollback(start) unless value
  return rollback(start) unless @input.skip(ONE_OR_MORE_CRLF)

  { :groups => groups, :name => name, :params => params, :value => value }
end

#parse_itemsObject

The grammar rule is ( items *crlf item ) / item, which is left-recursive. It can be rewritten as ( item *crlf ) items / item, which is right-recursive and suits this parser better, and that in turn simplifies to 1*( item *crlf ).



300
301
302
# File 'lib/virginity/vcard21/parser.rb', line 300

# Parses one or more items by repeating parse_item through one_star.
#
# Returns whatever one_star returns for parse_item: the list of parsed items,
# or nil when not even one item could be parsed.
def parse_items
  one_star method(:parse_item)
end

#parse_knowntypeObject



200
201
202
203
# File 'lib/virginity/vcard21/parser.rb', line 200

# Parses a bare parameter value (one written without "KEY="), preferring a
# known type literal and falling back to any xword.
#
# Returns a Param with key "TYPE", or nil when nothing matches.
def parse_knowntype
  type = @input.scan(KNOWNTYPES_LITERALS) || @input.scan(XWORD)
  type ? Param.new("TYPE", type) : nil
end

#parse_nameObject



338
339
340
341
342
# File 'lib/virginity/vcard21/parser.rb', line 338

# Parses a property name: any xword except BEGIN or END.
#
# Returns the name, or nil when there is no xword or it is BEGIN/END. In the
# BEGIN/END case the word has already been consumed; callers restore the
# position themselves.
def parse_name
  candidate = @input.scan(XWORD)
  return nil unless candidate

  candidate =~ BEGIN_END ? nil : candidate
end

#parse_paramObject



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/virginity/vcard21/parser.rb', line 222

# Parses one parameter, including its leading semicolon:
#   ';' [ws] param [ws]
#   param := ('TYPE' / 'VALUE' / 'ENCODING' / 'CHARSET' / 'LANGUAGE' / 'X-' xword) [ws] '=' [ws] xword / knowntype
#
# Side effect: when the parameter's key matches ENCODING, @encoding is set to
# :base64, :quoted_printable, or nil for any other value.
#
# Returns the Param, or nil when no parameter follows. The scanner position
# is not restored on a partial match.
def parse_param
  return nil unless @input.skip(SEMICOLON)

  @input.skip(OPTIONAL_WS)
  param = parse_param_key_value || parse_knowntype
  return nil unless param

  @input.skip(OPTIONAL_WS)
  if param.key =~ ENCODING
    @encoding =
      case param.value
      when BASE64 then :base64
      when QUOTED_PRINTABLE then :quoted_printable
      end
  end
  param
end

#parse_param_keyObject



251
252
253
# File 'lib/virginity/vcard21/parser.rb', line 251

# Scans a parameter key: one of the keywords in PARAM_KEY, or otherwise an
# "X-" extension key.
#
# Returns the key as written in the input, or nil when neither matches.
def parse_param_key
  key = @input.scan(PARAM_KEY)
  key = @input.scan(X_XWORD) unless key
  key
end

#parse_param_key_valueObject



241
242
243
244
245
246
247
248
# File 'lib/virginity/vcard21/parser.rb', line 241

# Parses a "KEY = value" parameter.
#
# The key is upcased; a missing value becomes the empty string.
#
# Returns a Param, or nil when there is no key or no '=' after it (in the
# latter case the scanner position is restored).
def parse_param_key_value
  start = @input.pos
  raw_key = parse_param_key
  return nil unless raw_key
  return rollback(start) unless @input.skip(WS_EQUALS_WS)

  Param.new(raw_key.upcase, @input.scan(XWORD) || "")
end

#parse_paramsObject

params := 1*(‘;’ [ws] param [ws])



207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/virginity/vcard21/parser.rb', line 207

# Parses all parameters of an item:
#   params := 1*(';' [ws] param [ws])
#
# Some programs send 2.1 cards with params in the 3.0 shorthand
# ("TYPE=fax,work,home"). That is not according to the 2.1 spec, but it is
# accepted here: every comma-separated value after the first becomes an
# additional Param with the same key (an empty value if nothing follows the
# comma).
#
# Returns an Array of Params; empty when the item has no parameters.
def parse_params
  params = []
  while (param = parse_param)
    params << param
    # loop, not a single check: the shorthand may list more than two values
    while @input.scan(COMMA)
      value = @input.scan(XWORD) || ""
      params << Param.new(to_ascii(param.key), value)
    end
  end
  params
end

#parse_quoted_printableObject

FIXME: this could be much faster in inline C, since now, we’re creating 2 objects per crlf and scanning a line at least twice.



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/virginity/vcard21/parser.rb', line 140

# Reads a quoted-printable value, joining soft line breaks.
#
# Bytes are consumed one at a time. An '=' directly followed by a line break
# is a soft line break: both are dropped and the value continues on the next
# line. Any other byte directly followed by a line break ends the value; that
# line break is left in the input for the caller.
#
# Returns the raw (still encoded) value. At end of input the bytes collected
# so far are returned.
def parse_quoted_printable
  buffer = ""
  loop do
    input = @input.get_byte
    # get_byte yields nil (not "") once the input is exhausted
    return buffer if input.nil?

    followed_by_crlf = !@input.match?(CRLF).nil?
    if input == EQUALS && followed_by_crlf
      parse_crlf # soft line break: drop the '=' and the line break
    elsif followed_by_crlf
      buffer << input
      return buffer
    else
      buffer << input
    end
  end
end

#parse_sevenbitasciiObject



89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/virginity/vcard21/parser.rb', line 89

# Reads a plain (7-bit) value, unfolding folded lines.
#
# A line break followed by a space or tab is a fold: the line break is dropped
# and the whitespace character is kept, unless the option
# :vcard21_line_folding_with_extra_space is set, in which case the whitespace
# is dropped too. The value ends at the first line break that is not a fold
# (left in the input for the caller) or at the end of the input.
#
# Returns the unfolded value passed through to_ascii.
def parse_sevenbitascii
  buffer = ""
  # stop at end of input: get_byte would return nil there, which cannot be appended
  until @input.eos?
    if (fold = @input.scan(CRLF_LWSP))
      buffer << fold[-1] unless @options[:vcard21_line_folding_with_extra_space]
    elsif @input.check(CRLF)
      break
    else
      buffer << @input.get_byte
    end
  end
  to_ascii buffer
end

#parse_startObject

Produces an array of hashes. Grammar: start := [wsls] vcard [wsls]



260
261
262
263
264
265
# File 'lib/virginity/vcard21/parser.rb', line 260

# Entry rule of the grammar:
#   start := [wsls] vcard [wsls]
#
# Returns the result of parse_vcard, or nil when no vCard could be parsed.
# Leading whitespace and blank lines are consumed either way; trailing ones
# only after a successful parse.
def parse_start
  @input.skip(OPTIONAL_WSLS)
  card = parse_vcard
  return nil unless card

  @input.skip(OPTIONAL_WSLS)
  card
end

#parse_valueObject

value := sevenbitascii / quotedprintable / base64



345
346
347
348
349
350
351
352
353
354
# File 'lib/virginity/vcard21/parser.rb', line 345

# Parses a property value according to the encoding recorded in @encoding:
#   value := sevenbitascii / quotedprintable / base64
#
# For base64 the strict parser is tried first and the broken-base64 fallback
# second. Any other (or no) encoding is read as 7-bit text.
def parse_value
  if @encoding == :quoted_printable
    parse_quoted_printable
  elsif @encoding == :base64
    parse_base64 || parse_broken_base64
  else
    parse_sevenbitascii
  end
end

#parse_vcardObject

‘BEGIN’ [ws] ‘:’ [ws] ‘VCARD’ [ws] 1*CRLF items *CRLF ‘END’ [ws] ‘:’ [ws] ‘VCARD’

In terms of the other rules: vcard := beginvcard items *crlf endvcard



269
270
271
272
273
274
275
# File 'lib/virginity/vcard21/parser.rb', line 269

# Parses a complete vCard:
#   vcard := beginvcard items *crlf endvcard
#
# Returns an Array holding the BEGIN property, every item and the END
# property, or nil as soon as one of the three parts fails to parse.
def parse_vcard
  opening = parse_beginvcard
  return nil unless opening

  items = parse_items
  return nil unless items

  @input.skip(ONE_OR_MORE_CRLF) # blank lines before END are optional
  closing = parse_endvcard
  return nil unless closing

  [opening].concat(items).push(closing)
end

#parse_xwordObject



195
196
197
# File 'lib/virginity/vcard21/parser.rb', line 195

# Scans one xword (the XWORD pattern) at the current position.
#
# Returns the matched text, or nil when the input does not start with one.
def parse_xword
  @input.scan(XWORD)
end

#pr(s) ⇒ Object



13
14
15
# File 'lib/virginity/vcard21/parser.rb', line 13

# Debugging aid: prints the scanner position, a preview of the next 40 bytes
# of input and the message +s+, separated by tabs.
def pr(s)
  position = @input.pos
  preview = @input.peek(40).inspect
  puts "#{position}:\t#{preview}\t#{s}"
end

#rollback(pos) ⇒ Object

helpers



25
26
27
28
# File 'lib/virginity/vcard21/parser.rb', line 25

# Moves the scanner back to +pos+.
#
# Always returns nil, so a failing rule can write `return rollback(pos)` to
# restore the position and report failure in one step.
def rollback(pos)
  @input.pos = pos
  nil
end

#star(method) ⇒ Object



38
39
40
41
42
43
44
# File 'lib/virginity/vcard21/parser.rb', line 38

# Applies +method+ zero or more times (the "*" repetition of the grammar).
#
# method - a callable that parses one element and returns it, or nil.
#
# Returns an Array with every parsed element; empty when the first attempt
# already fails.
def star(method)
  collected = []
  item = try(method)
  until item.nil?
    collected << item
    item = try(method)
  end
  collected
end

#try(method) ⇒ Object



30
31
32
33
34
35
36
# File 'lib/virginity/vcard21/parser.rb', line 30

# Calls +method+ and undoes its input consumption when it fails.
#
# method - a callable that parses something and returns it, or nil/false.
#
# Returns the callable's result when it is truthy. Otherwise — or when
# anything inside the call throws :rollback — the scanner is moved back to
# where it was before the call and nil is returned.
def try(method)
  start = @input.pos
  catch(:rollback) do
    result = method.call
    return result if result
  end
  rollback(start)
end