Class: Oga::XML::Parser

Inherits:
LL::Driver
  • Object
show all
Defined in:
lib/oga/xml/parser.rb

Overview

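Parser that builds an Oga::XML::Document from XML or HTML input. The input may be given as a String or as an IO, for example when reading from files.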

Direct Known Subclasses

HTML::Parser, PullParser, SaxParser

Constant Summary collapse

# Instance of LL::DriverConfig held at class level.
# NOTE(review): presumably filled with the generated parsing tables elsewhere
# in parser.rb — confirm against the full file.
CONFIG =
LL::DriverConfig.new
TOKEN_ERROR_MAPPING =

Hash mapping token types and dedicated error labels.

Returns:

  • (Hash)
{
  # Keys are token types; values are the human-readable labels that
  # parser_error substitutes into its "Unexpected ..., expected ..." messages.
  :T_STRING         => 'string',
  :T_TEXT           => 'text',
  :T_DOCTYPE_START  => 'doctype start',
  :T_DOCTYPE_END    => 'doctype closing tag',
  :T_DOCTYPE_TYPE   => 'doctype type',
  :T_DOCTYPE_NAME   => 'doctype name',
  :T_DOCTYPE_INLINE => 'inline doctype rules',
  :T_CDATA          => 'CDATA',
  :T_COMMENT        => 'comment',
  :T_ELEM_START     => 'element start',
  :T_ELEM_NAME      => 'element name',
  :T_ELEM_NS        => 'element namespace',
  :T_ELEM_END       => 'element closing tag',
  :T_ATTR           => 'attribute',
  :T_ATTR_NS        => 'attribute namespace',
  :T_XML_DECL_START => 'XML declaration start',
  :T_XML_DECL_END   => 'XML declaration end',
  :T_PROC_INS_START => 'processing-instruction start',
  :T_PROC_INS_NAME  => 'processing-instruction name',
  :T_PROC_INS_END   => 'processing-instruction closing tag',
  # -1 is the token type each_token yields after @lexer.advance has returned.
  -1                => 'end of input'
}

Instance Method Summary collapse

Constructor Details

#initialize(data, options = {}) ⇒ Parser

Returns a new instance of Parser.

Parameters:

  • data (String|IO)

    The input to parse.

  • options (Hash) (defaults to: {})

See Also:

  • Oga::XML::Lexer#initialize


223
224
225
226
227
228
# File 'lib/oga/xml/parser.rb', line 223

# Sets up a parser for the given input.
#
# @param [String|IO] data The input to parse.
# @param [Hash] options Passed unchanged to `Lexer.new` together with `data`.
def initialize(data, options = {})
  # The lexer is built from the same input that is kept in @data.
  @lexer = Lexer.new(data, options)
  @data  = data

  # Start from a clean state (line counter and lexer).
  reset
end

Instance Method Details

#_rule_0(val) ⇒ Object



421
422
423
# File 'lib/oga/xml/parser.rb', line 421

# Rule action: hands the first matched value to `on_document` and returns
# whatever `on_document` returns.
#
# @param [Array] val Values matched for this rule.
def _rule_0(val)
  children = val.first
  on_document(children)
end

#_rule_1(val) ⇒ Object



425
426
427
# File 'lib/oga/xml/parser.rb', line 425

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_1(val)
  val.first
end

#_rule_10(val) ⇒ Object



472
473
474
# File 'lib/oga/xml/parser.rb', line 472

# Rule action that ignores `val` and returns an empty Array.
#
# @param [Array] val Values matched for this rule (unused).
# @return [Array] An empty Array.
def _rule_10(val)
   [] 
end

#_rule_11(val) ⇒ Object



476
477
478
# File 'lib/oga/xml/parser.rb', line 476

# Rule action: builds a flat Array made of the first matched value followed
# by the splatted contents of the second one.
#
# @param [Array] val Values matched for this rule.
# @return [Array]
def _rule_11(val)
  head = val[0]
  tail = val[1]

  [head, *tail]
end

#_rule_12(val) ⇒ Object



480
481
482
# File 'lib/oga/xml/parser.rb', line 480

# Rule action: returns a four-element Array whose first three slots are nil
# and whose last slot is the first matched value.
#
# NOTE(review): the four slots line up with the type / public_id / system_id /
# inline_rules positions read by `_rule_9` — confirm against the grammar.
#
# @param [Array] val Values matched for this rule.
# @return [Array]
def _rule_12(val)
  last_slot = val.first

  [nil, nil, nil, last_slot]
end

#_rule_13(val) ⇒ Object



484
485
486
# File 'lib/oga/xml/parser.rb', line 484

# Rule action: folds the elements of the first matched value together
# with `+`.
#
# @param [Array] val Values matched for this rule; `val[0]` must be enumerable.
# @return [Object] The combined value, or nil when `val[0]` is empty.
def _rule_13(val)
  parts = val.first
  parts.reduce(:+)
end

#_rule_14(val) ⇒ Object



488
489
490
# File 'lib/oga/xml/parser.rb', line 488

# Rule action: returns a two-element Array holding the first two matched
# values (nil for a missing one).
#
# @param [Array] val Values matched for this rule.
# @return [Array]
def _rule_14(val)
  val.values_at(0, 1)
end

#_rule_15(val) ⇒ Object



492
493
494
# File 'lib/oga/xml/parser.rb', line 492

# Rule action that ignores `val` and returns nil.
#
# @param [Array] val Values matched for this rule (unused).
# @return [NilClass]
def _rule_15(val)
   nil 
end

#_rule_16(val) ⇒ Object



496
497
498
# File 'lib/oga/xml/parser.rb', line 496

# Rule action: passes the second matched value to `on_cdata` and returns
# its result.
#
# @param [Array] val Values matched for this rule.
def _rule_16(val)
  text = val[1]
  on_cdata(text)
end

#_rule_17(val) ⇒ Object



500
501
502
# File 'lib/oga/xml/parser.rb', line 500

# Rule action: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
# @return [Object] `val[0] + val[1]`; neither operand is modified.
def _rule_17(val)
  left  = val[0]
  right = val[1]

  left + right
end

#_rule_18(val) ⇒ Object



504
505
506
# File 'lib/oga/xml/parser.rb', line 504

# Rule action that ignores `val` and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_18(val)
   '' 
end

#_rule_19(val) ⇒ Object



508
509
510
# File 'lib/oga/xml/parser.rb', line 508

# Rule action: passes the second matched value to `on_comment` and returns
# its result.
#
# @param [Array] val Values matched for this rule.
def _rule_19(val)
  text = val[1]
  on_comment(text)
end

#_rule_2(val) ⇒ Object



429
430
431
# File 'lib/oga/xml/parser.rb', line 429

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_2(val)
  val.first
end

#_rule_20(val) ⇒ Object



512
513
514
# File 'lib/oga/xml/parser.rb', line 512

# Rule action: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
# @return [Object] `val[0] + val[1]`; neither operand is modified.
def _rule_20(val)
  left  = val[0]
  right = val[1]

  left + right
end

#_rule_21(val) ⇒ Object



516
517
518
# File 'lib/oga/xml/parser.rb', line 516

# Rule action that ignores `val` and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_21(val)
   '' 
end

#_rule_22(val) ⇒ Object



520
521
522
523
524
# File 'lib/oga/xml/parser.rb', line 520

# Rule action: passes the second and third matched values to `on_proc_ins`
# (as name and text) and returns its result.
#
# @param [Array] val Values matched for this rule.
def _rule_22(val)
  name = val[1]
  text = val[2]

  on_proc_ins(name, text)
end

#_rule_23(val) ⇒ Object



526
527
528
# File 'lib/oga/xml/parser.rb', line 526

# Rule action: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
# @return [Object] `val[0] + val[1]`; neither operand is modified.
def _rule_23(val)
  left  = val[0]
  right = val[1]

  left + right
end

#_rule_24(val) ⇒ Object



530
531
532
# File 'lib/oga/xml/parser.rb', line 530

# Rule action that ignores `val` and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_24(val)
   '' 
end

#_rule_25(val) ⇒ Object



534
535
536
# File 'lib/oga/xml/parser.rb', line 534

# Rule action: returns a pair whose first slot is nil and whose second slot
# is the first matched value.
#
# NOTE(review): `_rule_27` reads such a pair as (namespace, name) — confirm
# against the grammar that this rule feeds it.
#
# @param [Array] val Values matched for this rule.
# @return [Array]
def _rule_25(val)
  second_slot = val.first

  [nil, second_slot]
end

#_rule_26(val) ⇒ Object



538
539
540
# File 'lib/oga/xml/parser.rb', line 538

# Rule action that returns `val` itself, i.e. the whole collection of matched
# values, unchanged.
#
# @param [Array] val Values matched for this rule.
# @return [Array] The same object that was passed in.
def _rule_26(val)
   val 
end

#_rule_27(val) ⇒ Object



542
543
544
545
546
# File 'lib/oga/xml/parser.rb', line 542

# Rule action: unpacks the pair in the first matched value and calls
# `on_element` with its two members plus the second matched value.
#
# @param [Array] val Values matched for this rule; `val[0]` is indexed at 0
#   and 1 (namespace and name as far as `on_element` is concerned).
def _rule_27(val)
  name_pair  = val[0]
  attributes = val[1]

  on_element(name_pair[0], name_pair[1], attributes)
end

#_rule_28(val) ⇒ Object



548
549
550
551
552
553
554
555
556
# File 'lib/oga/xml/parser.rb', line 548

# Rule action: attaches children to an element and finishes it.
#
# When the first matched value is truthy, `on_element_children` is called
# with it and the second matched value. `after_element` is always called
# with the first matched value, and its result is returned.
#
# @param [Array] val Values matched for this rule.
def _rule_28(val)
  element  = val[0]
  children = val[1]

  on_element_children(element, children) if element

  after_element(element)
end

#_rule_29(val) ⇒ Object



558
559
560
# File 'lib/oga/xml/parser.rb', line 558

# Rule action: passes the first matched value to `on_attributes` and returns
# its result.
#
# @param [Array] val Values matched for this rule.
def _rule_29(val)
  attrs = val.first
  on_attributes(attrs)
end

#_rule_3(val) ⇒ Object



433
434
435
# File 'lib/oga/xml/parser.rb', line 433

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_3(val)
  val.first
end

#_rule_30(val) ⇒ Object



562
563
564
# File 'lib/oga/xml/parser.rb', line 562

# Rule action for an attribute with a namespace: the matched values arrive
# as (namespace, name, value) and are passed to `on_attribute` as
# (name, namespace, value).
#
# @param [Array] val Values matched for this rule.
def _rule_30(val)
  ns_name = val[0]
  name    = val[1]
  value   = val[2]

  on_attribute(name, ns_name, value)
end

#_rule_31(val) ⇒ Object



566
567
568
# File 'lib/oga/xml/parser.rb', line 566

# Rule action for an attribute without a namespace: calls `on_attribute`
# with the first matched value as name, nil as namespace and the second
# matched value as value.
#
# @param [Array] val Values matched for this rule.
def _rule_31(val)
  name  = val[0]
  value = val[1]

  on_attribute(name, nil, value)
end

#_rule_32(val) ⇒ Object



570
571
572
# File 'lib/oga/xml/parser.rb', line 570

# Rule action: passes the second matched value to `on_xml_decl` and returns
# its result.
#
# @param [Array] val Values matched for this rule.
def _rule_32(val)
  attributes = val[1]
  on_xml_decl(attributes)
end

#_rule_33(val) ⇒ Object



574
575
576
577
578
579
580
# File 'lib/oga/xml/parser.rb', line 574

# Rule action: builds a text node via `on_text`.
#
# The text is the first matched value, with the second matched value
# appended through `+` when that second value is truthy.
#
# @param [Array] val Values matched for this rule.
def _rule_33(val)
  text = val[0]
  # `+=` builds a new object, so the original val[0] is left untouched.
  text += val[1] if val[1]

  on_text(text)
end

#_rule_34(val) ⇒ Object



582
583
584
# File 'lib/oga/xml/parser.rb', line 582

# Rule action: returns the first matched value, with the second matched
# value appended through `+` when that second value is truthy.
#
# @param [Array] val Values matched for this rule.
# @return [Object]
def _rule_34(val)
  head = val[0]
  tail = val[1]

  return head unless tail

  head + tail
end

#_rule_35(val) ⇒ Object



586
587
588
# File 'lib/oga/xml/parser.rb', line 586

# Rule action that ignores `val` and returns nil.
#
# @param [Array] val Values matched for this rule (unused).
# @return [NilClass]
def _rule_35(val)
   nil 
end

#_rule_36(val) ⇒ Object



590
591
592
# File 'lib/oga/xml/parser.rb', line 590

# Rule action: the rule's result is the second matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The element of `val` at index 1.
def _rule_36(val)
  val.at(1)
end

#_rule_37(val) ⇒ Object



594
595
596
# File 'lib/oga/xml/parser.rb', line 594

# Rule action: the rule's result is the second matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The element of `val` at index 1.
def _rule_37(val)
  val.at(1)
end

#_rule_38(val) ⇒ Object



598
599
600
# File 'lib/oga/xml/parser.rb', line 598

# Rule action: joins the first two matched values with `+`.
#
# @param [Array] val Values matched for this rule.
# @return [Object] `val[0] + val[1]`; neither operand is modified.
def _rule_38(val)
  left  = val[0]
  right = val[1]

  left + right
end

#_rule_39(val) ⇒ Object



602
603
604
# File 'lib/oga/xml/parser.rb', line 602

# Rule action that ignores `val` and returns an empty String.
#
# @param [Array] val Values matched for this rule (unused).
# @return [String]
def _rule_39(val)
   '' 
end

#_rule_4(val) ⇒ Object



437
438
439
# File 'lib/oga/xml/parser.rb', line 437

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_4(val)
  val.first
end

#_rule_40(val) ⇒ Object



606
607
608
# File 'lib/oga/xml/parser.rb', line 606

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_40(val)
  val.first
end

#_rule_41(val) ⇒ Object



610
611
612
# File 'lib/oga/xml/parser.rb', line 610

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_41(val)
  val.first
end

#_rule_42(val) ⇒ Object



614
615
616
# File 'lib/oga/xml/parser.rb', line 614

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_42(val)
  val.first
end

#_rule_43(val) ⇒ Object



618
619
620
# File 'lib/oga/xml/parser.rb', line 618

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_43(val)
  val.first
end

#_rule_44(val) ⇒ Object



622
623
624
# File 'lib/oga/xml/parser.rb', line 622

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_44(val)
  val.first
end

#_rule_45(val) ⇒ Object



626
627
628
# File 'lib/oga/xml/parser.rb', line 626

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_45(val)
  val.first
end

#_rule_5(val) ⇒ Object



441
442
443
# File 'lib/oga/xml/parser.rb', line 441

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_5(val)
  val.first
end

#_rule_6(val) ⇒ Object



445
446
447
# File 'lib/oga/xml/parser.rb', line 445

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_6(val)
  val.first
end

#_rule_7(val) ⇒ Object



449
450
451
# File 'lib/oga/xml/parser.rb', line 449

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_7(val)
  val.first
end

#_rule_8(val) ⇒ Object



453
454
455
# File 'lib/oga/xml/parser.rb', line 453

# Pass-through rule action: the rule's result is the first matched value.
#
# @param [Array] val Values matched for this rule.
# @return [Object] The first element of `val`.
def _rule_8(val)
  val.first
end

#_rule_9(val) ⇒ Object



457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/oga/xml/parser.rb', line 457

# Rule action for a doctype: collects the doctype details and passes them to
# `on_doctype` as an options Hash.
#
# The second matched value is the name; the third is indexed at 0..3 to get
# the type, public ID, system ID and inline rules, in that order.
#
# @param [Array] val Values matched for this rule.
def _rule_9(val)
  name   = val[1]
  follow = val[2]

  type         = follow[0]
  public_id    = follow[1]
  system_id    = follow[2]
  inline_rules = follow[3]

  on_doctype(
    :name         => name,
    :type         => type,
    :public_id    => public_id,
    :system_id    => system_id,
    :inline_rules => inline_rules
  )
end

#after_element(element) ⇒ Oga::XML::Element

Parameters:

  • element (Oga::XML::Element)

Returns:

  • (Oga::XML::Element)



396
397
398
# File 'lib/oga/xml/parser.rb', line 396

# Hook invoked by `_rule_28` once an element has been processed. This
# implementation simply hands the element back.
#
# @param [Oga::XML::Element] element
# @return [Oga::XML::Element] The same object that was passed in.
def after_element(element)
  element
end

#each_token {|token| ... } ⇒ Object

Yields the next token from the lexer.

Yield Parameters:

  • (Array)


244
245
246
247
248
249
250
251
252
# File 'lib/oga/xml/parser.rb', line 244

# Yields every token produced by the lexer, followed by an end marker.
#
# Each lexer token is yielded as a `[type, value]` pair. Whenever the lexer
# also reports a line, it is stored in `@line` (which `parser_error` puts in
# its message). After `@lexer.advance` returns, `[-1, -1]` is yielded once.
#
# @yieldparam [Array] token A `[type, value]` pair.
def each_token
  @lexer.advance do |token_type, token_value, line_number|
    if line_number
      @line = line_number
    end

    yield [token_type, token_value]
  end

  end_marker = [-1, -1]

  yield end_marker
end

#on_attribute(name, ns_name = nil, value = nil) ⇒ Oga::XML::Attribute

Parameters:

  • name (String)
  • ns_name (String) (defaults to: nil)
  • value (String) (defaults to: nil)

Returns:

  • (Oga::XML::Attribute)



406
407
408
409
410
411
412
# File 'lib/oga/xml/parser.rb', line 406

# Builds an Attribute from a name, an optional namespace name and an
# optional value.
#
# @param [String] name
# @param [String] ns_name
# @param [String] value
# @return [Oga::XML::Attribute]
def on_attribute(name, ns_name = nil, value = nil)
  Attribute.new(:namespace_name => ns_name, :name => name, :value => value)
end

#on_attributes(attrs) ⇒ Object

Parameters:

  • attrs (Array)


417
418
419
# File 'lib/oga/xml/parser.rb', line 417

# Hook that receives the collected attributes (see `_rule_29`). This
# implementation hands them back unchanged.
#
# @param [Array] attrs
# @return [Array] The same object that was passed in.
def on_attributes(attrs)
  attrs
end

#on_cdata(text = nil) ⇒ Oga::XML::Cdata

Parameters:

  • text (String) (defaults to: nil)

Returns:

  • (Oga::XML::Cdata)



322
323
324
# File 'lib/oga/xml/parser.rb', line 322

# Builds a Cdata node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Cdata]
def on_cdata(text = nil)
  Cdata.new(:text => text)
end

#on_comment(text = nil) ⇒ Oga::XML::Comment

Parameters:

  • text (String) (defaults to: nil)

Returns:

  • (Oga::XML::Comment)



330
331
332
# File 'lib/oga/xml/parser.rb', line 330

# Builds a Comment node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Comment]
def on_comment(text = nil)
  Comment.new(:text => text)
end

#on_doctype(options = {}) ⇒ Object

Parameters:

  • options (Hash) (defaults to: {})


314
315
316
# File 'lib/oga/xml/parser.rb', line 314

# Builds a Doctype from the given options, which are passed to
# `Doctype.new` unchanged.
#
# @param [Hash] options
def on_doctype(options = {})
  Doctype.new(options)
end

#on_document(children = []) ⇒ Oga::XML::Document

Parameters:

  • children (Array) (defaults to: [])

Returns:

  • (Oga::XML::Document)



293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# File 'lib/oga/xml/parser.rb', line 293

# Builds the Document and distributes the given top-level nodes over it.
#
# The document type is `:html` when the lexer reports HTML mode and `:xml`
# otherwise. A Doctype child becomes the document's doctype, an
# XmlDeclaration child becomes its XML declaration, and every other child is
# appended to the document's children.
#
# @param [Array] children
# @return [Oga::XML::Document]
def on_document(children = [])
  doc_type = @lexer.html? ? :html : :xml
  document = Document.new(:type => doc_type)

  children.each do |node|
    case node
    when Doctype
      document.doctype = node
    when XmlDeclaration
      document.xml_declaration = node
    else
      document.children << node
    end
  end

  document
end

#on_element(namespace, name, attributes = {}) ⇒ Oga::XML::Element

Parameters:

  • namespace (String)
  • name (String)
  • attributes (Hash) (defaults to: {})

Returns:

  • (Oga::XML::Element)



371
372
373
374
375
376
377
378
379
# File 'lib/oga/xml/parser.rb', line 371

# Builds an Element from a namespace name, an element name and its
# attributes.
#
# @param [String] namespace
# @param [String] name
# @param [Hash] attributes
# @return [Oga::XML::Element]
def on_element(namespace, name, attributes = {})
  Element.new(:namespace_name => namespace, :name => name, :attributes => attributes)
end

#on_element_children(element, children = []) ⇒ Oga::XML::Element

Parameters:

  • element (Oga::XML::Element)
  • children (Array) (defaults to: [])

Returns:

  • (Oga::XML::Element)



386
387
388
389
390
# File 'lib/oga/xml/parser.rb', line 386

# Assigns the given children to the element and returns that element.
#
# @param [Oga::XML::Element] element
# @param [Array] children
# @return [Oga::XML::Element] The element that was passed in.
def on_element_children(element, children = [])
  element.tap { |node| node.children = children }
end

#on_proc_ins(name, text = nil) ⇒ Oga::XML::ProcessingInstruction

Parameters:

  • name (String)
  • text (String) (defaults to: nil)

Returns:

  • (Oga::XML::ProcessingInstruction)



339
340
341
# File 'lib/oga/xml/parser.rb', line 339

# Builds a ProcessingInstruction from its name and optional text.
#
# @param [String] name
# @param [String] text
# @return [Oga::XML::ProcessingInstruction]
def on_proc_ins(name, text = nil)
  ProcessingInstruction.new(:name => name, :text => text)
end

#on_text(text) ⇒ Oga::XML::Text

Parameters:

  • text (String)

Returns:

  • (Oga::XML::Text)



361
362
363
# File 'lib/oga/xml/parser.rb', line 361

# Builds a Text node holding the given text.
#
# @param [String] text
# @return [Oga::XML::Text]
def on_text(text)
  Text.new(:text => text)
end

#on_xml_decl(attributes = []) ⇒ Oga::XML::XmlDeclaration

Parameters:

  • attributes (Array) (defaults to: [])

Returns:

  • (Oga::XML::XmlDeclaration)



347
348
349
350
351
352
353
354
355
# File 'lib/oga/xml/parser.rb', line 347

# Builds an XmlDeclaration from a list of attributes.
#
# Every attribute becomes one option: its name (converted to a Symbol) is
# the key and its value is the value. Later attributes overwrite earlier
# ones that share a name.
#
# @param [Array] attributes Objects responding to `name` and `value`.
# @return [Oga::XML::XmlDeclaration]
def on_xml_decl(attributes = [])
  options = attributes.each_with_object({}) do |attr, collected|
    collected[attr.name.to_sym] = attr.value
  end

  XmlDeclaration.new(options)
end

#parse ⇒ Object

See Also:

  • LL::Driver#parse


281
282
283
284
285
286
287
# File 'lib/oga/xml/parser.rb', line 281

# Runs the inherited `parse`, then resets the parser, and returns what the
# inherited `parse` produced.
#
# The reset only happens when the inherited call returns normally; an
# exception raised by it propagates before `reset` is reached.
#
# @return [Object] The value returned by the superclass implementation.
def parse
  super.tap { reset }
end

#parser_error(stack_type, stack_value, token_type, token_value) ⇒ Object

Parameters:

  • stack_type (Fixnum)
  • stack_value (Fixnum)
  • token_type (Symbol)
  • token_value (String)

Raises:

  • (LL::ParserError)


260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/oga/xml/parser.rb', line 260

# Raises an LL::ParserError describing an unexpected token.
#
# The wording depends on what `id_to_type(stack_type)` reports:
#
# * `:rule`     - names the raw token type and the rule number.
# * `:terminal` - names the received and the expected token, using the
#   labels from TOKEN_ERROR_MAPPING where one exists.
# * `:eof`      - reports an unexpected end of input.
# * otherwise   - reports the received token only.
#
# In every case " on line N" (taken from `@line`) is appended.
#
# @param [Fixnum] stack_type ID that `id_to_type` translates into
#   `:rule`, `:terminal` or `:eof`.
# @param [Fixnum] stack_value Rule number or, for terminals, the ID that
#   `id_to_terminal` translates into a token type.
# @param [Symbol] token_type Type of the token that was received.
# @param [String] token_value Value of the received token (not used in the
#   message).
#
# @raise [LL::ParserError] Always.
def parser_error(stack_type, stack_value, token_type, token_value)
  message =
    case id_to_type(stack_type)
    when :rule
      "Unexpected #{token_type} for rule #{stack_value}"
    when :terminal
      expected = id_to_terminal(stack_value)
      expected = TOKEN_ERROR_MAPPING[expected]   || expected
      got      = TOKEN_ERROR_MAPPING[token_type] || token_type

      "Unexpected #{got}, expected #{expected} instead"
    when :eof
      'Unexpected end of input'
    else
      # Without this branch `message` would be nil for an unrecognised stack
      # type and the method would fail with NoMethodError instead of raising
      # LL::ParserError.
      "Unexpected #{TOKEN_ERROR_MAPPING[token_type] || token_type}"
    end

  raise LL::ParserError, "#{message} on line #{@line}"
end

#reset ⇒ Object

Resets the internal state of the parser.



233
234
235
236
237
# File 'lib/oga/xml/parser.rb', line 233

# Resets the internal state of the parser: the line counter goes back to 1
# and the lexer is reset as well.
#
# @return [Object] Whatever `@lexer.reset` returns, as it is the last
#   expression evaluated.
def reset
  # Line number that parser_error reports; each_token updates it while lexing.
  @line = 1

  @lexer.reset
end