Class: Moxml::Adapter::Rexml

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/rexml.rb

Class Method Summary collapse

Methods inherited from Base

create_cdata, create_comment, create_declaration, create_doctype, create_element, create_namespace, create_processing_instruction, create_text, patch_node, prepare_for_new_document, sax_supported?

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.add_child(element, child) ⇒ Object



258
259
260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/moxml/adapter/rexml.rb', line 258

# Appends +child+ to +element+.
#
# When an XML declaration is added to a document, the document's
# @xml_declaration instance variable is pointed at the declaration before the
# node itself is added. String children are added with +add_text+; every
# other child goes through +add+.
#
# @param element [Object] the receiving node
# @param child [String, Object] text or a native node
# @return [Object] the result of +add_text+ or +add+
def add_child(element, child)
  declaration_on_document =
    element.is_a?(::REXML::Document) && child.is_a?(::REXML::XMLDecl)
  element.instance_variable_set(:@xml_declaration, child) if declaration_on_document

  return element.add_text(child) if child.is_a?(String)

  element.add(child)
end

.add_next_sibling(node, sibling) ⇒ Object



283
284
285
286
# File 'lib/moxml/adapter/rexml.rb', line 283

# Inserts +sibling+ directly after +node+ under node's parent.
#
# A sibling that already lives under the same parent is detached first.
# NOTE(review): this relies on REXML::Parent#insert_after looking up the
# insertion index before it detaches the moved node, which puts a node moved
# from an earlier position in the wrong slot - confirm against the REXML
# version in use.
#
# @param node [Object] reference node; must have a parent
# @param sibling [Object] node to insert after +node+
# @return [Object] whatever the parent's +insert_after+ returns
def add_next_sibling(node, sibling)
  parent = node.parent
  same_parent = !parent.nil? && sibling.parent.equal?(parent)
  # Detach up front so the index REXML computes for +node+ stays valid.
  sibling.remove if same_parent && !sibling.equal?(node)
  parent.insert_after(node, sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



273
274
275
276
277
278
279
280
281
# File 'lib/moxml/adapter/rexml.rb', line 273

# Inserts +sibling+ directly before +node+ under node's parent.
#
# REXML gets the order wrong when +sibling+ already sits under the same
# parent ahead of +node+:
#   "<root><a/><b/></root>", add_previous_sibling(node_b, node_a)
#   used to give "<root><b/><a/></root>" instead of "<root><a/><b/></root>".
# Detaching such a sibling before the insert keeps the expected order.
#
# @param node [Object] reference node; must have a parent
# @param sibling [Object] node to insert before +node+
# @return [Object] whatever the parent's +insert_before+ returns
def add_previous_sibling(node, sibling)
  parent = node.parent
  same_parent = !parent.nil? && sibling.parent.equal?(parent)
  # Detach up front so the index REXML computes for +node+ stays valid.
  sibling.remove if same_parent && !sibling.equal?(node)
  parent.insert_before(node, sibling)
end

.at_xpath(node, expression, namespaces = {}) ⇒ Object



460
461
462
463
# File 'lib/moxml/adapter/rexml.rb', line 460

# Returns the first result of evaluating +expression+ with +xpath+, or nil
# when there are no results.
#
# @param node [Object] context node
# @param expression [String] XPath expression
# @param namespaces [Hash] forwarded unchanged to +xpath+
def at_xpath(node, expression, namespaces = {})
  xpath(node, expression, namespaces).first
end

.attribute_element(attribute) ⇒ Object



224
225
226
# File 'lib/moxml/adapter/rexml.rb', line 224

# Returns the element that owns +attribute+, as reported by attribute.element.
def attribute_element(attribute)
  attribute.element
end

.attributes(element) ⇒ Object



216
217
218
219
220
221
222
# File 'lib/moxml/adapter/rexml.rb', line 216

# Lists the element's regular attributes, leaving out namespace declarations.
#
# REXML keeps attributes that share a local name but differ in prefix
# (e.g. a:id and b:id) in a nested Hash under that one name, so stored values
# are flattened before filtering; calling +prefix+ on the Hash itself would
# raise NoMethodError.
#
# @param element [Object] node to inspect; anything without #attributes yields []
# @return [Array] attributes whose prefix does not start with "xmlns"
def attributes(element)
  return [] unless element.respond_to?(:attributes)

  # Only return non-namespace attributes
  element.attributes.values
    .flat_map { |entry| entry.is_a?(Hash) ? entry.values : [entry] }
    .reject { |attr| attr.prefix.to_s.start_with?("xmlns") }
end

.cdata_content(node) ⇒ Object



337
338
339
# File 'lib/moxml/adapter/rexml.rb', line 337

# Returns the content of a CDATA node by reading node.value.
def cdata_content(node)
  node.value
end

.children(node) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/moxml/adapter/rexml.rb', line 142

# Returns the child nodes of +node+ without whitespace-only text nodes.
#
# A whitespace-only text node is kept only when it has neither a next nor a
# previous sibling. The result is de-duplicated by object identity.
#
# @param node [Object] any node; objects without #children yield []
# @return [Array] filtered child nodes
def children(node)
  return [] unless node.respond_to?(:children)

  kept = node.children.select do |child|
    next true unless child.is_a?(::REXML::Text)
    next true unless child.to_s.strip.empty?

    # Blank text survives only when it is the sole child.
    child.next_sibling.nil? && child.previous_sibling.nil?
  end

  kept.uniq(&:object_id)
end

.comment_content(node) ⇒ Object



329
330
331
# File 'lib/moxml/adapter/rexml.rb', line 329

# Returns the text of a comment node by reading node.string.
def comment_content(node)
  node.string
end

.create_document(_native_doc = nil) ⇒ Object



53
54
55
# File 'lib/moxml/adapter/rexml.rb', line 53

# Builds a new, empty REXML::Document.
# The optional argument is accepted but never used (hence the underscore).
def create_document(_native_doc = nil)
  ::REXML::Document.new
end

.create_native_cdata(content) ⇒ Object



65
66
67
# File 'lib/moxml/adapter/rexml.rb', line 65

# Builds a REXML::CData node from +content+, converted with to_s.
def create_native_cdata(content)
  ::REXML::CData.new(content.to_s)
end

.create_native_comment(content) ⇒ Object



69
70
71
# File 'lib/moxml/adapter/rexml.rb', line 69

# Builds a REXML::Comment node from +content+, converted with to_s.
def create_native_comment(content)
  ::REXML::Comment.new(content.to_s)
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



78
79
80
# File 'lib/moxml/adapter/rexml.rb', line 78

# Builds a REXML::XMLDecl from the given version, encoding and standalone
# values. The encoding is lower-cased when present; a nil encoding is passed
# through as nil.
def create_native_declaration(version, encoding, standalone)
  ::REXML::XMLDecl.new(version, encoding&.downcase, standalone)
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/moxml/adapter/rexml.rb', line 82

# Builds a REXML::DocType from a space-joined declaration string.
#
# The string is the name followed by either
#   PUBLIC "<external_id>" ["<system_id>"]   (when external_id is given), or
#   SYSTEM "<system_id>"                     (when only system_id is given).
#
# @param name [String, nil] doctype name; nil short-circuits to nil
# @param external_id [String, nil] public identifier
# @param system_id [String, nil] system identifier
# @return [REXML::DocType, nil]
def create_native_doctype(name, external_id, system_id)
  return nil unless name

  identifier =
    if external_id
      ["PUBLIC", %("#{external_id}"), (%("#{system_id}") if system_id)].compact
    elsif system_id
      ["SYSTEM", %("#{system_id}")]
    else
      []
    end

  ::REXML::DocType.new([name, *identifier].join(" "))
end

.create_native_element(name) ⇒ Object



57
58
59
# File 'lib/moxml/adapter/rexml.rb', line 57

# Builds a REXML::Element named after +name+, converted with to_s.
def create_native_element(name)
  ::REXML::Element.new(name.to_s)
end

.create_native_namespace(element, prefix, uri) ⇒ Object

add a namespace definition, keep the element name unchanged



389
390
391
392
# File 'lib/moxml/adapter/rexml.rb', line 389

# Registers a namespace definition on +element+ and returns an attribute
# object describing it. The element's own name is left untouched.
#
# @param element [Object] element receiving the definition
# @param prefix [#to_s] namespace prefix
# @param uri [String] namespace URI
# @return [REXML::Attribute] attribute built from the prefix, URI and element
def create_native_namespace(element, prefix, uri)
  ns_prefix = prefix.to_s
  element.add_namespace(ns_prefix, uri)
  ::REXML::Attribute.new(ns_prefix, uri, element)
end

.create_native_processing_instruction(target, content) ⇒ Object



73
74
75
76
# File 'lib/moxml/adapter/rexml.rb', line 73

# Builds a REXML::Instruction from +target+ and +content+.
#
# Both values are converted with to_s and duplicated so the instruction is
# handed unfrozen strings (avoids frozen string errors).
def create_native_processing_instruction(target, content)
  pi_target = target.to_s.dup
  pi_content = content.to_s.dup
  ::REXML::Instruction.new(pi_target, pi_content)
end

.create_native_text(content) ⇒ Object



61
62
63
# File 'lib/moxml/adapter/rexml.rb', line 61

# Builds a REXML::Text node from +content+, converted with to_s.
# NOTE(review): the extra positional arguments (true, nil) are presumably
# REXML::Text's respect_whitespace flag and parent - confirm against the
# REXML::Text.new signature.
def create_native_text(content)
  ::REXML::Text.new(content.to_s, true, nil)
end

.declaration_attribute(node, name) ⇒ Object



307
308
309
310
311
312
313
314
315
316
# File 'lib/moxml/adapter/rexml.rb', line 307

# Reads one attribute of an XML declaration node.
#
# @param node [Object] declaration responding to version/encoding/standalone
# @param name [String] "version", "encoding" or "standalone"
# @return [Object, nil] the attribute value, or nil for any other name
def declaration_attribute(node, name)
  return nil unless %w[version encoding standalone].include?(name)

  node.public_send(name)
end

.document(node) ⇒ Object



208
209
210
# File 'lib/moxml/adapter/rexml.rb', line 208

# Returns the document +node+ belongs to, as reported by node.document.
def document(node)
  node.document
end

.duplicate_node(node) ⇒ Object



136
137
138
139
140
# File 'lib/moxml/adapter/rexml.rb', line 136

# Returns a deep copy of +node+ made by a Marshal round trip, so the copy
# shares no objects with the original.
# https://stackoverflow.com/questions/23878384/why-the-original-element-got-changed-when-i-modify-the-copy-created-by-dup-meth
def duplicate_node(node)
  snapshot = Marshal.dump(node)
  Marshal.load(snapshot)
end

.get_attribute(element, name) ⇒ Object



246
247
248
# File 'lib/moxml/adapter/rexml.rb', line 246

# Looks up the attribute object called +name+ on +element+ via
# element.attributes.get_attribute.
def get_attribute(element, name)
  element.attributes.get_attribute(name)
end

.get_attribute_value(element, name) ⇒ Object



250
251
252
# File 'lib/moxml/adapter/rexml.rb', line 250

# Returns whatever element.attributes[name] yields for +name+.
def get_attribute_value(element, name)
  element.attributes[name]
end

.inner_text(node) ⇒ Object



368
369
370
371
372
373
374
# File 'lib/moxml/adapter/rexml.rb', line 368

# Concatenates the values of the direct text children of +node+.
#
# Only immediate REXML::Text children are considered, each counted once by
# object identity.
#
# @return [String] joined text; empty when there are no text children
def inner_text(node)
  direct_texts = node.children.grep(::REXML::Text)
  direct_texts.uniq(&:object_id).map(&:value).join
end

.namespace(node) ⇒ Object



414
415
416
417
418
419
420
421
# File 'lib/moxml/adapter/rexml.rb', line 414

# Describes the namespace of +node+ as an attribute-like object.
#
# Reads the node's prefix and the URI bound to it. When both are empty there
# is no namespace and nil is returned. For an attribute node the returned
# object is attached to the attribute's element, otherwise to the node itself.
#
# @return [REXML::Attribute, nil]
def namespace(node)
  ns_prefix = node.prefix
  ns_uri = node.namespace(ns_prefix)
  return nil if [ns_prefix, ns_uri].all? { |part| part.to_s.empty? }

  owner =
    if node.is_a?(::REXML::Attribute)
      node.element
    else
      node
    end
  ::REXML::Attribute.new(ns_prefix, ns_uri, owner)
end

.namespace_definitions(node) ⇒ Object



423
424
425
426
427
# File 'lib/moxml/adapter/rexml.rb', line 423

# Turns every prefix/URI pair reported by node.namespaces into an
# attribute-like object attached to +node+.
#
# @return [Array<REXML::Attribute>] one entry per namespace pair
def namespace_definitions(node)
  node.namespaces.each_with_object([]) do |(prefix, uri), definitions|
    definitions << ::REXML::Attribute.new(prefix.to_s, uri, node)
  end
end

.namespace_prefix(node) ⇒ Object



406
407
408
# File 'lib/moxml/adapter/rexml.rb', line 406

# Returns the name of a namespace node as its prefix, or nil when the name is
# exactly "xmlns".
def namespace_prefix(node)
  return nil if node.name == "xmlns"

  node.name
end

.namespace_uri(node) ⇒ Object



410
411
412
# File 'lib/moxml/adapter/rexml.rb', line 410

# Returns the namespace URI by reading node.value.
# NOTE(review): presumably +node+ is one of the attribute objects this adapter
# builds for namespaces - confirm against callers.
def namespace_uri(node)
  node.value
end

.next_sibling(node) ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/moxml/adapter/rexml.rb', line 160

# Returns the next sibling of +node+, skipping whitespace-only text nodes.
#
# The walk stops at the first sibling that is not a blank REXML::Text. The
# previous duplicate tracking could never trigger (its set was always empty
# when checked), so it has been dropped.
#
# @return [Object, nil] the next meaningful sibling, or nil if there is none
def next_sibling(node)
  current = node.next_sibling
  while current.is_a?(::REXML::Text) && current.to_s.strip.empty?
    current = current.next_sibling
  end
  current
end

.node_name(node) ⇒ Object



125
126
127
128
129
130
131
132
133
134
# File 'lib/moxml/adapter/rexml.rb', line 125

# Returns the name used for +node+:
# - elements and doctypes report their own name,
# - an XML declaration is always "xml",
# - a processing instruction reports its target.
# Any other node type yields nil.
def node_name(node)
  return node.name if node.is_a?(::REXML::Element) || node.is_a?(::REXML::DocType)
  return "xml" if node.is_a?(::REXML::XMLDecl)
  return node.target if node.is_a?(::REXML::Instruction)

  nil
end

.node_type(node) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/moxml/adapter/rexml.rb', line 100

# Maps a native REXML node to a type symbol.
#
# Checks run top to bottom, so Document is tested before Element and CData
# before Text. Anything unrecognised is :unknown.
#
# @return [Symbol]
def node_type(node)
  return :document if node.is_a?(::REXML::Document)
  return :element if node.is_a?(::REXML::Element)
  return :cdata if node.is_a?(::REXML::CData)
  return :text if node.is_a?(::REXML::Text)
  return :comment if node.is_a?(::REXML::Comment)
  # An Attribute may in fact be a namespace as well.
  return :attribute if node.is_a?(::REXML::Attribute)
  # We don't use this one.
  return :namespace if node.is_a?(::REXML::Namespace)
  return :processing_instruction if node.is_a?(::REXML::Instruction)
  return :doctype if node.is_a?(::REXML::DocType)
  return :declaration if node.is_a?(::REXML::XMLDecl)

  :unknown
end

.parent(node) ⇒ Object



156
157
158
# File 'lib/moxml/adapter/rexml.rb', line 156

# Returns the parent of +node+, as reported by node.parent.
def parent(node)
  node.parent
end

.parse(xml, options = {}) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/moxml/adapter/rexml.rb', line 13

# Parses +xml+ with REXML and builds the wrapped document.
#
# When REXML raises a parse exception and options[:strict] is set, a
# Moxml::ParseError is raised carrying the message, the line and - for String
# input - the first 101 characters of the source. Otherwise an empty document
# from +create_document+ is used instead.
#
# @param xml [String, Object] input handed to REXML::Document.new
# @param options [Hash] only :strict is read here
# @return [Object] result of DocumentBuilder#build for the native document
# @raise [Moxml::ParseError] in strict mode when REXML cannot parse the input
def parse(xml, options = {})
  native_doc =
    begin
      ::REXML::Document.new(xml)
    rescue ::REXML::ParseException => parse_failure
      if options[:strict]
        snippet = xml.is_a?(String) ? xml[0..100] : nil
        raise Moxml::ParseError.new(
          parse_failure.message,
          line: parse_failure.line,
          source: snippet,
        )
      end

      # Lenient mode: fall back to an empty document.
      create_document
    end

  context = Context.new(:rexml)
  DocumentBuilder.new(context).build(native_doc)
end

.prepare_xpath_namespaces(node) ⇒ Object

Not used at the moment, but may become useful once XPath support is upgraded to work with namespaces.



431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
# File 'lib/moxml/adapter/rexml.rb', line 431

# Builds a prefix => URI hash from the namespaces reported by +node+.
#
# Not used at the moment, but may be useful once XPath works with namespaces.
# An empty prefix is stored under the key "xmlns". The pairs are read straight
# from node.namespaces; the earlier version iterated attribute objects as if
# they were [prefix, uri] pairs and so produced { attribute => nil } entries.
#
# @param node [Object] node responding to #namespaces with prefix/URI pairs
# @return [Hash] prefix => URI
def prepare_xpath_namespaces(node)
  node.namespaces.each_with_object({}) do |(prefix, uri), ns|
    key = prefix.to_s.empty? ? "xmlns" : prefix
    ns[key] = uri
  end
end

.previous_sibling(node) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/moxml/adapter/rexml.rb', line 184

# Returns the previous sibling of +node+, skipping whitespace-only text nodes.
#
# The walk stops at the first sibling that is not a blank REXML::Text. The
# previous duplicate tracking could never trigger (its set was always empty
# when checked), so it has been dropped.
#
# @return [Object, nil] the previous meaningful sibling, or nil if none
def previous_sibling(node)
  current = node.previous_sibling
  while current.is_a?(::REXML::Text) && current.to_s.strip.empty?
    current = current.previous_sibling
  end
  current
end

.processing_instruction_content(node) ⇒ Object



349
350
351
# File 'lib/moxml/adapter/rexml.rb', line 349

# Returns the content of a processing instruction by reading node.content.
def processing_instruction_content(node)
  node.content
end

.processing_instruction_target(node) ⇒ Object



345
346
347
# File 'lib/moxml/adapter/rexml.rb', line 345

# Returns the target of a processing instruction by reading node.target.
def processing_instruction_target(node)
  node.target
end

.remove(node) ⇒ Object



288
289
290
291
292
293
294
295
296
# File 'lib/moxml/adapter/rexml.rb', line 288

# Detaches +node+ from its parent via node.remove.
#
# When the node is an XML declaration sitting directly on a document, the
# document's @xml_declaration instance variable is cleared first.
#
# @return [Object] whatever node.remove returns
def remove(node)
  if node.is_a?(::REXML::XMLDecl)
    owner = node.parent
    owner.instance_variable_set(:@xml_declaration, nil) if owner.is_a?(::REXML::Document)
  end

  node.remove
end

.remove_attribute(element, name) ⇒ Object



254
255
256
# File 'lib/moxml/adapter/rexml.rb', line 254

# Deletes the attribute called +name+ (converted with to_s) from +element+
# via element.delete_attribute.
def remove_attribute(element, name)
  element.delete_attribute(name.to_s)
end

.replace(node, new_node) ⇒ Object



298
299
300
# File 'lib/moxml/adapter/rexml.rb', line 298

# Swaps +node+ for +new_node+ by calling node.replace_with(new_node).
def replace(node, new_node)
  node.replace_with(new_node)
end

.replace_children(element, children) ⇒ Object



302
303
304
305
# File 'lib/moxml/adapter/rexml.rb', line 302

# Replaces every child of +element+ with the given +children+.
#
# Each current child is removed, then each new child is added in order.
#
# @param element [Object] parent whose children are replaced
# @param children [Enumerable] new child nodes
# @return [Enumerable] the +children+ collection that was passed in
def replace_children(element, children)
  stale = element.children
  stale.each(&:remove)
  children.each { |replacement| element.add(replacement) }
end

.root(document) ⇒ Object



212
213
214
# File 'lib/moxml/adapter/rexml.rb', line 212

# Returns the root of +document+, as reported by document.root.
def root(document)
  document.root
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for REXML

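Parameters:

  • xml — the XML input; read with #read when the object responds to it, otherwise converted with #to_s
  • handler — wrapped in a REXMLSAX2Bridge that listens to the parser; its #on_error is called with a Moxml::ParseError when REXML raises a parse exception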



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/moxml/adapter/rexml.rb', line 35

# Streams +xml+ through REXML's SAX2 parser, forwarding events to +handler+.
#
# The REXML SAX pieces are required lazily on first use. Input that responds
# to #read is read in full; anything else is converted with #to_s. A REXML
# parse exception is not raised to the caller: it is wrapped in a
# Moxml::ParseError and passed to handler.on_error.
#
# @param xml [#read, #to_s] XML input
# @param handler [Object] wrapped in a REXMLSAX2Bridge; must respond to on_error
def sax_parse(xml, handler)
  require "rexml/parsers/sax2parser"
  require "rexml/source"
  require "stringio"

  listener = REXMLSAX2Bridge.new(handler)

  raw_xml =
    if xml.respond_to?(:read)
      xml.read
    else
      xml.to_s
    end
  io_source = ::REXML::IOSource.new(StringIO.new(raw_xml))

  sax_parser = ::REXML::Parsers::SAX2Parser.new(io_source)
  sax_parser.listen(listener)
  sax_parser.parse
rescue ::REXML::ParseException => parse_failure
  handler.on_error(Moxml::ParseError.new(parse_failure.message, line: parse_failure.line))
end

.serialize(node, options = {}) ⇒ Object



465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
# File 'lib/moxml/adapter/rexml.rb', line 465

# Serializes +node+ to an XML string.
#
# For a document the output is assembled in this order: the XML declaration
# (when enabled and present), the doctype, the top-level processing
# instructions / CDATA / comments / text nodes, and finally the root element.
# Any other node is handed directly to +write_with_formatter+.
#
# Options read here:
#   :no_declaration - when the key is present, its negation decides whether
#                     the declaration is written; otherwise the declaration
#                     is written only if the document has one
#   :encoding       - assigned to the document's declaration before writing
#                     (this modifies the declaration object itself)
#   :indent         - passed to +write_with_formatter+, defaulting to 2
#
# @param node [Object] document or any other native node
# @param options [Hash] see above
# @return [String] the serialized XML with surrounding whitespace stripped
def serialize(node, options = {})
  # Unary plus gives a mutable string to append to.
  output = +""

  if node.is_a?(::REXML::Document)
    # Check if we should include declaration
    # Priority: explicit option > check if document has xml_decl
    should_include_decl = if options.key?(:no_declaration)
                            !options[:no_declaration]
                          else
                            # Include declaration only if document has xml_decl
                            !node.xml_decl.nil?
                          end

    # Include XML declaration only if should_include_decl and xml_decl exists
    if should_include_decl && node.xml_decl
      decl = node.xml_decl
      # Side effect: the requested encoding is written onto the declaration.
      decl.encoding = options[:encoding] if options[:encoding]
      output << "<?xml"
      output << %( version="#{decl.version}") if decl.version
      output << %( encoding="#{decl.encoding}") if decl.encoding
      output << %( standalone="#{decl.standalone}") if decl.standalone
      output << "?>"
    end

    # output << "\n"
    node.doctype&.write(output)

    # Write top-level processing instructions, CDATA, comments and text.
    # The check is an exact class match, and every matching child is written
    # before the root element regardless of where it sits among the children.
    node.children.each do |child|
      next unless [::REXML::Instruction, ::REXML::CData,
                   ::REXML::Comment, ::REXML::Text].include?(child.class)

      write_with_formatter(child, output, options[:indent] || 2)
      # output << "\n"
    end

    if node.root
      write_with_formatter(node.root, output,
                           options[:indent] || 2)
    end
  else
    write_with_formatter(node, output, options[:indent] || 2)
  end

  output.strip
end

.set_attribute(element, name, value) ⇒ Object



228
229
230
231
# File 'lib/moxml/adapter/rexml.rb', line 228

# Sets an attribute on +element+ and returns the stored attribute object.
#
# Name and value are converted with to_s unless they are nil, in which case
# nil is passed through.
#
# @return [Object] result of attributes.get_attribute for the name
def set_attribute(element, name, value)
  key = name&.to_s
  attrs = element.attributes
  attrs[key] = value&.to_s
  attrs.get_attribute(key)
end

.set_attribute_name(attribute, name) ⇒ Object



233
234
235
236
237
238
239
240
# File 'lib/moxml/adapter/rexml.rb', line 233

# Renames +attribute+ and re-registers it on its element.
#
# REXML does not update the keys of the attributes hash when an attribute is
# renamed, so the entry under the old expanded name is deleted and the
# attribute is appended again under its new name.
def set_attribute_name(attribute, name)
  previous_key = attribute.expanded_name
  attribute.name = name

  owner_attributes = attribute.element.attributes
  owner_attributes.delete(previous_key)
  owner_attributes << attribute
end

.set_attribute_value(attribute, value) ⇒ Object



242
243
244
# File 'lib/moxml/adapter/rexml.rb', line 242

# Sets the attribute's value by assigning +value+ to attribute.normalized.
# NOTE(review): writes the "normalized" form rather than a plain value -
# confirm that this is what callers expect.
def set_attribute_value(attribute, value)
  attribute.normalized = value
end

.set_cdata_content(node, content) ⇒ Object



341
342
343
# File 'lib/moxml/adapter/rexml.rb', line 341

# Replaces the content of a CDATA node by assigning content.to_s to node.value.
def set_cdata_content(node, content)
  node.value = content.to_s
end

.set_comment_content(node, content) ⇒ Object



333
334
335
# File 'lib/moxml/adapter/rexml.rb', line 333

# Replaces the text of a comment node by assigning content.to_s to node.string.
def set_comment_content(node, content)
  node.string = content.to_s
end

.set_declaration_attribute(node, name, value) ⇒ Object



318
319
320
321
322
323
324
325
326
327
# File 'lib/moxml/adapter/rexml.rb', line 318

# Writes one attribute of an XML declaration node.
#
# @param node [Object] declaration with version=/encoding=/standalone= setters
# @param name [String] "version", "encoding" or "standalone"
# @param value [Object] value to assign
# @return [Object, nil] +value+ when the name is recognised, otherwise nil
def set_declaration_attribute(node, name, value)
  return nil unless %w[version encoding standalone].include?(name)

  node.public_send("#{name}=", value)
  # Like an assignment expression, return the assigned value itself.
  value
end

.set_namespace(element, ns) ⇒ Object

add a namespace prefix to the element name AND a namespace definition



395
396
397
398
399
400
401
402
403
404
# File 'lib/moxml/adapter/rexml.rb', line 395

# Applies namespace +ns+ to +element+: adds the namespace definition (when the
# receiver supports add_namespace) AND prefixes the element's name.
#
# An empty namespace name is treated as the prefix "xmlns". The receiver is
# renamed to "<prefix>:<current name>". The returned attribute object is
# attached to the attribute's element when the receiver is an attribute,
# otherwise to the receiver itself.
#
# @param element [Object] element or attribute to qualify
# @param ns [Object] namespace object responding to #name and #value
# @return [REXML::Attribute] attribute built from prefix, URI and owner
def set_namespace(element, ns)
  declared_prefix = ns.name.to_s
  prefix = declared_prefix.empty? ? "xmlns" : declared_prefix

  element.add_namespace(prefix, ns.value) if element.respond_to?(:add_namespace)
  element.name = "#{prefix}:#{element.name}"

  owner =
    if element.is_a?(::REXML::Attribute)
      element.element
    else
      element
    end
  ::REXML::Attribute.new(prefix, ns.value, owner)
end

.set_node_name(node, name) ⇒ Object



116
117
118
119
120
121
122
123
# File 'lib/moxml/adapter/rexml.rb', line 116

# Renames +node+ using name.to_s: elements get a new name, processing
# instructions a new target. Other node types are left alone and nil is
# returned.
def set_node_name(node, name)
  if node.is_a?(::REXML::Element)
    node.name = name.to_s
  elsif node.is_a?(::REXML::Instruction)
    node.target = name.to_s
  end
end

.set_processing_instruction_content(node, content) ⇒ Object



353
354
355
# File 'lib/moxml/adapter/rexml.rb', line 353

# Replaces the content of a processing instruction by assigning content.to_s
# to node.content.
def set_processing_instruction_content(node, content)
  node.content = content.to_s
end

.set_root(doc, element) ⇒ Object



96
97
98
# File 'lib/moxml/adapter/rexml.rb', line 96

# Attaches +element+ to +doc+ by calling doc.add_element(element).
def set_root(doc, element)
  doc.add_element(element)
end

.set_text_content(node, content) ⇒ Object



376
377
378
379
380
381
382
383
384
385
386
# File 'lib/moxml/adapter/rexml.rb', line 376

# Sets the textual content of +node+ to content.to_s.
#
# Text and CDATA nodes have their value replaced. For an element, all of its
# existing text nodes are removed first (to prevent duplicates) and the new
# text is added. Other node types are ignored and nil is returned.
def set_text_content(node, content)
  if node.is_a?(::REXML::Text) || node.is_a?(::REXML::CData)
    node.value = content.to_s
  elsif node.is_a?(::REXML::Element)
    node.texts.each(&:remove)
    node.add_text(content.to_s)
  end
end

.text_content(node) ⇒ Object



357
358
359
360
361
362
363
364
365
366
# File 'lib/moxml/adapter/rexml.rb', line 357

# Returns the textual content of +node+.
#
# Text and CDATA nodes yield their value as a string. An element yields the
# joined values of its text nodes, each counted once by object identity.
# Any other node type yields nil.
def text_content(node)
  return node.value.to_s if node.is_a?(::REXML::Text) || node.is_a?(::REXML::CData)
  return nil unless node.is_a?(::REXML::Element)

  node.texts.uniq(&:object_id).map(&:value).join
end

.xpath(node, expression, _namespaces = {}) ⇒ Object



449
450
451
452
453
454
455
456
457
458
# File 'lib/moxml/adapter/rexml.rb', line 449

# Evaluates +expression+ against +node+ with node.get_elements and returns
# the matches as an array. The namespaces argument is accepted but unused.
#
# @param node [Object] context node responding to #get_elements
# @param expression [String] XPath expression
# @return [Array] matching elements
# @raise [Moxml::XPathError] when REXML raises a parse exception; carries the
#   expression, the adapter name "REXML" and the node
def xpath(node, expression, _namespaces = {})
  matches = node.get_elements(expression)
  matches.to_a
rescue ::REXML::ParseException => parse_failure
  details = { expression: expression, adapter: "REXML", node: node }
  raise Moxml::XPathError.new(parse_failure.message, **details)
end