Class: Moxml::Adapter::Rexml

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/rexml.rb

Class Method Summary collapse

Methods inherited from Base

create_cdata, create_comment, create_declaration, create_doctype, create_element, create_namespace, create_processing_instruction, create_text, patch_node, prepare_for_new_document, sax_supported?

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.add_child(element, child) ⇒ Object



257
258
259
260
261
262
263
264
# File 'lib/moxml/adapter/rexml.rb', line 257

# Appends +child+ to +element+.
#
# A String child is added through +add_text+; anything else is handed to
# +add+ unchanged.
#
# @param element native node receiving the child (must respond to
#   #add_text and #add)
# @param child [String, Object] text content or a native node
# @return whatever the underlying +add_text+ / +add+ call returns
def add_child(element, child)
  return element.add_text(child) if child.is_a?(String)

  element.add(child)
end

.add_next_sibling(node, sibling) ⇒ Object



276
277
278
279
# File 'lib/moxml/adapter/rexml.rb', line 276

# Inserts +sibling+ immediately after +node+ under +node+'s parent.
#
# +sibling+ is detached from its current parent *before* the insert position
# is resolved. REXML's +insert_after+ looks up the target index first and
# detaches the inserted node afterwards, so moving a node that already sits
# under the same parent would otherwise be placed at a stale index.
#
# @param node native node that already has a parent
# @param sibling native node to place after +node+
# @return the value returned by +parent.insert_after+, or the parent itself
#   when +sibling+ is +node+ (nothing to move)
def add_next_sibling(node, sibling)
  parent = node.parent
  # Moving a node relative to itself is a no-op; detaching it would lose
  # the anchor position.
  return parent if sibling.equal?(node)

  sibling.remove if sibling.parent
  parent.insert_after(node, sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



266
267
268
269
270
271
272
273
274
# File 'lib/moxml/adapter/rexml.rb', line 266

# Inserts +sibling+ immediately before +node+ under +node+'s parent.
#
# REXML's +insert_before+ resolves the target index before it detaches the
# inserted node. When +sibling+ already precedes +node+ under the same
# parent, that stale index puts it *after* +node+:
#   "<root><a/><b/></root>", add_previous_sibling(node_b, node_a)
#   would yield "<root><b/><a/></root>" instead of "<root><a/><b/></root>".
# Detaching +sibling+ first makes the index lookup see the final layout.
#
# @param node native node that already has a parent
# @param sibling native node to place before +node+
# @return the value returned by +parent.insert_before+, or the parent itself
#   when +sibling+ is +node+ (nothing to move)
def add_previous_sibling(node, sibling)
  parent = node.parent
  # Moving a node relative to itself is a no-op; detaching it would lose
  # the anchor position.
  return parent if sibling.equal?(node)

  sibling.remove if sibling.parent
  parent.insert_before(node, sibling)
end

.at_xpath(node, expression, namespaces = {}) ⇒ Object



447
448
449
450
# File 'lib/moxml/adapter/rexml.rb', line 447

# Returns the first match of +expression+ evaluated against +node+.
#
# Delegates to +xpath+ with the same arguments and takes the first entry of
# its result.
#
# @param node native node to search from
# @param expression XPath expression, passed through to +xpath+
# @param namespaces [Hash] passed through to +xpath+
# @return the first element of the +xpath+ result, or nil when it is empty
def at_xpath(node, expression, namespaces = {})
  xpath(node, expression, namespaces).first
end

.attribute_element(attribute) ⇒ Object



223
224
225
# File 'lib/moxml/adapter/rexml.rb', line 223

# Returns the element that +attribute+ reports through its +element+ reader.
#
# @param attribute native attribute (must respond to #element)
# @return the result of +attribute.element+
def attribute_element(attribute)
  attribute.element
end

.attributes(element) ⇒ Object



215
216
217
218
219
220
221
# File 'lib/moxml/adapter/rexml.rb', line 215

# Lists the non-namespace attributes of +element+.
#
# Attributes are walked with +each_attribute+ rather than read from the
# attribute store's raw values: REXML keeps attributes that share a local
# name under different prefixes (e.g. x:foo and y:foo) in a nested Hash, and
# +each_attribute+ flattens those entries into individual attributes.
#
# Namespace declarations are skipped, both prefixed ones (xmlns:foo) and the
# default declaration (a bare +xmlns+ attribute).
#
# @param element native node; anything not responding to #attributes yields []
# @return [Array] the remaining attribute objects
def attributes(element)
  return [] unless element.respond_to?(:attributes)

  plain = []
  element.attributes.each_attribute do |attr|
    next if attr.prefix.to_s.start_with?("xmlns")
    next if attr.expanded_name == "xmlns"

    plain << attr
  end
  plain
end

.cdata_content(node) ⇒ Object



324
325
326
# File 'lib/moxml/adapter/rexml.rb', line 324

# Reads the content of a CDATA node.
#
# @param node native CDATA node (must respond to #value)
# @return the result of +node.value+
def cdata_content(node)
  node.value
end

.children(node) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/moxml/adapter/rexml.rb', line 141

# Lists the children of +node+ without whitespace-only text between siblings.
#
# A text node whose string form is blank is dropped when it has a next or a
# previous sibling; a blank text node with neither sibling is kept. The
# result is de-duplicated by object identity.
#
# @param node native node; anything not responding to #children yields []
# @return [Array] the retained child nodes
def children(node)
  return [] unless node.respond_to?(:children)

  kept = node.children.select do |child|
    next true unless child.is_a?(::REXML::Text)
    next true unless child.to_s.strip.empty?

    # Blank text survives only when it stands alone (no siblings either side)
    child.next_sibling.nil? && child.previous_sibling.nil?
  end

  kept.uniq(&:object_id)
end

.comment_content(node) ⇒ Object



316
317
318
# File 'lib/moxml/adapter/rexml.rb', line 316

# Reads the text of a comment node.
#
# @param node native comment node (must respond to #string)
# @return the result of +node.string+
def comment_content(node)
  node.string
end

.create_document(_native_doc = nil) ⇒ Object



52
53
54
# File 'lib/moxml/adapter/rexml.rb', line 52

# Creates a new, empty REXML document.
#
# @param _native_doc accepted but unused by this method
# @return [::REXML::Document] a freshly constructed document
def create_document(_native_doc = nil)
  ::REXML::Document.new
end

.create_native_cdata(content) ⇒ Object



64
65
66
# File 'lib/moxml/adapter/rexml.rb', line 64

# Builds a REXML CDATA node from +content+.
#
# @param content converted with #to_s before being passed to REXML
# @return [::REXML::CData] the new node
def create_native_cdata(content)
  ::REXML::CData.new(content.to_s)
end

.create_native_comment(content) ⇒ Object



68
69
70
# File 'lib/moxml/adapter/rexml.rb', line 68

# Builds a REXML comment node from +content+.
#
# @param content converted with #to_s before being passed to REXML
# @return [::REXML::Comment] the new node
def create_native_comment(content)
  ::REXML::Comment.new(content.to_s)
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



77
78
79
# File 'lib/moxml/adapter/rexml.rb', line 77

# Builds a REXML XML declaration.
#
# @param version passed through unchanged
# @param encoding lower-cased when non-nil; nil is passed through as nil
# @param standalone passed through unchanged
# @return [::REXML::XMLDecl] the new declaration
def create_native_declaration(version, encoding, standalone)
  ::REXML::XMLDecl.new(version, encoding&.downcase, standalone)
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/moxml/adapter/rexml.rb', line 81

# Builds a REXML doctype from its name and optional identifiers.
#
# The pieces are joined with single spaces into one string:
#   name PUBLIC "external_id" "system_id"   (system part only when given)
#   name SYSTEM "system_id"                 (no external id)
#   name                                    (neither identifier)
#
# @param name doctype name; a falsy name short-circuits to nil
# @param external_id public identifier, or nil
# @param system_id system identifier, or nil
# @return [::REXML::DocType, nil] the doctype, or nil when +name+ is falsy
def create_native_doctype(name, external_id, system_id)
  return nil unless name

  quoted_system = system_id ? %("#{system_id}") : nil
  tokens =
    if external_id
      [name, "PUBLIC", %("#{external_id}"), quoted_system].compact
    elsif system_id
      [name, "SYSTEM", quoted_system]
    else
      [name]
    end

  ::REXML::DocType.new(tokens.join(" "))
end

.create_native_element(name) ⇒ Object



56
57
58
# File 'lib/moxml/adapter/rexml.rb', line 56

# Builds a REXML element named +name+.
#
# @param name converted with #to_s before being passed to REXML
# @return [::REXML::Element] the new element
def create_native_element(name)
  ::REXML::Element.new(name.to_s)
end

.create_native_namespace(element, prefix, uri) ⇒ Object

Adds a namespace definition to the element while leaving the element name unchanged.



376
377
378
379
# File 'lib/moxml/adapter/rexml.rb', line 376

# Registers a namespace on +element+ and returns an attribute describing it.
#
# The element's name is not touched; only +add_namespace+ is called on it.
#
# @param element native element (must respond to #add_namespace)
# @param prefix namespace prefix, stringified with #to_s
# @param uri namespace URI
# @return [::REXML::Attribute] built from the stringified prefix, +uri+ and
#   +element+
def create_native_namespace(element, prefix, uri)
  prefix_name = prefix.to_s
  element.add_namespace(prefix_name, uri)
  ::REXML::Attribute.new(prefix_name, uri, element)
end

.create_native_processing_instruction(target, content) ⇒ Object



72
73
74
75
# File 'lib/moxml/adapter/rexml.rb', line 72

# Builds a REXML processing instruction.
#
# @param target stringified with #to_s, then duplicated
# @param content stringified with #to_s, then duplicated
# @return [::REXML::Instruction] the new instruction
def create_native_processing_instruction(target, content)
  # Clone strings to avoid frozen string errors
  ::REXML::Instruction.new(target.to_s.dup, content.to_s.dup)
end

.create_native_text(content) ⇒ Object



60
61
62
# File 'lib/moxml/adapter/rexml.rb', line 60

# Builds a REXML text node from +content+.
#
# NOTE(review): the second positional argument (+true+) is presumably
# REXML::Text's respect-whitespace flag and the third (+nil+) its parent;
# confirm against the REXML::Text constructor documentation.
#
# @param content converted with #to_s before being passed to REXML
# @return [::REXML::Text] the new text node
def create_native_text(content)
  ::REXML::Text.new(content.to_s, true, nil)
end

.declaration_attribute(node, name) ⇒ Object



294
295
296
297
298
299
300
301
302
303
# File 'lib/moxml/adapter/rexml.rb', line 294

# Reads one field of an XML declaration by its string name.
#
# Only the exact strings "version", "encoding" and "standalone" are
# recognised; any other +name+ yields nil.
#
# @param node native declaration (must respond to the matching reader)
# @param name [String] field name
# @return the field value, or nil for an unrecognised name
def declaration_attribute(node, name)
  return node.version if name == "version"
  return node.encoding if name == "encoding"

  node.standalone if name == "standalone"
end

.document(node) ⇒ Object



207
208
209
# File 'lib/moxml/adapter/rexml.rb', line 207

# Returns the document that +node+ reports through its +document+ reader.
#
# @param node native node (must respond to #document)
# @return the result of +node.document+
def document(node)
  node.document
end

.duplicate_node(node) ⇒ Object



135
136
137
138
139
# File 'lib/moxml/adapter/rexml.rb', line 135

# Produces a copy of +node+ by serializing it with Marshal and loading the
# result back, so the copy is rebuilt from the dump rather than sharing the
# original's nested objects.
#
# @param node native node; must be dumpable by Marshal
# @return a new object reconstructed from the Marshal dump of +node+
def duplicate_node(node)
  # Make a complete duplicate of the node
  # https://stackoverflow.com/questions/23878384/why-the-original-element-got-changed-when-i-modify-the-copy-created-by-dup-meth
  Marshal.load(Marshal.dump(node))
end

.get_attribute(element, name) ⇒ Object



245
246
247
# File 'lib/moxml/adapter/rexml.rb', line 245

# Looks up an attribute object on +element+ by name.
#
# @param element native element (must respond to #attributes)
# @param name attribute name, passed through unchanged
# @return the result of +element.attributes.get_attribute(name)+
def get_attribute(element, name)
  element.attributes.get_attribute(name)
end

.get_attribute_value(element, name) ⇒ Object



249
250
251
# File 'lib/moxml/adapter/rexml.rb', line 249

# Reads an attribute's value from +element+ by name.
#
# @param element native element (must respond to #attributes)
# @param name attribute name, used as the lookup key unchanged
# @return the result of +element.attributes[name]+
def get_attribute_value(element, name)
  element.attributes[name]
end

.inner_text(node) ⇒ Object



355
356
357
358
359
360
361
# File 'lib/moxml/adapter/rexml.rb', line 355

# Concatenates the values of +node+'s direct text children.
#
# Only children that are ::REXML::Text instances (including subclasses) are
# considered; descendants further down are not visited. Repeated references
# to the same object are counted once.
#
# @param node native node (must respond to #children)
# @return [String] joined text values, "" when there are none
def inner_text(node)
  direct_texts = node.children.grep(::REXML::Text)
  direct_texts.uniq(&:object_id).map(&:value).join
end

.namespace(node) ⇒ Object



401
402
403
404
405
406
407
408
# File 'lib/moxml/adapter/rexml.rb', line 401

# Describes the namespace of +node+ as a REXML attribute.
#
# The prefix comes from +node.prefix+ and the URI from
# +node.namespace(prefix)+. When both stringify to "" there is no namespace
# and nil is returned. For an attribute node the returned attribute is
# attached to the attribute's element; otherwise to +node+ itself.
#
# @param node native element or attribute
# @return [::REXML::Attribute, nil]
def namespace(node)
  ns_prefix = node.prefix
  ns_uri = node.namespace(ns_prefix)
  return if ns_prefix.to_s.empty? && ns_uri.to_s.empty?

  holder = if node.is_a?(::REXML::Attribute)
             node.element
           else
             node
           end
  ::REXML::Attribute.new(ns_prefix, ns_uri, holder)
end

.namespace_definitions(node) ⇒ Object



410
411
412
413
414
# File 'lib/moxml/adapter/rexml.rb', line 410

# Wraps every entry of +node.namespaces+ in a REXML attribute.
#
# @param node native node (must respond to #namespaces with prefix/URI pairs)
# @return [Array<::REXML::Attribute>] one attribute per pair, named by the
#   stringified prefix, valued by the URI and attached to +node+
def namespace_definitions(node)
  node.namespaces.each_with_object([]) do |(ns_prefix, ns_uri), definitions|
    definitions << ::REXML::Attribute.new(ns_prefix.to_s, ns_uri, node)
  end
end

.namespace_prefix(node) ⇒ Object



393
394
395
# File 'lib/moxml/adapter/rexml.rb', line 393

# Returns the prefix represented by a namespace node.
#
# The node's name is the prefix, except that the literal name "xmlns" maps
# to nil.
#
# @param node namespace node (must respond to #name)
# @return the node name, or nil when the name is "xmlns"
def namespace_prefix(node)
  prefix = node.name
  prefix == "xmlns" ? nil : prefix
end

.namespace_uri(node) ⇒ Object



397
398
399
# File 'lib/moxml/adapter/rexml.rb', line 397

# Returns the URI carried by a namespace node.
#
# @param node namespace node (must respond to #value)
# @return the result of +node.value+
def namespace_uri(node)
  node.value
end

.next_sibling(node) ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/moxml/adapter/rexml.rb', line 159

# Returns the next sibling of +node+, skipping whitespace-only text nodes.
#
# Walks forward through +next_sibling+ links until it reaches a node that is
# not a blank ::REXML::Text, or runs off the end of the sibling list.
#
# @param node native node (must respond to #next_sibling)
# @return the first following sibling that is not blank text, or nil
def next_sibling(node)
  candidate = node.next_sibling
  # nil fails the is_a? check, so the loop also stops at the end of the list
  while candidate.is_a?(::REXML::Text) && candidate.to_s.strip.empty?
    candidate = candidate.next_sibling
  end
  candidate
end

.node_name(node) ⇒ Object



124
125
126
127
128
129
130
131
132
133
# File 'lib/moxml/adapter/rexml.rb', line 124

# Returns the name associated with +node+, depending on its REXML class.
#
# Elements and doctypes report their +name+, an XML declaration is always
# "xml", and a processing instruction reports its +target+. Any other node
# yields nil.
#
# @param node native node
# @return [String, nil]
def node_name(node)
  if node.is_a?(::REXML::Element) || node.is_a?(::REXML::DocType)
    node.name
  elsif node.is_a?(::REXML::XMLDecl)
    "xml"
  elsif node.is_a?(::REXML::Instruction)
    node.target
  end
end

.node_type(node) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/moxml/adapter/rexml.rb', line 99

# Maps a native REXML node to a type symbol.
#
# Checks run top to bottom and the first match wins, so the order below
# matters: Document is tested before Element and CData before Text.
#
# @param node native node
# @return [Symbol] one of :document, :element, :cdata, :text, :comment,
#   :attribute, :namespace, :processing_instruction, :doctype, :declaration
#   or :unknown
def node_type(node)
  return :document if node.is_a?(::REXML::Document)
  return :element if node.is_a?(::REXML::Element)
  return :cdata if node.is_a?(::REXML::CData)
  return :text if node.is_a?(::REXML::Text)
  return :comment if node.is_a?(::REXML::Comment)
  # An attribute may in fact be a namespace declaration as well
  return :attribute if node.is_a?(::REXML::Attribute)
  # Not used by this adapter
  return :namespace if node.is_a?(::REXML::Namespace)
  return :processing_instruction if node.is_a?(::REXML::Instruction)
  return :doctype if node.is_a?(::REXML::DocType)
  return :declaration if node.is_a?(::REXML::XMLDecl)

  :unknown
end

.parent(node) ⇒ Object



155
156
157
# File 'lib/moxml/adapter/rexml.rb', line 155

# Returns the parent that +node+ reports through its +parent+ reader.
#
# @param node native node (must respond to #parent)
# @return the result of +node.parent+
def parent(node)
  node.parent
end

.parse(xml, options = {}) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/moxml/adapter/rexml.rb', line 13

# Parses +xml+ with REXML and wraps the result through DocumentBuilder.
#
# On a REXML parse error the behaviour depends on options[:strict]:
# * truthy: a Moxml::ParseError is raised carrying the REXML message, the
#   reported line and, for String input, the first 101 characters of +xml+;
# * otherwise: the error is discarded and an empty document from
#   +create_document+ is used instead.
#
# @param xml source handed to ::REXML::Document.new
# @param options [Hash] only :strict is read
# @return the value of DocumentBuilder#build for the native document
# @raise [Moxml::ParseError] in strict mode when REXML cannot parse +xml+
def parse(xml, options = {})
  begin
    native_doc = ::REXML::Document.new(xml)
  rescue ::REXML::ParseException => error
    if options[:strict]
      snippet = xml.is_a?(String) ? xml[0..100] : nil
      raise Moxml::ParseError.new(error.message, line: error.line, source: snippet)
    end
    # Lenient mode: fall back to an empty document
    native_doc = create_document
  end

  DocumentBuilder.new(Context.new(:rexml)).build(native_doc)
end

.prepare_xpath_namespaces(node) ⇒ Object

Currently unused, but may become useful once XPath support is upgraded to work with namespaces.



418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
# File 'lib/moxml/adapter/rexml.rb', line 418

# Builds a prefix => URI hash from the namespaces reported by +node+.
#
# The pairs are read straight from +node.namespaces+, so each entry really
# is a (prefix, uri) pair. An empty or nil prefix is stored under the key
# "xmlns"; every other prefix is used as the key unchanged.
#
# @param node native node (must respond to #namespaces with prefix/URI pairs)
# @return [Hash] namespace mapping, empty when the node reports none
def prepare_xpath_namespaces(node)
  node.namespaces.each_with_object({}) do |(prefix, uri), ns|
    key = prefix.to_s.empty? ? "xmlns" : prefix
    ns[key] = uri
  end
end

.previous_sibling(node) ⇒ Object



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/moxml/adapter/rexml.rb', line 183

# Returns the previous sibling of +node+, skipping whitespace-only text nodes.
#
# Walks backward through +previous_sibling+ links until it reaches a node
# that is not a blank ::REXML::Text, or runs off the start of the sibling
# list.
#
# @param node native node (must respond to #previous_sibling)
# @return the first preceding sibling that is not blank text, or nil
def previous_sibling(node)
  candidate = node.previous_sibling
  # nil fails the is_a? check, so the loop also stops at the start of the list
  while candidate.is_a?(::REXML::Text) && candidate.to_s.strip.empty?
    candidate = candidate.previous_sibling
  end
  candidate
end

.processing_instruction_content(node) ⇒ Object



336
337
338
# File 'lib/moxml/adapter/rexml.rb', line 336

# Reads the content of a processing instruction.
#
# @param node native processing instruction (must respond to #content)
# @return the result of +node.content+
def processing_instruction_content(node)
  node.content
end

.processing_instruction_target(node) ⇒ Object



332
333
334
# File 'lib/moxml/adapter/rexml.rb', line 332

# Reads the target of a processing instruction.
#
# @param node native processing instruction (must respond to #target)
# @return the result of +node.target+
def processing_instruction_target(node)
  node.target
end

.remove(node) ⇒ Object



281
282
283
# File 'lib/moxml/adapter/rexml.rb', line 281

# Removes +node+ by calling its own +remove+ method.
#
# @param node native node (must respond to #remove)
# @return the result of +node.remove+
def remove(node)
  node.remove
end

.remove_attribute(element, name) ⇒ Object



253
254
255
# File 'lib/moxml/adapter/rexml.rb', line 253

# Deletes the attribute called +name+ from +element+.
#
# @param element native element (must respond to #delete_attribute)
# @param name attribute name, stringified with #to_s before the call
# @return the result of +element.delete_attribute+
def remove_attribute(element, name)
  element.delete_attribute(name.to_s)
end

.replace(node, new_node) ⇒ Object



285
286
287
# File 'lib/moxml/adapter/rexml.rb', line 285

# Replaces +node+ with +new_node+ via the node's own +replace_with+.
#
# @param node native node to be replaced (must respond to #replace_with)
# @param new_node native node taking its place
# @return the result of +node.replace_with(new_node)+
def replace(node, new_node)
  node.replace_with(new_node)
end

.replace_children(element, children) ⇒ Object



289
290
291
292
# File 'lib/moxml/adapter/rexml.rb', line 289

# Swaps out every child of +element+ for the given +children+.
#
# Each existing child is removed through its own +remove+ method, then each
# new child is appended with +element.add+ in the order given.
#
# @param element native element (must respond to #children and #add)
# @param children [Enumerable] native nodes to add
# @return the +children+ collection that was passed in
def replace_children(element, children)
  element.children.each { |existing| existing.remove }
  children.each do |replacement|
    element.add(replacement)
  end
end

.root(document) ⇒ Object



211
212
213
# File 'lib/moxml/adapter/rexml.rb', line 211

# Returns the root that +document+ reports through its +root+ reader.
#
# @param document native document (must respond to #root)
# @return the result of +document.root+
def root(document)
  document.root
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for REXML

Parameters:



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/moxml/adapter/rexml.rb', line 34

# Runs REXML's SAX2 parser over +xml+, forwarding events to +handler+
# through a REXMLSAX2Bridge.
#
# Input that responds to #read is read in full; anything else is
# stringified with #to_s. The resulting string is wrapped in a StringIO and
# fed to the parser as a ::REXML::IOSource. A REXML parse error is not
# raised to the caller: it is converted to a Moxml::ParseError and passed
# to +handler.on_error+.
#
# @param xml [#read, #to_s] XML source
# @param handler SAX handler (must respond to #on_error)
# @return [void]
def sax_parse(xml, handler)
  # Loaded lazily so the SAX machinery is only required when it is used
  require "rexml/parsers/sax2parser"
  require "rexml/source"
  require "stringio"

  listener = REXMLSAX2Bridge.new(handler)

  content = xml.respond_to?(:read) ? xml.read : xml.to_s
  io_source = ::REXML::IOSource.new(StringIO.new(content))

  sax = ::REXML::Parsers::SAX2Parser.new(io_source)
  sax.listen(listener)
  sax.parse
rescue ::REXML::ParseException => parse_failure
  handler.on_error(Moxml::ParseError.new(parse_failure.message, line: parse_failure.line))
end

.serialize(node, options = {}) ⇒ Object



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
# File 'lib/moxml/adapter/rexml.rb', line 452

# Serializes +node+ to an XML string.
#
# For a ::REXML::Document the output is assembled in a fixed order: the XML
# declaration, the doctype (if any), every top-level child whose class is
# exactly Instruction, CData, Comment or Text, and finally the root element.
# Any other node is handed straight to +write_with_formatter+.
#
# NOTE(review): because the non-element children are all written before the
# root, a comment or processing instruction that follows the root in the
# document is emitted ahead of it — confirm this reordering is intended.
# NOTE(review): when options[:encoding] is given it is assigned onto the
# declaration object obtained from +node.xml_decl+; if that object is the
# document's own declaration, serializing mutates the document — verify.
#
# @param node native node or document to serialize
# @param options [Hash] reads :encoding and :indent (indent falls back to 2)
# @return [String] the serialized markup with surrounding whitespace stripped
def serialize(node, options = {})
  # Unfrozen buffer that every writer below appends to
  output = +""

  if node.is_a?(::REXML::Document)
    # Always include XML declaration
    decl = node.xml_decl || ::REXML::XMLDecl.new("1.0",
                                                 options[:encoding] || "UTF-8")
    decl.encoding = options[:encoding] if options[:encoding]
    # The declaration is written by hand; each pseudo-attribute is emitted
    # only when the declaration reports a truthy value for it
    output << "<?xml"
    output << %( version="#{decl.version}") if decl.version
    output << %( encoding="#{decl.encoding}") if decl.encoding
    output << %( standalone="#{decl.standalone}") if decl.standalone
    output << "?>"
    # output << "\n"

    # output << "\n"
    node.doctype&.write(output)

    # Write processing instructions
    # (also CDATA, comments and text; matched by exact class, so subclasses
    # of the listed classes are skipped unless listed themselves)
    node.children.each do |child|
      next unless [::REXML::Instruction, ::REXML::CData,
                   ::REXML::Comment, ::REXML::Text].include?(child.class)

      write_with_formatter(child, output, options[:indent] || 2)
      # output << "\n"
    end

    # The root element is always written last
    if node.root
      write_with_formatter(node.root, output,
                           options[:indent] || 2)
    end
  else
    write_with_formatter(node, output, options[:indent] || 2)
  end

  output.strip
end

.set_attribute(element, name, value) ⇒ Object



227
228
229
230
# File 'lib/moxml/adapter/rexml.rb', line 227

# Sets an attribute on +element+ and returns the stored attribute object.
#
# Name and value are stringified with #to_s unless they are nil, in which
# case nil is used as is.
#
# @param element native element (must respond to #attributes)
# @param name attribute name
# @param value attribute value
# @return the result of +element.attributes.get_attribute+ for the same key
def set_attribute(element, name, value)
  key = name&.to_s
  element.attributes[key] = value&.to_s
  element.attributes.get_attribute(key)
end

.set_attribute_name(attribute, name) ⇒ Object



232
233
234
235
236
237
238
239
# File 'lib/moxml/adapter/rexml.rb', line 232

# Renames +attribute+ and re-registers it on its owning element.
#
# REXML does not update the keys of the element's attribute collection when
# an attribute is renamed, so the entry under the old expanded name is
# deleted and the renamed attribute is appended again.
#
# @param attribute native attribute (must respond to #expanded_name, #name=
#   and #element)
# @param name new attribute name
# @return the result of appending the attribute to the element's attributes
def set_attribute_name(attribute, name)
  previous_key = attribute.expanded_name
  attribute.name = name

  owner = attribute.element
  owner.attributes.delete(previous_key)
  owner.attributes << attribute
end

.set_attribute_value(attribute, value) ⇒ Object



241
242
243
# File 'lib/moxml/adapter/rexml.rb', line 241

# Assigns +value+ to the attribute's +normalized+ field.
#
# NOTE(review): this writes +normalized+ rather than a value setter; confirm
# that REXML refreshes any cached unnormalized value after this assignment.
#
# @param attribute native attribute (must respond to #normalized=)
# @param value new value, passed through unchanged
# @return the assigned +value+
def set_attribute_value(attribute, value)
  attribute.normalized = value
end

.set_cdata_content(node, content) ⇒ Object



328
329
330
# File 'lib/moxml/adapter/rexml.rb', line 328

# Replaces the content of a CDATA node.
#
# @param node native CDATA node (must respond to #value=)
# @param content new content, stringified with #to_s
# @return [String] the stringified content that was assigned
def set_cdata_content(node, content)
  node.value = content.to_s
end

.set_comment_content(node, content) ⇒ Object



320
321
322
# File 'lib/moxml/adapter/rexml.rb', line 320

# Replaces the text of a comment node.
#
# @param node native comment node (must respond to #string=)
# @param content new text, stringified with #to_s
# @return [String] the stringified content that was assigned
def set_comment_content(node, content)
  node.string = content.to_s
end

.set_declaration_attribute(node, name, value) ⇒ Object



305
306
307
308
309
310
311
312
313
314
# File 'lib/moxml/adapter/rexml.rb', line 305

# Writes one field of an XML declaration by its string name.
#
# Only the exact strings "version", "encoding" and "standalone" are
# recognised; any other +name+ leaves the node untouched and yields nil.
#
# @param node native declaration (must respond to the matching writer)
# @param name [String] field name
# @param value new field value, assigned unchanged
# @return the assigned +value+, or nil for an unrecognised name
def set_declaration_attribute(node, name, value)
  if name == "version"
    node.version = value
  elsif name == "encoding"
    node.encoding = value
  elsif name == "standalone"
    node.standalone = value
  end
end

.set_namespace(element, ns) ⇒ Object

Adds a namespace prefix to the element name and also adds the matching namespace definition.



382
383
384
385
386
387
388
389
390
391
# File 'lib/moxml/adapter/rexml.rb', line 382

# Prefixes the name of +element+ with a namespace prefix and, where the node
# supports it, registers the namespace definition on it.
#
# The prefix is +ns.name+ stringified, or "xmlns" when that is empty. The
# node is renamed to "<prefix>:<current name>". The returned attribute is
# attached to the owning element when +element+ is itself an attribute, and
# to +element+ otherwise.
#
# @param element native element or attribute (must respond to #name and
#   #name=)
# @param ns namespace object (must respond to #name and #value)
# @return [::REXML::Attribute] built from the prefix, +ns.value+ and the owner
def set_namespace(element, ns)
  ns_name = ns.name.to_s
  prefix = ns_name.empty? ? "xmlns" : ns_name

  element.add_namespace(prefix, ns.value) if element.respond_to?(:add_namespace)
  element.name = "#{prefix}:#{element.name}"

  owner = if element.is_a?(::REXML::Attribute)
            element.element
          else
            element
          end
  ::REXML::Attribute.new(prefix, ns.value, owner)
end

.set_node_name(node, name) ⇒ Object



115
116
117
118
119
120
121
122
# File 'lib/moxml/adapter/rexml.rb', line 115

# Renames +node+ according to its REXML class.
#
# An element gets a new +name+, a processing instruction a new +target+;
# the name is stringified with #to_s in both cases. Other nodes are left
# untouched.
#
# @param node native node
# @param name new name
# @return [String, nil] the assigned string, or nil when nothing was changed
def set_node_name(node, name)
  if node.is_a?(::REXML::Element)
    node.name = name.to_s
  elsif node.is_a?(::REXML::Instruction)
    node.target = name.to_s
  end
end

.set_processing_instruction_content(node, content) ⇒ Object



340
341
342
# File 'lib/moxml/adapter/rexml.rb', line 340

# Replaces the content of a processing instruction.
#
# @param node native processing instruction (must respond to #content=)
# @param content new content, stringified with #to_s
# @return [String] the stringified content that was assigned
def set_processing_instruction_content(node, content)
  node.content = content.to_s
end

.set_root(doc, element) ⇒ Object



95
96
97
# File 'lib/moxml/adapter/rexml.rb', line 95

# Adds +element+ to +doc+ through the document's +add_element+.
#
# @param doc native document (must respond to #add_element)
# @param element native element to add
# @return the result of +doc.add_element(element)+
def set_root(doc, element)
  doc.add_element(element)
end

.set_text_content(node, content) ⇒ Object



363
364
365
366
367
368
369
370
371
372
373
# File 'lib/moxml/adapter/rexml.rb', line 363

# Replaces the textual content of +node+.
#
# Text and CDATA nodes get their value overwritten. For an element, every
# node returned by +texts+ is removed first so the new text does not pile
# up next to the old, then the new text is added with +add_text+. Other
# nodes are left untouched.
#
# @param node native text, CDATA or element node
# @param content new content, stringified with #to_s
# @return the assigned string, the result of +add_text+, or nil when
#   +node+ is of no handled type
def set_text_content(node, content)
  if node.is_a?(::REXML::Text) || node.is_a?(::REXML::CData)
    node.value = content.to_s
  elsif node.is_a?(::REXML::Element)
    node.texts.each { |existing| existing.remove }
    node.add_text(content.to_s)
  end
end

.text_content(node) ⇒ Object



344
345
346
347
348
349
350
351
352
353
# File 'lib/moxml/adapter/rexml.rb', line 344

# Reads the textual content of +node+.
#
# Text and CDATA nodes return their value as a string. An element returns
# the joined values of the nodes listed by +texts+, each distinct object
# counted once. Any other node yields nil.
#
# @param node native text, CDATA or element node
# @return [String, nil]
def text_content(node)
  return node.value.to_s if node.is_a?(::REXML::Text) || node.is_a?(::REXML::CData)
  return unless node.is_a?(::REXML::Element)

  distinct_texts = node.texts.uniq(&:object_id)
  distinct_texts.map(&:value).join
end

.xpath(node, expression, _namespaces = {}) ⇒ Object



436
437
438
439
440
441
442
443
444
445
# File 'lib/moxml/adapter/rexml.rb', line 436

# Evaluates +expression+ against +node+ and returns the matches as an Array.
#
# The lookup is done with +node.get_elements+. The namespaces argument is
# accepted for interface compatibility but not used.
#
# @param node native node (must respond to #get_elements)
# @param expression XPath expression
# @param _namespaces [Hash] ignored
# @return [Array] the matches
# @raise [Moxml::XPathError] when REXML raises a ParseException for the
#   expression
def xpath(node, expression, _namespaces = {})
  matches = node.get_elements(expression)
  matches.to_a
rescue ::REXML::ParseException => failure
  raise Moxml::XPathError.new(failure.message, expression: expression, adapter: "REXML", node: node)
end