Module: MultiXml

Defined in:
lib/multi_xml.rb,
lib/multi_xml/version.rb,
lib/multi_xml/parsers/ox.rb,
lib/multi_xml/parsers/oga.rb,
lib/multi_xml/parsers/rexml.rb,
lib/multi_xml/parsers/libxml.rb,
lib/multi_xml/parsers/nokogiri.rb,
lib/multi_xml/parsers/libxml2_parser.rb

Overview

Each MultiXml parser is expected to parse an XML document into a Hash. The conversion rules are:

  • Each document starts out as an empty Hash.

  • Reading an element creates an entry in the parent Hash that has a key of the element name and a value of a Hash with attributes as key value pairs. Children are added as described by this rule.

  • Text and CDATA are stored in the parent element Hash with a key of MultiXml::CONTENT_ROOT and a value of the text itself.

  • If a key already exists in the Hash then the value associated with the key is converted to an Array with the old and new value in it.

  • Other elements such as the xml prolog, doctype, and comments are ignored.

Defined Under Namespace

Modules: FileLike, Parsers

Classes: DisallowedTypeError, NoParserError, ParseError

Constant Summary collapse

# Ordered [library to require, parser identifier] pairs. default_parser walks
# this list in order and picks the first library that can be required.
REQUIREMENT_MAP =
[
  ["ox", :ox],
  ["libxml", :libxml],
  ["nokogiri", :nokogiri],
  ["rexml/document", :rexml],
  ["oga", :oga]
].freeze
# Hash key under which an element's text and CDATA content is stored.
CONTENT_ROOT =
"__content__".freeze
# Type-name string => proc that converts a raw value into a Ruby object.
# datetime_proc, float_proc, base64_decode, parse_binary and parse_file are
# defined outside this excerpt.
# NOTE(review): presumably keyed by the XML "type" attribute value -- confirm
# against the typecasting code.
PARSING =
{
  "symbol" => proc { |symbol| symbol.to_sym },
  "date" => proc { |date| Date.parse(date) },
  "datetime" => datetime_proc,
  "dateTime" => datetime_proc,
  "integer" => proc { |integer| integer.to_i },
  "float" => float_proc,
  "double" => float_proc,
  "decimal" => proc { |number| BigDecimal(number) },
  # Only the exact (stripped) strings "0" and "false" are treated as false.
  "boolean" => proc { |boolean| !%w[0 false].include?(boolean.strip) },
  "string" => proc { |string| string.to_s },
  # The rescue modifier falls back to the raw input on any StandardError.
  # NOTE(review): YAML.load on document content can be unsafe for untrusted
  # input; "yaml" is listed in DISALLOWED_XML_TYPES below.
  "yaml" => proc { |yaml| YAML.load(yaml) rescue yaml }, # rubocop:disable Style/RescueModifier
  "base64Binary" => proc { |binary| base64_decode(binary) },
  "binary" => proc { |binary, entity| parse_binary(binary, entity) },
  "file" => proc { |file, entity| parse_file(file, entity) }
}.freeze
# Ruby class name => type-name string (several classes share one type name).
TYPE_NAMES =
{
  "Symbol" => "symbol",
  "Integer" => "integer",
  "BigDecimal" => "decimal",
  "Float" => "float",
  "TrueClass" => "boolean",
  "FalseClass" => "boolean",
  "Date" => "date",
  "DateTime" => "datetime",
  "Time" => "datetime",
  "Array" => "array",
  "Hash" => "hash"
}.freeze
# Type names rejected by default; used as the :disallowed_types default below.
DISALLOWED_XML_TYPES =
%w[symbol yaml].freeze
# Defaults that parse merges the caller's options over.
DEFAULT_OPTIONS =
{
  typecast_xml_value: true,
  disallowed_types: DISALLOWED_XML_TYPES,
  symbolize_keys: false
}.freeze
# Library version as a Gem::Version.
VERSION =
Gem::Version.create("0.7.1")

Class Method Summary collapse

Class Method Details

.default_parser ⇒ Object

The default parser based on what you currently have loaded and installed. First checks to see if any parsers are already loaded, then checks to see which are installed if none are loaded.

Raises:

  • (NoParserError) — if no supported XML parser is already loaded and none can be required



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/multi_xml.rb', line 89

# Picks the parser to use when none has been set explicitly.
#
# A parser whose library constant is already defined wins, checked in the
# order Ox, LibXML, Nokogiri, Oga. Otherwise each REQUIREMENT_MAP entry is
# required in turn and the parser paired with the first library that loads
# is returned.
#
# @return [Object] the parser identifier (a Symbol for the built-in entries)
# @raise [NoParserError] if nothing is loaded and no library can be required
def default_parser
  preloaded =
    if defined?(::Ox) then :ox
    elsif defined?(::LibXML) then :libxml
    elsif defined?(::Nokogiri) then :nokogiri
    elsif defined?(::Oga) then :oga
    end
  return preloaded if preloaded

  REQUIREMENT_MAP.each do |feature, candidate|
    begin
      require feature
    rescue LoadError
      next # library not available; try the next entry
    end
    return candidate
  end

  message = "No XML parser detected. If you're using Rubinius and Bundler, try adding an XML parser to your Gemfile (e.g. libxml-ruby, nokogiri, or rubysl-rexml). For more information, see https://github.com/sferik/multi_xml/issues/42."
  raise NoParserError, message
end

.parse(xml, options = {}) ⇒ Object

Parse an XML string or IO into Ruby.

Options

:symbolize_keys :: If true, will use symbols instead of strings for the keys.

:disallowed_types :: Types to disallow from being typecast. Defaults to ['yaml', 'symbol']. Use [] to allow all types.

:typecast_xml_value :: If true (the default), typecasts values in the parsed document; if false, values are left untypecast.



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/multi_xml.rb', line 134

# Parses an XML string or IO-like object into a Hash.
#
# @param xml [String, #read, nil] the document; nil is treated as "" and
#   anything responding to +strip+ is stripped before parsing
# @param options [Hash] merged over DEFAULT_OPTIONS:
#   :typecast_xml_value -- when truthy, the result is passed through
#   typecast_xml_value with options[:disallowed_types];
#   :disallowed_types -- forwarded to typecast_xml_value;
#   :symbolize_keys -- when truthy, the result is passed through symbolize_keys
# @return [Hash] the parsed document; {} when the input has no characters
# @raise [DisallowedTypeError] re-raised unchanged
# @raise [ParseError] wrapping any error matching parser.parse_error
def parse(xml, options = {}) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
  source = xml || ""
  settings = DEFAULT_OPTIONS.merge(options)
  source = source.strip if source.respond_to?(:strip)

  begin
    io = source.respond_to?(:read) ? source : StringIO.new(source)

    # Peek at the first character so an empty document short-circuits to {}.
    first_char = io.getc
    return {} if first_char.nil?

    io.ungetc(first_char)

    result = undasherize_keys(parser.parse(io) || {})
    result = typecast_xml_value(result, settings[:disallowed_types]) if settings[:typecast_xml_value]
  rescue DisallowedTypeError
    # Listed first so it is never wrapped by the parser-error clause below.
    raise
  rescue parser.parse_error => e
    raise(ParseError, e.message, e.backtrace)
  end

  settings[:symbolize_keys] ? symbolize_keys(result) : result
end

.parser ⇒ Object

Get the current parser class.



78
79
80
81
82
83
# File 'lib/multi_xml.rb', line 78

# Returns the current parser, selecting one on first use.
#
# When @parser has never been assigned, default_parser is consulted and its
# result is installed through the parser= writer. An explicitly assigned
# value (even nil) is returned as-is.
#
# @return [Object] whatever is stored in @parser
def parser
  self.parser = default_parser unless defined?(@parser)
  @parser
end

.parser=(new_parser) ⇒ Object

Set the XML parser utilizing a symbol, string, or class. Supported by default are:

  • :libxml
  • :nokogiri
  • :ox
  • :rexml
  • :oga


113
114
115
116
117
118
119
120
121
122
123
# File 'lib/multi_xml.rb', line 113

# Sets the parser from a name or from a class/module.
#
# For a String or Symbol, "multi_xml/parsers/<downcased name>" is required
# and the constant under MultiXml::Parsers is looked up by splitting the name
# on "_" and capitalizing each part (e.g. "fake_thing" -> FakeThing). A Class
# or Module is stored directly.
#
# @param new_parser [String, Symbol, Class, Module] the parser specification
# @raise [RuntimeError] if new_parser is none of the accepted kinds
# @raise [LoadError] if the parser file for a given name cannot be required
def parser=(new_parser)
  case new_parser
  when Module # Class is a subclass of Module, so this covers both
    @parser = new_parser
  when String, Symbol
    parser_name = new_parser.to_s
    require "multi_xml/parsers/#{parser_name.downcase}"
    constant_name = parser_name.split("_").map(&:capitalize).join
    @parser = MultiXml::Parsers.const_get(constant_name)
  else
    raise "Did not recognize your parser specification. Please specify either a symbol or a class."
  end
end