Class: RDF::Microdata::Format

Inherits:
Format
  • Object
show all
Defined in:
lib/rdf/microdata/format.rb

Overview

Microdata format specification.

Examples:

Obtaining a Microdata format class

RDF::Format.for(:microdata)         #=> RDF::Microdata::Format
RDF::Format.for("etc/foaf.html")
RDF::Format.for(:file_name      => "etc/foaf.html")
RDF::Format.for(file_extension: "html")
RDF::Format.for(:content_type   => "text/html")

Obtaining serialization format MIME types

RDF::Format.content_types      #=> {"text/html" => [RDF::Microdata::Format]}

See Also:

Class Method Summary collapse

Class Method Details

.cli_commands ⇒ Hash{Symbol => Hash}

Hash of CLI commands appropriate for this format

Returns:

  • (Hash{Symbol => Hash})


48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/rdf/microdata/format.rb', line 48

# Hash of CLI commands appropriate for this format.
#
# Each entry maps a command name to its description, help text, format
# filter, option usage and the lambda that carries the command out. Both
# commands process the named files or, when no files are given,
# `options[:evaluate]` (falling back to standard input), and write their
# result to `options[:output]` (falling back to `$stdout`).
#
# @return [Hash{Symbol => Hash}]
def self.cli_commands
  # Shared input handling for both commands: runs `block` with a Microdata
  # reader for every input. `flag` is the reader option (:rdfa or :jsonld)
  # switched on for each reader; the options hash is merged afresh per input
  # so no hash is shared between readers.
  each_reader = lambda do |files, options, flag, &block|
    if files.empty?
      # With no files, read options[:evaluate] when present, otherwise standard
      # input. `$stdin` rather than `STDIN` so that a reassigned input stream is
      # honoured, mirroring the use of `$stdout` for output.
      input = options[:evaluate] ? StringIO.new(options[:evaluate]) : $stdin
      input.set_encoding(options.fetch(:encoding, Encoding::UTF_8))
      RDF::Microdata::Reader.new(input, options.merge(flag => true), &block)
    else
      files.each do |file|
        RDF::Microdata::Reader.open(file, options.merge(flag => true), &block)
      end
    end
  end

  {
    "to-rdfa": {
      description: "Transform HTML+Microdata into HTML+RDFa",
      parse: false,
      help: "to-rdfa files ...\nTransform HTML+Microdata into HTML+RDFa",
      filter: {
        format: :microdata
      },
      option_use: {output_format: :disabled},
      lambda: ->(files, options) do
        out = options[:output] || $stdout
        # Stylesheet applied to the generated RDFa document before it is
        # written out; it emits each node on its own line with nested indentation.
        xsl = Nokogiri::XSLT(%(<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
          <xsl:param name="indent-increment" select="'  '"/>
          <xsl:output method="html" doctype-system="about:legacy-compat"/>
 
          <xsl:template name="newline">
            <xsl:text disable-output-escaping="yes">
        </xsl:text>
          </xsl:template>
 
          <xsl:template match="comment() | processing-instruction()">
            <xsl:param name="indent" select="''"/>
            <xsl:call-template name="newline"/>
            <xsl:value-of select="$indent"/>
            <xsl:copy />
          </xsl:template>
 
          <xsl:template match="text()">
            <xsl:param name="indent" select="''"/>
            <xsl:call-template name="newline"/>
            <xsl:value-of select="$indent"/>
            <xsl:value-of select="normalize-space(.)"/>
          </xsl:template>
 
          <xsl:template match="text()[normalize-space(.)='']"/>
 
          <xsl:template match="*">
            <xsl:param name="indent" select="''"/>
            <xsl:call-template name="newline"/>
            <xsl:value-of select="$indent"/>
              <xsl:choose>
               <xsl:when test="count(child::*) > 0">
                <xsl:copy>
                 <xsl:copy-of select="@*"/>
                 <xsl:apply-templates select="*|text()">
                   <xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
                 </xsl:apply-templates>
                 <xsl:call-template name="newline"/>
                 <xsl:value-of select="$indent"/>
                </xsl:copy>
               </xsl:when>
               <xsl:otherwise>
                <xsl:copy-of select="."/>
               </xsl:otherwise>
             </xsl:choose>
          </xsl:template>
        </xsl:stylesheet>).gsub(/^            /, ''))
        each_reader.call(files, options, :rdfa) do |reader|
          # Strip surrounding whitespace from every text node before applying the stylesheet.
          reader.rdfa.xpath("//text()").each do |txt|
            txt.content = txt.content.to_s.strip
          end
          out.puts xsl.apply_to(reader.rdfa).to_s
        end
      end
    },
    "to-jsonld": {
      description: "Transform HTML+Microdata into JSON-LD",
      parse: false,
      help: "to-jsonld files ...\nTransform HTML+Microdata into JSON-LD",
      filter: {
        format: :microdata
      },
      option_use: {output_format: :disabled},
      lambda: ->(files, options) do
        out = options[:output] || $stdout
        each_reader.call(files, options, :jsonld) do |reader|
          out.puts reader.jsonld.to_json(::JSON::LD::JSON_STATE)
        end
      end
    },
  }
end

.detect(sample) ⇒ Boolean

Sample detection to see if it matches Microdata (not RDF/XML or RDFa)

Use a text sample to detect the format of an input file. Sub-classes implement a matcher sufficient to detect probable format matches, including disambiguating between other similar formats.

Parameters:

  • sample (String)

    Beginning several bytes (~ 1K) of input.

Returns:

  • (Boolean)


41
42
43
# File 'lib/rdf/microdata/format.rb', line 41

# Reports whether a text sample looks like HTML carrying Microdata.
#
# The sample matches when any tag in it contains one of the Microdata
# attribute names (itemprop, itemtype, itemref, itemscope, itemid).
#
# @param sample [String] beginning of the input to inspect
# @return [Boolean] true when a tag mentioning a Microdata attribute is found
def self.detect(sample)
  microdata_tag = /<[^>]*(itemprop|itemtype|itemref|itemscope|itemid)[^>]*>/m
  sample.match?(microdata_tag)
end