Class: Microformats2::FormatParser

Inherits:
ParserCore show all
Defined in:
lib/microformats2/format_parser.rb

Constant Summary

Constants inherited from ParserCore

ParserCore::FORMAT_CLASS_REG_EXP, ParserCore::PROPERTY_CLASS_REG_EXP, ParserCore::VALUE_CLASS_REG_EXP, ParserCore::VALUE_TITLE_CLASS_REG_EXP

Instance Method Summary collapse

Methods inherited from ParserCore

#initialize

Constructor Details

This class inherits a constructor from Microformats2::ParserCore

Instance Method Details

#parse(element, base: nil, element_type: nil, format_class_array: [], backcompat: false) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/microformats2/format_parser.rb', line 4

# Parses one microformat root element into its hash representation.
#
# Resets the parser state, hands the element's children to parse_node so
# that explicit properties and nested formats are collected, fills in the
# implied name/photo/url properties (skipped in backcompat mode), implies a
# date for time-only 'end' values, and finally assembles the result hash.
#
# @param element the root node of the format; it must respond to name,
#   attribute, children, text and inner_html (its children are compared
#   against Nokogiri::XML::Text / Nokogiri::XML::Element)
# @param base base handed to Microformats2::AbsoluteUri and PropertyParser
# @param element_type [String, nil] property prefix ('p', 'u', 'e', ...) when
#   this format is itself the value of a property, nil otherwise
# @param format_class_array [Array] format classes, returned as 'type'
# @param backcompat [Boolean] when true, implied properties are not computed
# @return [Hash] always has 'type' and 'properties'; has 'value', 'children'
#   and 'html' only when they apply
def parse(element, base: nil, element_type: nil, format_class_array: [], backcompat: false)
  @base = base
  @mode_backcompat = backcompat

  @properties = {}
  @children = []

  @format_property_type = element_type
  @value = nil

  @fmt_classes = format_class_array

  parse_node(element.children)

  apply_implied_properties(element) unless @mode_backcompat
  apply_implied_end_dates

  if @value.nil? || @value.empty?
    if element_type == 'p' && !@properties['name'].nil? && !@properties['name'].empty?
      @value = @properties['name'].first
    elsif element_type == 'u' && !@properties['url'].nil? && !@properties['url'].empty?
      @value = @properties['url'].first
    elsif !element_type.nil?
      @value = PropertyParser.new.parse(element, base: @base, element_type: element_type, backcompat: @mode_backcompat)
    end
  end

  h_object = {}

  h_object['value'] = @value unless @value.nil?
  h_object['type'] = format_class_array
  h_object['properties'] = @properties

  h_object['children'] = @children unless @children.empty?

  # An embedded (e-) format reports its text as value and keeps its markup.
  if @format_property_type == 'e'
    h_object['value'] = element.text.strip
    h_object['html'] = element.inner_html
  end

  # TODO: fall back to p- dt- u- parsing if value still not set?
  #   not sure that is correct by the spec actually

  h_object
end

# Fills in 'name', 'photo' and 'url' for whichever of them was not found
# explicitly. Implied photo and url values are absolutized against @base.
def apply_implied_properties(element)
  @properties['name'] = [implied_name(element)] if @properties['name'].nil?

  if @properties['photo'].nil?
    photo = implied_link(element, [%w[img src], %w[object data]])
    @properties['photo'] = [Microformats2::AbsoluteUri.new(photo, base: @base).absolutize] unless photo.nil?
  end

  if @properties['url'].nil?
    url = implied_link(element, [%w[a href], %w[area href]])
    @properties['url'] = [Microformats2::AbsoluteUri.new(url, base: @base).absolutize] unless url.nil?
  end
end

# Implied name: the element's own alt/title; else the non-empty alt/title of
# its only child (.h-x>img:only-child[alt]:not([alt=""]) and friends); else
# that of the only child's only child; else the element's stripped text.
def implied_name(element)
  own = name_attribute_value(element, true)
  return own unless own.nil?

  child = sole_plain_child(element)
  unless child.nil?
    from_child = name_attribute_value(child, false)
    return from_child unless from_child.nil?

    grandchild = sole_plain_child(child)
    unless grandchild.nil?
      from_grandchild = name_attribute_value(grandchild, false)
      return from_grandchild unless from_grandchild.nil?
    end
  end

  element.text.strip
end

# Returns the stripped alt (img, area) or title (abbr) of node, or nil when
# node is another tag or lacks the attribute. An empty attribute value is
# only accepted when allow_empty is true (used for the root element itself).
def name_attribute_value(node, allow_empty)
  attribute_name = { 'img' => 'alt', 'area' => 'alt', 'abbr' => 'title' }[node.name]
  return nil if attribute_name.nil?

  attribute = node.attribute(attribute_name)
  return nil if attribute.nil?
  return nil if attribute.value.empty? && !allow_empty

  attribute.value.strip
end

# Implied photo/url lookup. candidates is an ordered list of
# [tag, attribute] pairs. Checks the element itself first (value returned
# unstripped, as before), then its direct children, then the children of
# its only child when that child carries no format classes.
def implied_link(element, candidates)
  candidates.each do |tag, attribute_name|
    next unless element.name == tag

    attribute = element.attribute(attribute_name)
    return attribute.value unless attribute.nil?
  end

  found = only_of_type_value(element, candidates)
  return found unless found.nil?

  wrapper = sole_plain_child(element)
  wrapper.nil? ? nil : only_of_type_value(wrapper, candidates)
end

# For each [tag, attribute] pair in order: when parent has exactly one child
# element of that tag carrying the attribute, and that child has no format
# classes, returns the stripped attribute value. Returns nil otherwise.
def only_of_type_value(parent, candidates)
  candidates.each do |tag, attribute_name|
    matches = parent.children.select do |child|
      child.is_a?(Nokogiri::XML::Element) && child.name == tag && !child.attribute(attribute_name).nil?
    end
    next unless matches.count == 1
    next unless format_classes(matches.first).empty?

    return matches.first.attribute(attribute_name).value.strip
  end
  nil
end

# Returns node's single non-text child when it is an element without format
# classes, nil otherwise.
def sole_plain_child(node)
  non_text = node.children.reject { |child| child.is_a?(Nokogiri::XML::Text) }
  return nil unless non_text.count == 1

  child = non_text.first
  return nil unless child.is_a?(Nokogiri::XML::Element)
  return nil unless format_classes(child).empty?

  child
end

# Prefixes 'end' values that carry no date of their own with the date of the
# first dated 'start' value. Non-String entries (nested formats are stored
# as hashes) are skipped on both sides instead of being pattern-matched or
# concatenated, which used to raise.
def apply_implied_end_dates
  starts = @properties['start']
  ends = @properties['end']
  return if starts.nil? || ends.nil?

  start_date = nil
  starts.each do |value|
    start_date = leading_date(value)
    break unless start_date.nil?
  end
  return if start_date.nil?

  ends.map! do |value|
    if value.is_a?(String) && leading_date(value).nil?
      start_date + ' ' + value
    else
      value
    end
  end
end

# Returns the leading YYYY-MM-DD (or, failing that, YYYY-DDD) portion of a
# String value, or nil when there is none or value is not a String.
def leading_date(value)
  return nil unless value.is_a?(String)

  value[/^(\d{4}-[01]\d-[0-3]\d)/, 1] || value[/^(\d{4}-[0-3]\d\d)/, 1]
end

private :apply_implied_properties, :implied_name, :name_attribute_value,
        :implied_link, :only_of_type_value, :sole_plain_child,
        :apply_implied_end_dates, :leading_date

#parse_element(element) ⇒ Object



281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/microformats2/format_parser.rb', line 281

# Routes one element according to the property and format classes found on it.
#
# * property + format classes: the element is parsed as a nested format once
#   per property class and stored under that property; its children are not
#   walked here.
# * property classes only: each property is parsed with PropertyParser
#   (nil/empty results are dropped), then the children are walked.
# * format classes only: the parsed format is appended to @children.
# * neither: the children are walked.
#
# Format classes fall back to backcompat_format_classes when format_classes
# finds none; in backcompat mode the property classes come from
# backcompat_property_classes.
def parse_element(element)
  property_names = property_classes(element)
  property_names = backcompat_property_classes(element) if @mode_backcompat

  format_names = format_classes(element)
  legacy_format = false
  if format_names.empty?
    format_names = backcompat_format_classes(element)
    legacy_format = !format_names.empty?
  end

  is_property = property_names.length > 0
  is_format = format_names.length > 0

  if is_property && is_format
    property_names.each do |class_name|
      segments = class_name.downcase.split('-')
      prefix = segments[0]
      property_name = segments[1..-1].join('-')

      nested = FormatParser.new.parse(element, base: @base, element_type: prefix, format_class_array: format_names, backcompat: legacy_format)

      # A p- format takes its value from a nested name, a u- format from a
      # nested url; nothing is taken for dt-. Only the first hit counts.
      if @value.nil? && [%w[p name], %w[u url]].include?([@format_property_type, property_name])
        @value = nested['value']
      end

      (@properties[property_name] ||= []) << nested
    end
  elsif is_property
    property_names.each do |class_name|
      segments = class_name.downcase.split('-')
      prefix = segments[0]
      property_name = segments[1..-1].join('-')

      parsed = PropertyParser.new.parse(element, base: @base, element_type: prefix, backcompat: @mode_backcompat)
      next if parsed.nil? || parsed.empty?

      (@properties[property_name] ||= []) << parsed
    end
    parse_nodeset(element.children)
  elsif is_format
    @children << FormatParser.new.parse(element, base: @base, format_class_array: format_names, backcompat: legacy_format)
  else
    parse_nodeset(element.children)
  end
end