Class: Microformats2::PropertyParser

Inherits:
ParserCore show all
Defined in:
lib/microformats2/property_parser.rb

Constant Summary

Constants inherited from ParserCore

Microformats2::ParserCore::FORMAT_CLASS_REG_EXP, Microformats2::ParserCore::PROPERTY_CLASS_REG_EXP, Microformats2::ParserCore::VALUE_CLASS_REG_EXP, Microformats2::ParserCore::VALUE_TITLE_CLASS_REG_EXP

Instance Method Summary collapse

Methods inherited from ParserCore

#initialize

Constructor Details

This class inherits a constructor from Microformats2::ParserCore

Instance Method Details

#parse(element, base: nil, element_type:, format_class_array: [], backcompat: nil) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/microformats2/property_parser.rb', line 4

# Extracts the value of one property element according to its property type
# and returns it (the result is also left in @value).
#
# * 'p'  - the value-class-pattern result; otherwise a stripped +title+
#          (abbr), +value+ (data/input) or +alt+ (img/area) attribute;
#          otherwise the element text with images replaced.
# * 'e'  - a Hash holding the rendered text under :value and the inner HTML,
#          with leading/trailing spaces removed, under :html.
# * 'u'  - a stripped +href+ (a/area), +src+ (img/audio/video/source),
#          +poster+ (video) or +data+ (object) attribute, absolutized against
#          the base. Failing that: the value-class-pattern result, a stripped
#          +title+ (abbr) or +value+ (data/input) attribute, or the rendered
#          text. These fallbacks are returned as-is, not absolutized.
# * 'dt' - whatever Microformats2::TimePropertyParser#parse returns.
#
# Any other element_type leaves the value nil.
#
# @param element the node to parse; must respond to #name, #attribute and #inner_html
# @param base base URL; stored in @base and used when absolutizing 'u' values
# @param element_type [String] the property prefix: 'p', 'e', 'u' or 'dt'
# @param format_class_array stored in @fmt_classes and forwarded to the time parser
# @param backcompat stored in @mode_backcompat and forwarded to the time parser
# @return the parsed value, or nil when nothing applies
def parse(element, base: nil, element_type:, format_class_array: [], backcompat: nil)
  @base = base
  @value = nil
  @property_type = element_type
  @fmt_classes = format_class_array
  @mode_backcompat = backcompat

  # Stripped value of the named attribute, or nil when the attribute is absent.
  stripped = lambda do |attr_name|
    attr = element.attribute(attr_name)
    attr.nil? ? nil : attr.value.strip
  end

  case element_type
  when 'p'
    parse_value_class_pattern(element)
    if @value.nil?
      from_attribute =
        case element.name
        when 'abbr' then stripped.call('title')
        when 'data', 'input' then stripped.call('value')
        when 'img', 'area' then stripped.call('alt')
        end
      @value = from_attribute.nil? ? render_text_and_replace_images(element, base: @base) : from_attribute
    end
  when 'e'
    # TODO: the spec doesn't say to remove script and style tags, assuming this to be in error
    rendered = render_text(element, base: @base)
    trimmed_html = element.inner_html.gsub(/\A +/, '').gsub(/ +\Z/, '')
    @value = { value: rendered, html: trimmed_html }
  when 'u'
    # URL-bearing attributes take priority; a video falls back from src to poster.
    @value =
      case element.name
      when 'a', 'area' then stripped.call('href')
      when 'img', 'audio', 'source' then stripped.call('src')
      when 'video' then stripped.call('src') || stripped.call('poster')
      when 'object' then stripped.call('data')
      end

    if @value.nil?
      parse_value_class_pattern(element)
      if @value.nil?
        from_attribute =
          case element.name
          when 'abbr' then stripped.call('title')
          when 'data', 'input' then stripped.call('value')
          end
        @value = from_attribute.nil? ? render_text(element, base: @base) : from_attribute
      end
    else
      @value = Microformats2::AbsoluteUri.new(@value, base: @base).absolutize
    end
  when 'dt'
    time_parser = Microformats2::TimePropertyParser.new
    @value = time_parser.parse(
      element,
      base: base,
      element_type: element_type,
      format_class_array: format_class_array,
      backcompat: backcompat
    )
  end

  @value
end

#parse_element(element) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/microformats2/property_parser.rb', line 76

# Collects value-class-pattern fragments from +element+ into
# @value_class_pattern_value.
#
# * An element with a value-title class contributes its stripped +title+
#   attribute. When that attribute is absent the element contributes nothing.
# * An element with a value class contributes the stripped +alt+ (img/area),
#   +value+ (data) or +title+ (abbr) attribute when present, and its stripped
#   text otherwise.
# * Any other element is descended into via parse_node, unless it carries
#   property classes (backcompat property classes when @mode_backcompat is
#   set) or format classes.
#
# @param element the node to inspect; must respond to #name, #attribute, #text and #children
def parse_element(element)
  if value_title_classes(element).length >= 1
    # Guard the lookup: markup in the wild may carry the class without a
    # title attribute, and calling #value on nil would raise NoMethodError.
    title = element.attribute('title')
    @value_class_pattern_value << title.value.strip unless title.nil?

  elsif value_classes(element).length >= 1
    # Pick the attribute that carries the value for this tag, if any.
    attr_name =
      case element.name
      when 'img', 'area' then 'alt'
      when 'data' then 'value'
      when 'abbr' then 'title'
      end
    source_attr = attr_name.nil? ? nil : element.attribute(attr_name)

    raw = source_attr.nil? ? element.text : source_attr.value
    @value_class_pattern_value << raw.strip
  else
    p_classes = property_classes(element)
    p_classes = backcompat_property_classes(element) if @mode_backcompat
    # Nested properties and nested microformats are not searched.
    if p_classes.length == 0 && format_classes(element).length == 0
      parse_node(element.children)
    end
  end
end

#parse_value_class_pattern(element) ⇒ Object



70
71
72
73
74
# File 'lib/microformats2/property_parser.rb', line 70

# Runs the value-class-pattern search over the children of +element+.
#
# The fragment buffer @value_class_pattern_value is reset, parse_node is
# invoked on the children, and any collected fragments are concatenated into
# @value. When nothing was collected @value is left untouched.
#
# @param element the node whose children are searched
# @return [String, nil] the joined value, or nil when no fragments were found
def parse_value_class_pattern(element)
  @value_class_pattern_value = []
  parse_node(element.children)
  return if @value_class_pattern_value.empty?

  @value = @value_class_pattern_value.join
end

#render_text(node, base: nil) ⇒ Object



114
115
116
117
118
119
# File 'lib/microformats2/property_parser.rb', line 114

# Returns the text of +node+ with script and style elements dropped and
# surrounding whitespace stripped.
#
# The node's inner HTML is re-parsed into a separate Nokogiri document, so the
# removals do not touch +node+ itself.
#
# @param node the node to render; must respond to #inner_html
# @param base accepted for signature parity with the other renderers; not used here
# @return [String] the stripped text
def render_text(node, base: nil)
  scratch_doc = Nokogiri::HTML(node.inner_html)
  ['//script', '//style'].each do |query|
    scratch_doc.xpath(query).remove
  end
  scratch_doc.text.strip
end

#render_text_and_replace_images(node, base: nil) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/microformats2/property_parser.rb', line 99

# Returns the text of +node+ with script and style elements dropped, every
# img replaced by its +alt+ text (or, lacking that, its absolutized +src+)
# padded with a space on each side, and surrounding whitespace stripped.
#
# The node's inner HTML is re-parsed into a separate Nokogiri document, so the
# edits do not touch +node+ itself. An img with neither +alt+ nor +src+ is
# left in place.
#
# NOTE(review): Nokogiri's Node#replace treats a String argument as markup, so
# alt text or URLs containing '<' or '&' may be re-parsed as HTML rather than
# inserted verbatim - confirm whether a text node is wanted here.
#
# @param node the node to render; must respond to #inner_html
# @param base base URL used to absolutize img +src+ values; falls back to
#   @base when nil
# @return [String] the stripped text
def render_text_and_replace_images(node, base: nil)
  # Honor the keyword, but keep the stored base as the default so callers
  # that omit it keep their existing behavior.
  effective_base = base || @base

  new_doc = Nokogiri::HTML(node.inner_html)
  new_doc.xpath('//script').remove
  new_doc.xpath('//style').remove

  # The block variable is deliberately not called +node+, which would shadow
  # the method parameter.
  new_doc.traverse do |child|
    next unless child.name == 'img'

    alt = child.attribute('alt')
    if !alt.nil?
      child.replace(" #{alt.value} ")
    else
      src = child.attribute('src')
      unless src.nil?
        absolute_url = Microformats2::AbsoluteUri.new(src.value.to_s, base: effective_base).absolutize
        child.replace(" #{absolute_url} ")
      end
    end
  end

  new_doc.text.strip
end