50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
# File 'lib/deba/extractor.rb', line 50
# Recursively walks +node+ and feeds its content into @document: text nodes
# are appended as Deba::Span objects, while structural elements open a new
# segment through @document.break before their children are processed and
# fall back to a Deba::Paragraph segment afterwards.
#
# State carried between calls:
# * @just_appended_br -- set after a single <br>; a second <br> right after it
#   turns the pair into a paragraph break, anything else turns it into "\n".
# * @in_blockquote -- true while the children of a <blockquote> are processed.
#
# @param node [Object] a node responding to name, text?, inner_text, children,
#   matches? and xpath (presumably a Nokogiri::XML::Node -- confirm with caller)
# @return [void] the return value carries no meaning; the method works by
#   mutating @document
def process(node)
  # :exclude may hold one selector or a list of them; matching nodes are dropped
  # together with their whole subtree.
  if @options.key?(:exclude)
    return if Array(@options[:exclude]).any? { |selector| node.matches?(selector) }
  end

  tag = node.name.downcase
  return if SKIP_TAGS.include?(tag)

  if tag == 'br'
    if @just_appended_br
      # Second consecutive <br>: the pair becomes a paragraph break.
      @just_appended_br = false
      @document.break(Deba::Paragraph)
      return
    end
    @just_appended_br = true
  elsif @just_appended_br
    # A lone <br> followed by any other node becomes a plain line break.
    @just_appended_br = false
    @document << "\n"
  end

  if node.text?
    text = node.inner_text
    @document << Deba::Span.new(text) if Deba::Utils.present?(text)
    return
  end

  if ENHANCERS.keys.flatten.include?(tag)
    # Wrap the children's output in the replacement registered for this tag.
    ENHANCERS.each_pair do |tags, marker|
      next unless tags.include?(tag)

      @document << marker
      node.children.each { |child| process(child) }
      @document << marker
    end
    return
  end

  if tag == 'blockquote'
    # Remember the enclosing state so that leaving a nested blockquote does not
    # clear the flag while the outer blockquote is still being processed.
    enclosing = @in_blockquote ? true : false
    @in_blockquote = true
    begin
      @document.break(Deba::Paragraph)
      node.children.each { |child| process(child) }
      @document.break(Deba::Paragraph)
    ensure
      @in_blockquote = enclosing
    end
    return
  end

  if tag == 'li'
    last_item = node.xpath('count(following-sibling::li)').to_i.zero?
    # Only the nearest enclosing list decides the numbering: items of a <ul>
    # nested inside an <ol> must stay unnumbered.
    ordered = node.xpath('boolean(ancestor::*[self::ol or self::ul][1][self::ol])')
    index = ordered ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
    @document.break(Deba::ListItem, last_item, index)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if tag == 'dt'
    @document.break(Deba::DefinitionTerm)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if tag == 'dd'
    # last_item is true when no further <dd> sibling follows this one.
    last_item = node.xpath('count(following-sibling::dd)').to_i.zero?
    @document.break(Deba::DefinitionDescription, last_item)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if BLOCK_INITIATING_TAGS.include?(tag)
    @document.break(Deba::Paragraph)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if HEADING_TAGS.include?(tag)
    # The heading level is the digit after the leading "h" (e.g. "h2" -> 2).
    @document.break(Deba::Heading, tag[1..-1].to_i)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  # Any other element is transparent: only its children contribute output.
  node.children.each { |child| process(child) }
end
|