23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'lib/deba/extractor.rb', line 23
# Recursively walks +node+ and feeds its content into @document.
#
# Text nodes are appended as Deba::Span objects (blank text is dropped).
# Tags listed in ENHANCERS wrap their children in the configured marker.
# Structural tags (blockquote, li, dt, dd, BLOCK_INITIATING_TAGS,
# HEADING_TAGS) open the matching block on @document, process their children
# and then fall back to a Deba::Paragraph. Any other tag is transparent: only
# its children are processed.
#
# @param node [#name, #text?, #inner_text, #children, #matches?, #xpath]
#   the node to process (presumably a Nokogiri node -- confirm against callers)
# @return [void] the return value is not meaningful; all output goes to
#   @document
def process(node)
  # Drop the whole subtree when the node matches a user-supplied :exclude selector.
  if @options.key?(:exclude) && Array(@options[:exclude]).any? { |selector| node.matches?(selector) }
    return
  end

  node_name = node.name.downcase
  return if node_name == 'head'

  # A lone <br> becomes a newline, emitted lazily when the next non-<br> node
  # arrives; two consecutive <br> start a new paragraph instead.
  if node_name == 'br'
    if @just_appended_br
      @just_appended_br = false
      @document.break(Deba::Paragraph)
      return
    end
    @just_appended_br = true
  elsif @just_appended_br
    @just_appended_br = false
    @document << "\n"
  end

  if node.text?
    text = node.inner_text
    @document << Deba::Span.new(text) if Deba::Utils.present?(text)
    return
  end

  # Single lookup: the first ENHANCERS group containing this tag supplies the
  # marker placed before and after the children.
  enhancer = ENHANCERS.find { |tags, _marker| tags.include?(node_name) }
  if enhancer
    marker = enhancer.last
    @document << marker
    node.children.each { |child| process(child) }
    @document << marker
    return
  end

  if node_name == 'blockquote'
    # Remember the enclosing state so that leaving a nested blockquote does
    # not clear the flag while the outer blockquote is still being processed.
    inside_outer_blockquote = @in_blockquote ? true : false
    @in_blockquote = true
    @document.break(Deba::Paragraph)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    @in_blockquote = inside_outer_blockquote
    return
  end

  if node_name == 'li'
    last_item = node.xpath('count(following-sibling::li)').to_i == 0
    # Only the nearest enclosing list decides whether the item is numbered, so
    # items of a <ul> nested inside an <ol> stay unnumbered.
    ordered = node.xpath('boolean(ancestor::*[self::ol or self::ul][1][self::ol])')
    index = ordered ? (node.xpath('count(preceding-sibling::li)').to_i + 1) : nil
    @document.break(Deba::ListItem, last_item, index)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if node_name == 'dt'
    @document.break(Deba::DefinitionTerm)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if node_name == 'dd'
    last_item = node.xpath('count(following-sibling::dd)').to_i == 0
    @document.break(Deba::DefinitionDescription, last_item)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if BLOCK_INITIATING_TAGS.include?(node_name)
    @document.break(Deba::Paragraph)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  if HEADING_TAGS.include?(node_name)
    # The heading level is the digit after the leading "h" (e.g. "h2" -> 2).
    @document.break(Deba::Heading, node_name[1..-1].to_i)
    node.children.each { |child| process(child) }
    @document.break(Deba::Paragraph)
    return
  end

  # Unknown or inline tag: transparent, just descend.
  node.children.each { |child| process(child) }
end
|