Module: Hairaito::Nokogiri::XML::Node
- Defined in:
- lib/hairaito/nokogiri/xml/node.rb
Instance Method Summary collapse
- #exclude_offsets(offsets) ⇒ Object
-
#excluded_offsets ⇒ Array
Offsets within the self node that were already processed.
-
#first_text_node ⇒ Nokogiri::XML::Node
First text node within self node.
- #highlight_by_ranges(ranges, options) ⇒ Object
- #matched_offsets(string, types, options) ⇒ Object
-
#next_text(base = document) ⇒ Nokogiri::XML::Node?
Next text node within base node or nil if it doesn’t exist.
- #position_by_text_node(text_node) ⇒ Object
-
#previous_text(base = document) ⇒ Nokogiri::XML::Node?
Previous text node within base node or nil if it doesn’t exist.
- #text_node_by_position(in_text_position) ⇒ Object
-
#text_nodes ⇒ Nokogiri::XML::NodeSet
All text nodes that have self as an ancestor.
-
#text_nodes_between(start_node, end_node) ⇒ Nokogiri::XML::NodeSet
All text nodes located between the specified boundaries.
- #text_range_by_index(index, demand_length = nil) ⇒ Object
-
#traverse_by_text(string, options = {}) {|node, offset| ... } ⇒ Nokogiri::XML::Node
Yields for each match of specified string in child nodes recursively.
Instance Method Details
#exclude_offsets(offsets) ⇒ Object
158 159 160 161 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 158
#
# Adds the given offsets to this node's list of excluded offsets.
#
# @param offsets [Array] offsets to append to the exclusion list
# @return [Array] the updated exclusion list
def exclude_offsets(offsets)
  already_excluded = @excluded_offsets || []
  @excluded_offsets = already_excluded + offsets
end
#excluded_offsets ⇒ Array
Returns the offsets within the self node that were already processed.
153 154 155 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 153
#
# Lazily initialised list of offsets excluded for this node.
#
# @return [Array] the stored exclusion list (an empty array on first access)
def excluded_offsets
  @excluded_offsets = [] unless @excluded_offsets
  @excluded_offsets
end
#first_text_node ⇒ Nokogiri::XML::Node
Returns first text node within self node.
17 18 19 20 21 22 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 17
#
# Finds the first node yielded by #traverse that reports itself as text.
#
# @return [Nokogiri::XML::Node, nil] the first text node, or nil when
#   traversal yields no text node
def first_text_node
  found = nil
  traverse do |candidate|
    next unless candidate.text?

    found = candidate
    # Stop the traversal as soon as the first text node is seen
    break
  end
  found
end
#highlight_by_ranges(ranges, options) ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 128
#
# Replaces self with markup in which every given range of self's text is
# wrapped in the configured snippet element; text outside the ranges is
# emitted as returned by #text.
#
# @param ranges [Array<Hash>] entries with a :range (inclusive Range of
#   indexes into #text) and an optional :starting flag
# @param options [Hash] reads options[:snippet][:part_wrapper],
#   options[:snippet][:part_wrapper_class] and
#   options[:snippet][:starting_part_class]
# @raise [ArgumentError] when options[:snippet][:part_wrapper] is blank
# @return [Object] the result of #replace
def highlight_by_ranges(ranges, options)
  snippet = options[:snippet]
  if snippet[:part_wrapper].blank?
    raise ArgumentError.new('Snippet part wrapper tag is not specified!')
  end

  ranges = ranges.sort_by { |r| r[:range].first }
  last_index = ranges.count - 1
  parts = []
  ranges.each_with_index do |range_data, index|
    range = range_data[:range]
    # Text preceding the first range
    parts << (range.first > 0 ? text[0..(range.first - 1)] : '') if index == 0
    snippet_class = range_data[:starting] ? "#{snippet[:starting_part_class]}" : ''
    wrapper = document.create_element("#{snippet[:part_wrapper]}",
                                      class: "#{snippet[:part_wrapper_class]} #{snippet_class}")
    wrapper.content = text[range]
    parts << wrapper.to_s
    # Text between this range and the next one
    parts << text[(range.last + 1)..(ranges[index + 1][:range].first - 1)] if index < last_index
    # Text following the last range
    parts << (range.last < text.length - 1 ? text[(range.last + 1)..(text.length - 1)] : '') if index == last_index
  end
  # NOTE(review): the text outside the wrappers is inserted exactly as #text
  # returns it; if #replace parses its argument as markup, characters such as
  # "<" or "&" in that text may need escaping - confirm.
  replace(parts.join(''))
end
#matched_offsets(string, types, options) ⇒ Object
163 164 165 166 167 168 169 170 171 172 173 174 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 163
#
# Collects, for each requested match type, the offsets of the :text capture
# group for every match of the type's regexp in self's text, skipping offsets
# that overlap the already excluded ones.
#
# @param string [String] text to search for (passed to build_regexp)
# @param types [Symbol, Array<Symbol>] one match type or a list of them
# @param options [Hash] passed through to build_regexp
# @return [Array<Array>] one array of [start, end] offsets per requested type,
#   in the order the types were given
def matched_offsets(string, types, options)
  types = [types] unless types.is_a?(Array)
  offsets = types.map do |type|
    text.to_enum(:scan, build_regexp(string, type, options)).map do
      offset = Regexp.last_match.offset(:text)
      # Only one highlighting per position
      offset unless overlapped_offsets?(excluded_offsets, offset)
    end.compact
  end
  return *offsets
end
#next_text(base = document) ⇒ Nokogiri::XML::Node?
Returns next text node within base node or nil if it doesn’t exist.
51 52 53 54 55 56 57 58 59 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 51
#
# Looks up the text node that follows self's last text node among the text
# nodes of base.
#
# @param base [Nokogiri::XML::Node] node whose text nodes are searched
#   (defaults to the document)
# @raise [ArgumentError] when self's last text node is not among base's
#   text nodes
# @return [Nokogiri::XML::Node, nil] the following text node, or nil when
#   self's last text node is the last one in base
def next_text(base = document)
  own_last_text = text_nodes.last
  candidates = base.text_nodes
  position = candidates.index(own_last_text)
  raise ArgumentError.new('Base must contain self node!') if position.blank?
  return if position == candidates.count - 1

  candidates[position + 1]
end
#position_by_text_node(text_node) ⇒ Object
107 108 109 110 111 112 113 114 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 107
#
# Computes the position within self's concatenated text at which the given
# text node starts, i.e. the total text length of all text nodes before it.
#
# @param text_node [Nokogiri::XML::Node] a text node expected to be among
#   self's text nodes
# @raise [ArgumentError] when text_node is not among self's text nodes
# @return [Integer] start position of text_node within self's text
def position_by_text_node(text_node)
  nodes = text_nodes
  index = nodes.index(text_node)
  # A missing node is reported as nil, so check for nil before comparing;
  # comparing nil with `<` would raise NoMethodError instead.
  if index.nil? || index < 0
    raise ArgumentError.new('Self node must contain text_node!')
  end
  return 0 if index == 0

  nodes[0..(index - 1)].map { |node| node.text }.join('').length
end
#previous_text(base = document) ⇒ Nokogiri::XML::Node?
Returns previous text node within base node or nil if it doesn’t exist.
39 40 41 42 43 44 45 46 47 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 39
#
# Looks up the text node that precedes self's first text node among the text
# nodes of base.
#
# @param base [Nokogiri::XML::Node] node whose text nodes are searched
#   (defaults to the document)
# @raise [ArgumentError] when self's first text node is not among base's
#   text nodes
# @return [Nokogiri::XML::Node, nil] the preceding text node, or nil when
#   self's first text node is the first one in base
def previous_text(base = document)
  own_first_text = text_nodes.first
  candidates = base.text_nodes
  position = candidates.index(own_first_text)
  raise ArgumentError.new('Base must contain self node!') if position.blank?
  return if position == 0

  candidates[position - 1]
end
#text_node_by_position(in_text_position) ⇒ Object
116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 116
#
# Maps a position within self's concatenated text to the text node that
# contains it and the position relative to that node's own text.
#
# @param in_text_position [Integer] position within self's text
# @raise [ArgumentError] when the position lies beyond all text nodes
# @return [Array] two elements: the containing text node and the position
#   inside that node's text
def text_node_by_position(in_text_position)
  remaining = in_text_position
  text_nodes.each do |node|
    node_length = node.text.length
    # The position falls inside this node
    return node, remaining if node_length - 1 >= remaining

    # Otherwise skip the node and make the position relative to the next one
    remaining -= node_length
  end
  raise ArgumentError.new('Inner index is out of range!')
end
#text_nodes ⇒ Nokogiri::XML::NodeSet
Returns all text nodes that have self as an ancestor.
7 8 9 10 11 12 13 14 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 7
#
# Gathers every node yielded by #traverse that reports itself as text.
#
# @return [Nokogiri::XML::NodeSet] node set built on self's document from the
#   collected text nodes, in traversal order
def text_nodes
  collected = []
  traverse { |node| collected.push(node) if node.text? }
  ::Nokogiri::XML::NodeSet.new(document, collected)
end
#text_nodes_between(start_node, end_node) ⇒ Nokogiri::XML::NodeSet
Returns all text nodes located between the specified boundaries.
27 28 29 30 31 32 33 34 35 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 27
#
# Selects self's text nodes that lie strictly between the two given nodes.
#
# @param start_node [Nokogiri::XML::Node] lower boundary (excluded)
# @param end_node [Nokogiri::XML::Node] upper boundary (excluded)
# @raise [ArgumentError] when either boundary is not among self's text nodes
# @return [Nokogiri::XML::NodeSet, Array] the nodes in between; note that a
#   plain empty Array is returned when nothing lies between the boundaries
def text_nodes_between(start_node, end_node)
  nodes = text_nodes
  start_index = nodes.index(start_node)
  end_index = nodes.index(end_node)
  if start_index.nil? || end_index.nil?
    raise ArgumentError.new('Node must contain both start and end nodes!')
  end
  # Start and end nodes are the same node or direct neighbours
  return [] if end_index - start_index < 2

  between = nodes.slice((start_index + 1)..(end_index - 1))
  ::Nokogiri::XML::NodeSet.new(document, between)
end
#text_range_by_index(index, demand_length = nil) ⇒ Object
148 149 150 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 148
#
# Builds an inclusive range of text indexes relative to the given index.
#
# @param index [Integer] index within self's text
# @param demand_length [Integer, nil] requested length of the range
# @return [Range] 0..index when no length is requested; otherwise a range
#   starting at index and covering demand_length characters, capped at the
#   last index of self's text
def text_range_by_index(index, demand_length = nil)
  return 0..index unless demand_length.present?

  last_position = [text.length - 1, index + demand_length - 1].min
  index..last_position
end
#traverse_by_text(string, options = {}) {|node, offset| ... } ⇒ Nokogiri::XML::Node
Yields for each match of specified string in child nodes recursively
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/hairaito/nokogiri/xml/node.rb', line 68
#
# Walks self and its descendants (text nodes are skipped) and yields every
# match of the given string found in a node's text, then records the matched
# offsets as excluded on the node and its ancestors up to self.
#
# @param string [String] text to search for
# @param options [Hash] passed to traverse_by_text_defaults; presumably that
#   helper populates @tbt_opts, which is read below - confirm
# @yield [node, offset] the node containing the match and the [start, end]
#   offset of the match within the node's text
# @return [Nokogiri::XML::Node] self
def traverse_by_text(string, options = {}, &block)
  traverse_by_text_defaults(options)
  traverse do |current_node|
    next if current_node.text?

    offset_types = @tbt_opts[:whole_words_only] ? [:inner_word, :boundary_word] : [:simple]
    inner_offsets, boundary_offsets = current_node.matched_offsets(string, offset_types, @tbt_opts)
    # Check words bordered with current inline tag if current node has boundary words
    # abc<span>def<span> or <span>def</span>ghi or abc<span>def</span>ghi
    if current_node.name.in?(@tbt_opts[:inline_tags]) && self != current_node
      # NOTE(review): matched_offsets appears to return one array per requested
      # type, so `.present?` on its result may be true even when that inner
      # array is empty - confirm the intended check.
      if boundary_offsets.try(:first).try(:first) == 0
        previous_node = current_node.previous_text(self)
        boundary_offsets.shift if previous_node.try(:matched_offsets, :any, :ending_word, @tbt_opts).present?
      end
      # NOTE(review): this tests whether the last boundary offset starts at 0,
      # not whether it ends at the end of the text - confirm this is intended.
      if boundary_offsets.try(:last).try(:first) == 0
        next_node = current_node.next_text(self)
        boundary_offsets.pop if next_node.try(:matched_offsets, :any, :beginning_word, @tbt_opts).present?
      end
    end
    offsets = (inner_offsets + (boundary_offsets || [])).sort_by { |offset| offset.first }
    if offsets.any?
      offsets.each { |offset| yield(current_node, offset) } if block_given?
      if current_node != self
        # Excludes processed offsets from all ancestors
        ([current_node] + current_node.ancestors).each do |node|
          pos = node.position_by_text_node(current_node.first_text_node)
          # Shifts all offsets according to node inner position and excludes from future processing
          node.exclude_offsets(offsets.map { |offset| [offset.first + pos, offset.last + pos] })
          # Reaches highlighting base
          break if node == self
        end
      end
    end
  end
  self
end