Class: Govspeak::StructuredHeaderExtractor
- Inherits: Object
- Inheritance hierarchy: Object → Govspeak::StructuredHeaderExtractor
- Defined in:
- lib/govspeak/structured_header_extractor.rb
Instance Method Summary
- #add_child(header) ⇒ Object
- #add_sibling(header) ⇒ Object
- #add_top_level(header) ⇒ Object
- #add_uncle_or_aunt(header) ⇒ Object
- #call ⇒ Object
- #header_at_higher_level_than_prev?(header) ⇒ Boolean
- #header_at_same_level_as_prev?(header) ⇒ Boolean
- #header_higher_than_top_level?(header) ⇒ Boolean
- #header_one_level_lower_than_prev?(header) ⇒ Boolean
- #headers_list ⇒ Object
- #initialize(document) ⇒ StructuredHeaderExtractor (constructor): returns a new instance of StructuredHeaderExtractor.
- #pop_stack_to_level(header) ⇒ Object
- #reset_stack ⇒ Object
Constructor Details
#initialize(document) ⇒ StructuredHeaderExtractor
Returns a new instance of StructuredHeaderExtractor.
20 21 22 23 24 |
# File 'lib/govspeak/structured_header_extractor.rb', line 20
#
# Builds an extractor for the given document, starting with an empty result
# list and an empty ancestor stack.
#
# @param document [Object] stored in @doc; #headers_list later reads
#   `doc.headers` (presumably `doc` is a reader for @doc — confirm in the
#   class body, which is not shown here)
def initialize(document)
  @structured_headers = []
  @doc = document
  reset_stack
end
Instance Method Details
#add_child(header) ⇒ Object
67 68 69 |
# File 'lib/govspeak/structured_header_extractor.rb', line 67
#
# Nests +header+ under the header currently on top of the stack.
#
# @param header [#headers] the header to attach
# @return [Object] the parent's headers collection after appending
def add_child(header)
  parent = stack.last
  parent.headers << header
end
#add_sibling(header) ⇒ Object
62 63 64 65 |
# File 'lib/govspeak/structured_header_extractor.rb', line 62
#
# Attaches +header+ next to the previous header: the previous header is
# removed from the stack so that its parent becomes the top entry, and
# +header+ is appended to that parent's children.
#
# @param header [#headers] the header to attach
# @return [Object] the shared parent's headers collection after appending
def add_sibling(header)
  stack.pop # drop the previous header; its parent is now on top
  shared_parent = stack.last
  shared_parent.headers << header
end
#add_top_level(header) ⇒ Object
57 58 59 60 |
# File 'lib/govspeak/structured_header_extractor.rb', line 57
#
# Records +header+ as a new root of the result tree and clears the ancestor
# stack so that following headers nest under this one.
#
# @param header [Object] a top-level header
# @return [Array] the freshly reset (empty) stack
def add_top_level(header)
  structured_headers << header
  reset_stack
end
#add_uncle_or_aunt(header) ⇒ Object
71 72 73 74 |
# File 'lib/govspeak/structured_header_extractor.rb', line 71
#
# Attaches a header that sits at a shallower level than the previous one:
# the stack is unwound via #pop_stack_to_level and +header+ is appended to
# whichever header is then on top.
#
# @param header [#level, #headers] the header to attach
# @return [Object] the new parent's headers collection after appending
def add_uncle_or_aunt(header)
  pop_stack_to_level(header)
  new_parent = stack.last
  new_parent.headers << header
end
#call ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/govspeak/structured_header_extractor.rb', line 26
#
# Walks the flat header list in document order and arranges it into a tree.
#
# Each header is handled according to its level relative to the header on
# top of the stack:
# * headers above the top level are skipped;
# * top-level headers become roots and reset the stack;
# * same-level headers become siblings of the previous header;
# * headers exactly one level deeper become children of the previous header;
# * shallower headers are attached further up the current branch;
# * anything else (for example a header that skips a level, or a nested
#   header seen before any top-level header) is ignored.
#
# The method is safe to call repeatedly: once a tree has been built it is
# returned as-is instead of appending every header a second time.
#
# @return [Array] the top-level headers, each carrying its nested headers
def call
  # Nothing is ever attached before the first top-level header is recorded
  # (every other branch requires a non-empty stack), so a non-empty result
  # means a previous run already built the tree. Re-running would append
  # the memoised header objects to it again.
  return structured_headers unless structured_headers.empty?

  headers_list.each do |header|
    next if header_higher_than_top_level?(header)

    if header.top_level?
      add_top_level(header)
    elsif header_at_same_level_as_prev?(header)
      add_sibling(header)
    elsif header_one_level_lower_than_prev?(header)
      add_child(header)
    elsif header_at_higher_level_than_prev?(header)
      add_uncle_or_aunt(header)
    else
      next # ignore semantically invalid headers
    end

    # The header just placed becomes the reference point for the next one.
    stack.push(header)
  end

  structured_headers
end
#header_at_higher_level_than_prev?(header) ⇒ Boolean
89 90 91 92 |
# File 'lib/govspeak/structured_header_extractor.rb', line 89
#
# Tells whether +header+ is shallower than the header on top of the stack.
# "Higher level" means a smaller level integer.
#
# @param header [#level] the header being placed
# @return [Boolean, nil] nil when the stack is empty
def header_at_higher_level_than_prev?(header)
  previous = stack.last
  previous && (previous.level > header.level)
end
#header_at_same_level_as_prev?(header) ⇒ Boolean
80 81 82 |
# File 'lib/govspeak/structured_header_extractor.rb', line 80
#
# Tells whether +header+ has the same level as the header on top of the stack.
#
# @param header [#level] the header being placed
# @return [Boolean, nil] nil when the stack is empty
def header_at_same_level_as_prev?(header)
  previous = stack.last
  previous && previous.level == header.level
end
#header_higher_than_top_level?(header) ⇒ Boolean
76 77 78 |
# File 'lib/govspeak/structured_header_extractor.rb', line 76
#
# Tells whether +header+ sits above the level the header itself reports as
# the top level (i.e. its level integer is smaller than +top_level+).
#
# @param header [#level, #top_level] the header being inspected
# @return [Boolean]
def header_higher_than_top_level?(header)
  header.top_level > header.level
end
#header_one_level_lower_than_prev?(header) ⇒ Boolean
84 85 86 87 |
# File 'lib/govspeak/structured_header_extractor.rb', line 84
#
# Tells whether +header+ is exactly one level deeper than the header on top
# of the stack. "Lower level" means a larger level integer.
#
# @param header [#level] the header being placed
# @return [Boolean, nil] nil when the stack is empty
def header_one_level_lower_than_prev?(header)
  previous = stack.last
  previous && (header.level == previous.level + 1)
end
#headers_list ⇒ Object
51 52 53 54 55 |
# File 'lib/govspeak/structured_header_extractor.rb', line 51
#
# Wraps every header of the document in a StructuredHeader that starts with
# an empty list of nested headers. The list is built once and cached in
# @headers_list.
#
# @return [Object] the result of mapping `doc.headers` (an Array when
#   `doc.headers` is an Array)
def headers_list
  return @headers_list if @headers_list

  @headers_list = doc.headers.map do |source|
    StructuredHeader.new(source.text, source.level, source.id, [])
  end
end
#pop_stack_to_level(header) ⇒ Object
94 95 96 97 |
# File 'lib/govspeak/structured_header_extractor.rb', line 94
#
# Unwinds the stack by the level difference between its top entry and
# +header+, plus one. Assuming the stack holds one header per consecutive
# level (TODO confirm against #call), this removes the entry at +header+'s
# level and everything deeper, leaving the prospective parent on top.
#
# @param header [#level] the header about to be attached
# @return [Integer] the number of pops requested
def pop_stack_to_level(header)
  depth_gap = stack.last.level - header.level
  (depth_gap + 1).times { stack.pop }
end
#reset_stack ⇒ Object
99 100 101 |
# File 'lib/govspeak/structured_header_extractor.rb', line 99
#
# Replaces the ancestor stack with a brand-new empty array.
#
# @return [Array] the new, empty stack
def reset_stack
  @stack = []
end