Class: Karasuba::Parser
- Inherits: Object
  - Object
  - Karasuba::Parser
- Defined in: lib/karasuba/parser.rb
Constant Summary collapse
- STOPPING_ELEMENTS = ['en-todo', 'br', 'en-note']
- IGNORED_TEXT_ELEMENTS = ['img', 'map', 'table']
Instance Attribute Summary collapse
-
#todo ⇒ Object
readonly
Returns the value of attribute todo.
Instance Method Summary collapse
- #following_element(cursor) ⇒ Object
- #ignore_element?(el) ⇒ Boolean
- #ignore_link?(el) ⇒ Boolean
-
#initialize(todo_or_element, options = {}) ⇒ Parser
constructor
A new instance of Parser.
- #is_root?(element) ⇒ Boolean
- #match_content(el, link_options) ⇒ Object
- #match_href(el, link_options) ⇒ Object
- #next_element(cursor) ⇒ Object
- #parse ⇒ Object
- #stop_link?(el) ⇒ Boolean
- #stop_todo(cursor) ⇒ Object
- #stopping_element?(el) ⇒ Boolean
Constructor Details
#initialize(todo_or_element, options = {}) ⇒ Parser
Returns a new instance of Parser.
8 9 10 11 12 |
# File 'lib/karasuba/parser.rb', line 8
#
# Builds a parser around an existing Todo, or around a raw element that is
# wrapped in a freshly created Todo (empty title string, two empty arrays).
#
# @param todo_or_element [Todo, Object] a Todo to reuse as-is, or the element
#   handed to Todo.new as its first argument
# @param options [Hash] parser options; the :ignore_link and :stop_link
#   entries are stored for the ignore_link? / stop_link? checks
# @return [Parser] a new instance of Parser
def initialize(todo_or_element, options = {})
  # NOTE(review): the last Todo.new argument is assumed to be `options`
  # (forwarded unchanged) — confirm against Todo#initialize.
  @todo = todo_or_element.is_a?(Todo) ? todo_or_element : Todo.new(todo_or_element, '', [], [], options)
  @ignore_link = options[:ignore_link]
  @stop_link = options[:stop_link]
end
Instance Attribute Details
#todo ⇒ Object (readonly)
Returns the value of attribute todo.
6 7 8 |
# File 'lib/karasuba/parser.rb', line 6
#
# Read-only accessor for the todo this parser works on.
#
# @return [Object] the value stored in @todo
def todo
  @todo
end
Instance Method Details
#following_element(cursor) ⇒ Object
81 82 83 |
# File 'lib/karasuba/parser.rb', line 81
#
# Finds the node that comes after +cursor+ without descending into it: the
# next sibling if there is one, otherwise the next sibling of the closest
# ancestor that has one. The climb stops (returning nil) as soon as the
# parent is the root element according to is_root?.
#
# @param cursor [#next, #parent] the node to start from
# @return [Object, nil] the following node, or nil when the root is reached
#   or the node has no parent at all
def following_element(cursor)
  node = cursor
  loop do
    sibling = node.next
    return sibling if sibling

    parent = node.parent
    # A node without a parent (e.g. detached from any root) has nothing
    # after it; bail out instead of letting is_root?(nil) raise on nil.name.
    return nil if parent.nil? || is_root?(parent)

    node = parent
  end
end
#ignore_element?(el) ⇒ Boolean
40 41 42 |
# File 'lib/karasuba/parser.rb', line 40
#
# Tells whether +el+ should be ignored: either its tag name is listed in
# IGNORED_TEXT_ELEMENTS or it matches the configured ignore-link options.
#
# @param el [#name] the element to test
# @return [Boolean] true for an ignored tag name, otherwise whatever
#   ignore_link? returns for the element
def ignore_element?(el)
  return true if IGNORED_TEXT_ELEMENTS.include?(el.name)

  ignore_link?(el)
end
#ignore_link?(el) ⇒ Boolean
44 45 46 47 |
# File 'lib/karasuba/parser.rb', line 44
#
# Checks +el+ against the ignore-link options held in @ignore_link.
# Without such options nothing is ever ignored as a link.
#
# @param el [Object] the element passed on to match_href and match_content
# @return [Boolean, Object] false when no options are set; otherwise the
#   result of match_href && match_content for the element
def ignore_link?(el)
  criteria = @ignore_link
  if criteria
    match_href(el, criteria) && match_content(el, criteria)
  else
    false
  end
end
#is_root?(element) ⇒ Boolean
85 86 87 |
# File 'lib/karasuba/parser.rb', line 85
#
# Tells whether +element+ is the root of the note, i.e. its tag name is
# 'en-note'.
#
# @param element [#name] the element to test
# @return [Boolean] true when the element is named 'en-note'
def is_root?(element)
  tag_name = element.name
  tag_name == 'en-note'
end
#match_content(el, link_options) ⇒ Object
63 64 65 66 67 68 69 70 |
# File 'lib/karasuba/parser.rb', line 63
#
# Compares the content of +el+ with the :content entry of +link_options+.
# A missing :content entry means "no constraint" and always matches.
#
# @param el [#content] the element whose content is compared
# @param link_options [Hash] options; only the :content entry is read, which
#   may be a Regexp (pattern match) or any other value (equality)
# @return [Boolean, MatchData, nil] true when there is no :content entry;
#   the Regexp#match result for a Regexp; otherwise the == comparison result
def match_content(el, link_options)
  expected = link_options[:content]
  return true unless expected

  if expected.is_a?(Regexp)
    expected.match(el.content)
  else
    expected == el.content
  end
end
#match_href(el, link_options) ⇒ Object
54 55 56 57 58 59 60 61 |
# File 'lib/karasuba/parser.rb', line 54
#
# Compares the 'href' attribute of +el+ with the :href entry of
# +link_options+. A missing :href entry means "no constraint" and always
# matches.
#
# @param el [#[]] the element; its 'href' value is read with el['href']
# @param link_options [Hash] options; only the :href entry is read, which
#   may be a Regexp (pattern match) or any other value (equality)
# @return [Boolean, MatchData, nil] true when there is no :href entry;
#   the Regexp#match result for a Regexp; otherwise the == comparison result
def match_href(el, link_options)
  expected = link_options[:href]
  return true unless expected

  if expected.is_a?(Regexp)
    expected.match(el['href'])
  else
    expected == el['href']
  end
end
#next_element(cursor) ⇒ Object
72 73 74 75 76 77 78 79 |
# File 'lib/karasuba/parser.rb', line 72
#
# Advances the traversal by one node. Normally this descends into the first
# child of +cursor+ and falls back to following_element when there is none.
# When @ignore_children is set, the flag is cleared and the children of
# +cursor+ are skipped for this one step.
#
# @param cursor [#children] the current node
# @return [Object, nil] the next node to visit, or whatever
#   following_element returns when there is no child to descend into
def next_element(cursor)
  unless @ignore_children
    return cursor.children.first || following_element(cursor)
  end

  # One-shot flag: consume it, then step over the subtree.
  @ignore_children = false
  following_element(cursor)
end
#parse ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/karasuba/parser.rb', line 14
#
# Walks the nodes that come after the todo's element and fills in the todo.
# Every visited node is appended to todo.following_siblings; text nodes are
# also appended to todo.text_sibblings and their text is appended to
# todo.title. The walk ends when next_element runs out of nodes or when a
# stopping element is met and stop_todo returns a truthy value.
#
# @return [Object] the todo, after it has been filled in
def parse
  cursor = todo.element
  while (cursor = next_element(cursor))
    # The stopping node is recorded by stop_todo and is not collected.
    break if stopping_element?(cursor) && stop_todo(cursor)

    todo.following_siblings << cursor
    if ignore_element?(cursor)
      # Makes the next call to next_element skip this node's children.
      @ignore_children = true
    elsif cursor.text?
      todo.text_sibblings << cursor
      todo.title << cursor.text
    end
  end
  todo
end
#stop_link?(el) ⇒ Boolean
49 50 51 52 |
# File 'lib/karasuba/parser.rb', line 49
#
# Checks +el+ against the stop-link options held in @stop_link.
# Without such options no element ever counts as a stopping link.
#
# @param el [Object] the element passed on to match_href and match_content
# @return [Boolean, Object] false when no options are set; otherwise the
#   result of match_href && match_content for the element
def stop_link?(el)
  criteria = @stop_link
  if criteria
    match_href(el, criteria) && match_content(el, criteria)
  else
    false
  end
end
#stop_todo(cursor) ⇒ Object
31 32 33 34 |
# File 'lib/karasuba/parser.rb', line 31
#
# Records on the todo where parsing stopped: whether the stopping node
# matched the stop-link options, and the node itself.
#
# @param cursor [Object] the node that stopped the traversal
# @return [Object] +cursor+ (the value of the last assignment); parse
#   relies on this being truthy to break out of its loop
def stop_todo(cursor)
  current = todo
  current.stopped_by_link = stop_link?(cursor)
  current.stopping_sibling = cursor
end
#stopping_element?(el) ⇒ Boolean
36 37 38 |
# File 'lib/karasuba/parser.rb', line 36
#
# Tells whether +el+ ends the current todo: either its tag name is listed in
# STOPPING_ELEMENTS or it matches the configured stop-link options.
#
# @param el [#name] the element to test
# @return [Boolean] true for a stopping tag name, otherwise whatever
#   stop_link? returns for the element
def stopping_element?(el)
  return true if STOPPING_ELEMENTS.include?(el.name)

  stop_link?(el)
end