Class: Karasuba::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/karasuba/parser.rb

Constant Summary collapse

# Tag names checked by #stopping_element?: an element whose name is listed
# here is reported as a stopping element. Frozen so the shared list cannot be
# mutated at runtime.
STOPPING_ELEMENTS =
%w[en-todo br en-note].freeze
# Tag names checked by #ignore_element?: an element whose name is listed here
# is reported as ignorable. Frozen for the same reason as above.
IGNORED_TEXT_ELEMENTS =
%w[img map table].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(todo_or_element, options = {}) ⇒ Parser

Returns a new instance of Parser.



8
9
10
11
12
# File 'lib/karasuba/parser.rb', line 8

# Builds a parser around a todo.
#
# @param todo_or_element [Todo, Object] an existing Todo, which is used as is,
#   or any other object, which is wrapped in a new Todo together with an empty
#   string, two empty arrays and +options+.
#   NOTE(review): the empty string and arrays presumably seed the todo's
#   title and sibling lists - confirm against Todo's constructor.
# @param options [Hash] read for +:ignore_link+ and +:stop_link+; also passed
#   on to Todo.new when a new Todo is built.
def initialize(todo_or_element, options = {})
  @todo =
    if todo_or_element.is_a?(Todo)
      todo_or_element
    else
      Todo.new(todo_or_element, '', [], [], options)
    end
  @ignore_link = options[:ignore_link]
  @stop_link = options[:stop_link]
end

Instance Attribute Details

#todo ⇒ Object (readonly)

Returns the value of attribute todo.



6
7
8
# File 'lib/karasuba/parser.rb', line 6

# Reader for the todo this parser was built with.
#
# @return [Object] the current value of @todo
def todo
  instance_variable_get(:@todo)
end

Instance Method Details

#following_element(cursor) ⇒ Object



81
82
83
# File 'lib/karasuba/parser.rb', line 81

# Finds the node that comes after +cursor+ without descending into it.
#
# Returns the cursor's next sibling when there is one. Otherwise climbs to the
# parent and tries again, giving up as soon as the parent is the root (see
# #is_root?) or there is no parent at all.
#
# @param cursor [Object] a node responding to +next+ and +parent+
# @return [Object, nil] the following node, or nil when the climb reaches the
#   root or runs out of parents
def following_element(cursor)
  node = cursor
  while node
    sibling = node.next
    return sibling if sibling

    parent = node.parent
    # A nil parent means the tree has no root ancestor on this path; treat it
    # as the end of the walk instead of calling is_root? on nil.
    return nil if parent.nil? || is_root?(parent)

    node = parent
  end
end

#ignore_element?(el) ⇒ Boolean

Returns:

  • (Boolean)


40
41
42
# File 'lib/karasuba/parser.rb', line 40

# Tells whether +el+ should be ignored: either its tag name is listed in
# IGNORED_TEXT_ELEMENTS or it matches the configured ignore link.
#
# @param el [Object] a node responding to +name+
# @return [Object] true for a listed tag name, otherwise whatever
#   #ignore_link? returns for +el+
def ignore_element?(el)
  return true if IGNORED_TEXT_ELEMENTS.include?(el.name)

  ignore_link?(el)
end

#ignore_link?(el) ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
47
# File 'lib/karasuba/parser.rb', line 44

# Tells whether +el+ matches the +:ignore_link+ option given to the
# constructor.
#
# @param el [Object] the node to test
# @return [Object] false when no ignore link is configured; otherwise the
#   result of #match_href && #match_content, which may be a truthy or falsy
#   non-boolean value
def ignore_link?(el)
  criteria = @ignore_link
  if criteria
    match_href(el, criteria) && match_content(el, criteria)
  else
    false
  end
end

#is_root?(element) ⇒ Boolean

Returns:

  • (Boolean)


85
86
87
# File 'lib/karasuba/parser.rb', line 85

# Tells whether +element+ is the root element, i.e. its name is 'en-note'.
#
# @param element [Object] a node responding to +name+
# @return [Boolean]
def is_root?(element)
  tag = element.name
  tag == 'en-note'
end

#match_content(el, link_options) ⇒ Object



63
64
65
66
67
68
69
70
# File 'lib/karasuba/parser.rb', line 63

# Checks the content of +el+ against the +:content+ entry of +link_options+.
#
# @param el [Object] a node responding to +content+
# @param link_options [Hash] may hold +:content+ as a Regexp or any other
#   value to compare with ==
# @return [Object] true when no +:content+ is set; the result of Regexp#match
#   (MatchData or nil) for a Regexp; otherwise the boolean result of ==
def match_content(el, link_options)
  expected = link_options[:content]
  return true unless expected
  return expected.match(el.content) if expected.is_a?(Regexp)

  expected == el.content
end

#match_href(el, link_options) ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/karasuba/parser.rb', line 54

# Checks the 'href' entry of +el+ against the +:href+ entry of +link_options+.
#
# @param el [Object] a node responding to +[]+ with the key 'href'
# @param link_options [Hash] may hold +:href+ as a Regexp or any other value
#   to compare with ==
# @return [Object] true when no +:href+ is set; the result of Regexp#match
#   (MatchData or nil) for a Regexp; otherwise the boolean result of ==
def match_href(el, link_options)
  expected = link_options[:href]
  return true unless expected
  return expected.match(el['href']) if expected.is_a?(Regexp)

  expected == el['href']
end

#next_element(cursor) ⇒ Object



72
73
74
75
76
77
78
79
# File 'lib/karasuba/parser.rb', line 72

# Picks the next node to visit after +cursor+.
#
# When @ignore_children is set, the flag is cleared and the cursor's children
# are skipped. Otherwise the first child is preferred, falling back to
# #following_element when there are no children.
#
# @param cursor [Object] a node responding to +children+
# @return [Object, nil] the next node, or whatever #following_element returns
def next_element(cursor)
  skip_subtree = @ignore_children
  @ignore_children = false if skip_subtree
  first_child = skip_subtree ? nil : cursor.children.first
  first_child || following_element(cursor)
end

#parse ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/karasuba/parser.rb', line 14

# Walks the nodes that come after the todo's element, as produced by
# #next_element, and records them on the todo until a stopping element is met
# or there are no more nodes.
#
# For each visited node that is not a stopping element:
# * the node is appended to +todo.following_siblings+;
# * if #ignore_element? accepts it, its children are skipped on the next step;
# * otherwise, if it is a text node, it is appended to +todo.text_sibblings+
#   and its text is appended to +todo.title+.
#
# @return [Object] the todo, after it has been filled in
def parse
  cursor = todo.element
  while (cursor = next_element(cursor))
    if stopping_element?(cursor)
      # Always end the walk here; it must not depend on what stop_todo
      # happens to return.
      stop_todo(cursor)
      break
    end

    todo.following_siblings << cursor
    if ignore_element?(cursor)
      # Read and cleared by next_element on the following iteration.
      @ignore_children = true
    elsif cursor.text?
      todo.text_sibblings << cursor
      todo.title << cursor.text
    end
  end
  todo
end

#stop_link?(el) ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
52
# File 'lib/karasuba/parser.rb', line 49

# Tells whether +el+ matches the +:stop_link+ option given to the constructor.
#
# @param el [Object] the node to test
# @return [Object] false when no stop link is configured; otherwise the result
#   of #match_href && #match_content, which may be a truthy or falsy
#   non-boolean value
def stop_link?(el)
  criteria = @stop_link
  if criteria
    match_href(el, criteria) && match_content(el, criteria)
  else
    false
  end
end

#stop_todo(cursor) ⇒ Object



31
32
33
34
# File 'lib/karasuba/parser.rb', line 31

# Records on the todo where the walk stopped.
#
# Stores the result of #stop_link? for +cursor+ in +todo.stopped_by_link+ and
# the cursor itself in +todo.stopping_sibling+.
#
# @param cursor [Object] the node that ended the walk
# @return [Object] +cursor+ (the value of the final assignment)
def stop_todo(cursor)
  target = todo
  target.stopped_by_link = stop_link?(cursor)
  target.stopping_sibling = cursor
end

#stopping_element?(el) ⇒ Boolean

Returns:

  • (Boolean)


36
37
38
# File 'lib/karasuba/parser.rb', line 36

# Tells whether +el+ ends the walk: either its tag name is listed in
# STOPPING_ELEMENTS or it matches the configured stop link.
#
# @param el [Object] a node responding to +name+
# @return [Object] true for a listed tag name, otherwise whatever #stop_link?
#   returns for +el+
def stopping_element?(el)
  if STOPPING_ELEMENTS.include?(el.name)
    true
  else
    stop_link?(el)
  end
end