Class: HTMLParser

Inherits:
Object
  • Object
show all
Defined in:
lib/html-renderer/html_parser.rb

Defined Under Namespace

Classes: Tag

Constant Summary collapse

# Matches any angle-bracketed tag; group 1 is everything between "<" and ">".
# It also matches closing tags (group 1 would start with "/"), so
# CLOSE_TAG_RE has to be tried first when both could apply.
OPEN_TAG_RE =
%r{<([^>]+)>}
# Matches a closing tag; group 1 is the text between "</" and ">".
CLOSE_TAG_RE =
%r{</([^>]+)>}
# Matches a run of one or more characters that are not "<".
TEXT_RE =
%r{[^<]+}
# Matches a single name=value attribute. Group 1 is the name; the value is in
# group 2 (double-quoted), group 3 (single-quoted) or group 4 (bare word).
# Quoted values must be non-empty, and names and bare values are limited to
# \w characters (so a hyphenated name is not matched as a whole).
ATTR_RE =
%r{(\w+)=(?:"([^"]+)"|'([^']+)'|(\w+))}

Instance Method Summary collapse

Constructor Details

#initialize(html) ⇒ HTMLParser

Returns a new instance of HTMLParser.



50
51
52
53
# File 'lib/html-renderer/html_parser.rb', line 50

# Builds a parser over the given markup.
#
# The source is wrapped in a StringScanner held in @s, which the other
# methods of this class read from.
#
# @param html [String] the HTML source to scan
def initialize(html)
  @s = StringScanner.new(html)
end

Instance Method Details

#as_tree ⇒ Object



68
69
70
# File 'lib/html-renderer/html_parser.rb', line 68

# Parses the input via #tree and converts every element of the result with
# its #recursive_inspect method.
#
# @return [Object] whatever #map on the value returned by #tree produces
def as_tree
  tree.map(&:recursive_inspect)
end

#each_tag ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/html-renderer/html_parser.rb', line 55

# Tokenizes the remaining scanner input, yielding one token per tag or text
# run. Each token is a two-element array:
#
#   [:close_tag, name]  # text between "</" and ">"
#   [:open_tag, tag]    # Tag.from_str applied to the text between "<" and ">"
#   [:text, string]     # raw text up to the next "<"
#
# CLOSE_TAG_RE is tried before OPEN_TAG_RE because the open-tag pattern also
# matches closing tags.
#
# A "<" that does not start a well-formed tag (e.g. "a < b", "<>", or an
# unterminated "<div" at the end of input) is emitted as text. Without that
# fallback none of the patterns would match and the loop would never advance.
#
# Tokens are read from @s from its current position; nothing in this method
# rewinds the scanner, so a second pass starts where the first one stopped.
#
# @yieldparam token [Array] one of the token shapes listed above
# @return [nil] when a block is given
# @return [Enumerator] when called without a block
def each_tag
  return enum_for(:each_tag) unless block_given?

  until @s.eos?
    if @s.scan(CLOSE_TAG_RE)
      yield [:close_tag, @s.captures.first]
    elsif @s.scan(OPEN_TAG_RE)
      yield [:open_tag, Tag.from_str(@s.captures.first)]
    elsif @s.scan(TEXT_RE)
      yield [:text, @s.matched]
    else
      # Only reachable when the next character is a "<" that begins no valid
      # tag. Consume it together with the text that follows (up to the next
      # "<") so the scanner is guaranteed to make progress.
      yield [:text, @s.scan(/<[^<]*/)]
    end
  end
end

#tree ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/html-renderer/html_parser.rb', line 72

# Builds a nested element structure from the tokens produced by #each_tag.
#
# A synthetic Tag named "root" is pushed first and acts as the parent of all
# top-level content. Text tokens are stripped and, if non-empty, appended to
# the children of the element on top of the stack. An open tag is appended to
# the current top's children and then becomes the new top. A close tag pops
# the current top.
#
# Closing tags with no matching open tag are ignored. Popping for them would
# remove the root itself, and the next token would then fail on an empty
# stack.
#
# NOTE(review): the name carried by a :close_tag token is not compared with
# the element being closed, and every open tag stays on the stack until some
# close tag arrives — confirm whether void elements such as <br> need
# special handling.
#
# @return [Stack] the working stack; for balanced input only the root remains
# @raise [RuntimeError] if #each_tag yields an unknown token type
def tree
  stack = Stack.new
  stack.push Tag.new("root")
  open_depth = 0 # number of elements currently stacked above the root

  each_tag do |type, elem|
    case type
    when :text
      text = elem.strip
      stack.top.children << text unless text.empty?
    when :open_tag
      stack.top.children << elem
      stack.push elem
      open_depth += 1
    when :close_tag
      # Never pop the root: a surplus closing tag is simply dropped.
      if open_depth.positive?
        stack.pop
        open_depth -= 1
      end
    else
      raise "unexpected token type: #{type.inspect}"
    end
  end

  stack
end