Class: Fdlint::Parser::HTML::HtmlParser

Inherits:
BaseParser show all
Includes:
Helper::Logger, ParserVisitable
Defined in:
lib/fdlint/parser/html/html_parser.rb

Constant Summary collapse

TEXT =
/[^<]+/m
PROP_NAME =
%r/\w[-:\w]*/m
PROP_VALUE =
%r/'([^']*)'|"([^"]*)"|([^\s>]+)/m
PROP =
%r/#{PROP_NAME}\s*(?:=\s*#{PROP_VALUE})?/m
TAG_NAME =
/\w[^>\(\)\/\s]*/
TAG_START =
%r/<(#{TAG_NAME})/m
TAG_END =
%r/<\/#{TAG_NAME}\s*>/m
TAG =
%r/#{TAG_START}(\s+#{PROP})*\s*>/m
SELF_CLOSE_TAG =
%r/#{TAG_START}(\s+#{PROP})*\s*\/>/m
DTD =
/\s*<!(doctype)\s+(.*?)>/im
COMMENT =
/<!--(.*?)-->/m

Constants included from Helper::Logger

Helper::Logger::LEVELS

Instance Attribute Summary

Attributes inherited from BaseParser

#source

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Helper::Logger

#log, #logger

Methods included from ParserVisitable

#add_visitor, #add_visitors, included, #parse_no_throw, #results, #visitors, wrap

Methods inherited from BaseParser

#batch, #check, #eos?, #initialize, #raw_scan, #reset, #rest_source, #scan, #scanned_source, #scanner_pos, #skip, #skip_empty, #to_s

Constructor Details

This class inherits a constructor from Fdlint::Parser::BaseParser

Class Method Details

.parse(src) {|doc| ... } ⇒ Object

Yields:

  • (doc)


13
14
15
16
17
18
# File 'lib/fdlint/parser/html/html_parser.rb', line 13

# Convenience entry point: builds a parser for +src+, runs #parse on it,
# hands the resulting document to the optional block, and returns the
# document either way.
def self.parse(src, &block)
  new(src).parse.tap { |document| yield document if block_given? }
end

Instance Method Details

#parse ⇒ Object



32
33
34
# File 'lib/fdlint/parser/html/html_parser.rb', line 32

# Instance-level entry point: delegates to #parse_doc and returns whatever
# it returns.
def parse
  parse_doc
end

#parse_comment ⇒ Object



63
64
65
66
# File 'lib/fdlint/parser/html/html_parser.rb', line 63

# Consumes input matching COMMENT and wraps the first capture group of
# that match (the text between the comment delimiters) in a CommentElement.
def parse_comment
  scan(COMMENT)
  body = @scanner[1]
  CommentElement.new(body)
end

#parse_doc ⇒ Object



36
37
38
39
# File 'lib/fdlint/parser/html/html_parser.rb', line 36

# Builds the Document for the source: runs #parse_element through +batch+
# and passes the result to the Document constructor.
def parse_doc
  debug { "parse doc" }
  elements = batch(:parse_element)
  ::Fdlint::Parser::HTML::Document.new(elements)
end

#parse_dtd ⇒ Object



57
58
59
60
61
# File 'lib/fdlint/parser/html/html_parser.rb', line 57

# Consumes a doctype declaration matching DTD and builds a DTDElement.
#
# DTD captures the "doctype" keyword as group 1 and the declaration text as
# group 2; the element is constructed with (declaration, keyword, position),
# where position comes from the node returned by +scan+.
def parse_dtd
  debug { "parse dtd" }
  matched = scan(DTD)
  keyword     = @scanner[1]
  declaration = @scanner[2]
  DTDElement.new(declaration, keyword, matched.position)
end

#parse_element ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/fdlint/parser/html/html_parser.rb', line 41

# Parses the next top-level element by peeking at the scanner position.
#
# Candidates are tried in this order: doctype (accepted only once per
# document, tracked through @dtd_checked), comment, tag, and finally a text
# run. If none applies and text_end? is true, a parse error is reported.
def parse_element
  if @scanner.check(DTD) && !@dtd_checked
    # only one DTD for one document
    @dtd_checked = true
    return parse_dtd
  end

  return parse_comment if @scanner.check(COMMENT)
  return parse_tag if @scanner.check(TAG_START)
  return parse_error('Invalid HTML struct') if text_end?

  parse_text_tag
end

#parse_prop_name ⇒ Object



117
118
119
# File 'lib/fdlint/parser/html/html_parser.rb', line 117

# Consumes an attribute name matching PROP_NAME and returns whatever
# +scan+ returns for it.
def parse_prop_name
  scan(PROP_NAME)
end

#parse_prop_value ⇒ Object



121
122
123
124
# File 'lib/fdlint/parser/html/html_parser.rb', line 121

# Consumes an attribute value matching PROP_VALUE and returns it as a
# String without surrounding quotes.
#
# PROP_VALUE has three alternatives, each with its own capture group:
# single-quoted (1), double-quoted (2) and unquoted (3). Groups that did
# not take part in the match are nil and interpolate as the empty string,
# so the concatenation yields the content of whichever one matched.
def parse_prop_value
  scan(PROP_VALUE)
  single_quoted = @scanner[1]
  double_quoted = @scanner[2]
  unquoted      = @scanner[3]
  "#{single_quoted}#{double_quoted}#{unquoted}"
end

#parse_properties ⇒ Object



96
97
98
99
100
101
102
103
104
105
# File 'lib/fdlint/parser/html/html_parser.rb', line 96

# Parses consecutive attributes until prop_search_done? reports completion
# and returns them as an Array. skip_empty is called once up front and once
# after each attribute; falsy results from #parse_property are left out.
def parse_properties
  collected = []
  skip_empty
  until prop_search_done?
    candidate = parse_property
    collected << candidate if candidate
    skip_empty
  end
  collected
end

#parse_property ⇒ Object



107
108
109
110
111
112
113
114
115
# File 'lib/fdlint/parser/html/html_parser.rb', line 107

# Parses one attribute and returns it as a Property(name, value, sep).
#
# When no '=' follows the name, value and sep are both nil. Otherwise the
# '=' is skipped, sep records the quote character found at the scanner
# position (nil when there is none), and the value comes from
# #parse_prop_value.
def parse_property
  name = parse_prop_name
  return Property.new(name, nil, nil) unless @scanner.check(/\s*=/)

  skip(/[=]/)
  quote = @scanner.check(/['"]/)
  Property.new(name, parse_prop_value, quote)
end

#parse_tag ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
# File 'lib/fdlint/parser/html/html_parser.rb', line 84

# Dispatches on the kind of tag found at the scanner position: doctype,
# self-closing tag, then ordinary opening tag, in that order. Reports a
# parse error when none of the patterns matches.
#
# NOTE(review): parse_dtd_tag is not among the methods visible alongside
# this one (only parse_dtd is) — confirm it is defined elsewhere.
def parse_tag
  return parse_dtd_tag if @scanner.check(DTD)
  return parse_self_ending_tag if @scanner.check(SELF_CLOSE_TAG)
  return parse_normal_tag if @scanner.check(TAG)

  parse_error('Invalid HTML struct')
end

#parse_text_tag ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/fdlint/parser/html/html_parser.rb', line 68

# Collects a run of text into a TextElement, looping until text_end? is
# true. The element receives a copy of the current scopes and the scanner
# position recorded before any text was consumed.
#
# After each pass the whole accumulated buffer is checked for '<' or '>';
# unless @parsing_script is set, a warning quoting the first such character
# in the buffer is emitted.
def parse_text_tag
  start  = scanner_pos
  buffer = ''
  until text_end?
    # text_end? was false, so a '<' at this position is kept as literal text.
    buffer << '<' if @scanner.skip(/</)
    buffer << @scanner.scan(TEXT).to_s

    # TODO: make this detection a rule
    unescaped = buffer[/<|>/]
    parse_warn "'#{unescaped}' not escaped" if unescaped && !@parsing_script
  end

  element = TextElement.new(buffer)
  element.scopes   = scopes.dup
  element.position = start
  element
end