Class: CodeModels::Html::Parser

Inherits:
Parser
  • Object
show all
Defined in:
lib/codemodels/html/parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



86
87
88
89
90
# File 'lib/codemodels/html/parser.rb', line 86

# Builds the (initially empty) registry of embedded parsers.
#
# The registry is a Hash whose missing keys are lazily populated with a
# fresh, per-key empty Array on first access, so entries can be appended
# without checking whether the key already exists.
def initialize
  @embedded_parsers = Hash.new { |registry, node_class| registry[node_class] = [] }
end

Class Method Details

.node_content(node, code) ⇒ Object



122
123
124
125
# File 'lib/codemodels/html/parser.rb', line 122

# Extracts the slice of +code+ delimited by the positions that
# node_content_pos reports for +node+.
#
# @param node the node handed on to node_content_pos
# @param code the text being sliced
# @return the result of code[start..end], both bounds inclusive
def self.node_content(node,code)
  from, to = node_content_pos(node, code)
  code[from..to]
end

.node_content_pos(node, code) ⇒ Object



127
128
129
130
131
132
133
134
135
136
# File 'lib/codemodels/html/parser.rb', line 127

# Computes the inclusive [start, end] positions, relative to +code+, of the
# text found after the first '>' and before the last '<' inside the span
# code[node.begin...node.end].
#
# NOTE(review): String#first_index and String#last_index are not core Ruby
# methods; presumably they are defined elsewhere in the project and return
# the offset of the first / last occurrence -- confirm.
#
# @param node an object responding to #begin and #end (offsets into +code+)
# @param code the text the offsets refer to
# @return [Array] two-element array [start_index, end_index]
# @raise [RuntimeError] "problem" when start_index is greater than end_index;
#   under the assumption above this includes the case where nothing lies
#   between the first '>' and the last '<'
def self.node_content_pos(node,code)
  offset  = node.begin
  snippet = code[offset...(node.end)]
  # First position after the first '>' in the span.
  start_index = offset + snippet.first_index('>') + 1
  # Last position before the last '<' in the span.
  end_index = offset + snippet.last_index('<') - 1
  raise "problem" unless start_index <= end_index
  [start_index, end_index]
end

Instance Method Details

#parse_artifact(artifact) ⇒ Object



110
111
112
113
# File 'lib/codemodels/html/parser.rb', line 110

# Parses the code carried by +artifact+: builds the raw node tree from
# artifact.code and passes it, with the code and the artifact itself,
# to node_to_model.
#
# @param artifact an object responding to #code
# @return whatever node_to_model returns
def parse_artifact(artifact)
  code = artifact.code
  node_to_model(raw_node_tree(code), code, artifact)
end

#parse_code(code) ⇒ Object



106
107
108
# File 'lib/codemodels/html/parser.rb', line 106

# Parses a string of code by wrapping it in a FileArtifact whose first
# argument is the placeholder '<code>' and delegating to parse_artifact.
#
# @param code the text to parse
# @return whatever parse_artifact returns
def parse_code(code)
  artifact = FileArtifact.new('<code>', code)
  parse_artifact(artifact)
end

#parse_file(path) ⇒ Object



92
93
94
95
# File 'lib/codemodels/html/parser.rb', line 92

# Reads the file at +path+ and parses its content via parse_artifact,
# wrapping path and content in a FileArtifact.
#
# File.read is used instead of IO.read: on Ruby versions that support it,
# IO.read treats a path beginning with "|" as a command to spawn, whereas
# File.read only ever reads from the filesystem.
#
# @param path path of the file to parse
# @return whatever parse_artifact returns
# @raise [SystemCallError] (e.g. Errno::ENOENT) when the file cannot be read
def parse_file(path)
  code = File.read(path)
  parse_artifact(FileArtifact.new(path, code))
end

#raw_node_tree(code) ⇒ Object



97
98
99
100
101
102
103
104
# File 'lib/codemodels/html/parser.rb', line 97

# Builds a Jericho Source from +code+.
#
# Before constructing the Source, two settings are assigned on the Jericho
# Config class itself (not on an instance): recognition of HTML empty-element
# tags is switched on, and the current compatibility mode is set to XHTML.
# NOTE(review): as class-level assignments these presumably affect every
# later use of Jericho in the process -- confirm.
#
# @param code the text to parse
# @return the Java::net.htmlparser.jericho.Source created from +code+
def raw_node_tree(code)
  Java::net.htmlparser.jericho.Config.IsHTMLEmptyElementTagRecognised = true
  Java::net.htmlparser.jericho.Config.CurrentCompatibilityMode =
    Java::net.htmlparser.jericho.Config::CompatibilityMode::XHTML
  Java::net.htmlparser.jericho.Source.new(java.io.StringReader.new(code))
end

#register_embedded_parser(node_class, embedded_parser, &selector) ⇒ Object

It operates on the original node rather than on the model obtained from it, because the model could carry less information. For example, when parsing scripts the raw content is needed.



118
119
120
# File 'lib/codemodels/html/parser.rb', line 118

# Registers an embedded parser for a node class.
#
# An entry holding the parser and the given block is appended to the list
# kept in @embedded_parsers under +node_class+.
#
# @param node_class key under which the entry is stored
# @param embedded_parser value stored under :embedded_parser
# @param selector optional block stored under :selector (nil when no block
#   is given)
# @return the list of entries registered for +node_class+
def register_embedded_parser(node_class,embedded_parser,&selector)
  entry = { embedded_parser: embedded_parser, selector: selector }
  @embedded_parsers[node_class].push(entry)
end