Class: ODDB::ChapterParse::Parser

Inherits:
BasicHtmlParser
  • Object
show all
Defined in:
ext/chapterparse/src/chaptparser.rb,
ext/chapterparse/test/test_parser.rb

Constant Summary collapse

# Lookup table used by #unknown_charref: maps a numeric character reference
# (the decimal Unicode code point, as a String) to the single character that
# is emitted while the 'Symbol' font face is active.
#
# * 913-969:  Greek capital and small letters, mapped to Latin letters.
# * the rest: mathematical operators and arrows, mapped to single-byte codes
#   (presumably the glyph positions of the Symbol font encoding -- confirm).
#
# NOTE(review): Integer#chr without an encoding argument yields a binary
# (ASCII-8BIT) string for codes above 127 on Ruby >= 1.9 -- confirm that
# handle_data copes with that.
#
# The hash is frozen so that the shared table cannot be modified by accident.
SYMBOL_ENTITIES =
{
	# Greek capital letters
	'913' => 'A',
	'914' => 'B',
	'915' => 'G',
	'916' => 'D',
	'917' => 'E',
	'918' => 'Z',
	'919' => 'H',
	'920' => 'Q',
	'921' => 'I',
	'922' => 'K',
	'923' => 'L',
	'924' => 'M',
	'925' => 'N',
	'926' => 'X',
	'927' => 'O',
	'928' => 'P',
	'929' => 'R',
	'931' => 'S',
	'932' => 'T',
	'933' => 'U',
	'934' => 'F',
	'935' => 'C',
	'936' => 'Y',
	'937' => 'W',
	# Greek small letters
	'945' => 'a',
	'946' => 'b',
	'947' => 'g',
	'948' => 'd',
	'949' => 'e',
	'950' => 'z',
	'951' => 'h',
	'952' => 'q',
	'953' => 'i',
	'954' => 'k',
	'955' => 'l',
	'956' => 'm',
	'957' => 'n',
	'958' => 'x',
	'959' => 'o',
	'960' => 'p',
	'961' => 'r',
	'963' => 's',
	'964' => 't',
	'965' => 'u',
	'966' => 'f',
	'967' => 'c',
	'968' => 'y',
	'969' => 'w',
	# Operators, relations and arrows (comment = HTML entity name)
	'8704' =>  34.chr, # forall
	'8707' =>  36.chr, # exist
	'8727' =>  42.chr, # lowast
	'8722' =>  45.chr, # minus
	'8773' =>  64.chr, # cong
	'8869' =>  94.chr, # perp
	'8764' => 126.chr, # sim
	'8804' => 163.chr, # le
	'8734' => 165.chr, # infin
	'402'  => 166.chr, # fnof
	'8596' => 171.chr, # harr
	'8592' => 172.chr, # larr
	'8593' => 173.chr, # uarr
	'8594' => 174.chr, # rarr
	'8595' => 175.chr, # darr
	'8805' => 179.chr, # ge
	'8733' => 181.chr, # prop
	'8706' => 182.chr, # part
	'8800' => 185.chr, # ne
	'8801' => 186.chr, # equiv
	'8776' => 187.chr, # asymp
	'8629' => 191.chr, # crarr
	'8855' => 196.chr, # otimes
	'8853' => 197.chr, # oplus
	'8709' => 198.chr, # empty
	'8745' => 199.chr, # cap
	'8746' => 200.chr, # cup
	'8835' => 201.chr, # sup
	'8839' => 202.chr, # supe
	'8836' => 203.chr, # nsub
	'8834' => 204.chr, # sub
	'8838' => 205.chr, # sube
	'8712' => 206.chr, # isin
	'8713' => 207.chr, # notin
	'8736' => 208.chr, # ang
	'8711' => 209.chr, # nabla
	'8719' => 213.chr, # prod
	'8730' => 214.chr, # radic
	'8901' => 215.chr, # sdot
	'8743' => 217.chr, # and
	'8744' => 218.chr, # or
	'8660' => 219.chr, # hArr
	'8656' => 220.chr, # lArr
	'8657' => 221.chr, # uArr
	'8658' => 222.chr, # rArr
	'8659' => 223.chr, # dArr
	'8721' => 229.chr, # sum
	'8747' => 242.chr, # int
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Parser

int



109
110
111
112
# File 'ext/chapterparse/src/chaptparser.rb', line 109

# Forwards all constructor arguments to the superclass, then sets up the
# (initially empty) stack of release-handler lists that
# #register_release_tag pushes onto and #release_tag pops from.
def initialize(*args)
	super(*args)
	@release_stack = []
end

Instance Attribute Details

#nofill ⇒ Object (readonly)

Returns the value of attribute nofill.



14
15
16
# File 'ext/chapterparse/test/test_parser.rb', line 14

# Reader for @nofill, the counter that #start_pre increments and #end_pre
# decrements (never below zero).
def nofill
	@nofill
end

Instance Method Details

#analyse_attributes(attrs, release) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'ext/chapterparse/src/chaptparser.rb', line 113

# Translates the presentational attributes of an element into start_* calls
# and records the matching end handlers in +release+.
#
# When a 'style' attribute is present it is inspected for:
# * mono/monospace        -> start_pre, releases :end_pre
# * sans-serif (else)     -> suspend_pre (may register :restart_pre)
# * bold                  -> start_b,   releases :end_b
# * italic                -> start_i,   releases :end_i
# * vertical-align: super -> start_sup, releases :end_sup
# * vertical-align: sub   -> start_sub, releases :end_sub
#
# Only when there is no 'style' attribute is the 'class' attribute
# consulted; a class containing the word "preformatted" starts a pre block.
#
# attrs::   attribute list as accepted by #fetch_attribute
# release:: array collecting the handler names to run on the closing tag
def analyse_attributes(attrs, release)
	style = fetch_attribute('style', attrs)
	if style
		# Font family: monospace opens a pre block, sans-serif suspends one.
		if /\bmono(space)?\b/iu =~ style
			start_pre(attrs)
			release.push(:end_pre)
		elsif /\bsans-serif\b/iu =~ style
			suspend_pre(release)
		end
		# Weight and slant are independent of each other.
		if /\bbold\b/iu =~ style
			start_b(attrs)
			release.push(:end_b)
		end
		if /\bitalic\b/iu =~ style
			start_i(attrs)
			release.push(:end_i)
		end
		# Superscript takes precedence over subscript.
		if /\bvertical-align\s*:\s*super\b/iu =~ style
			start_sup(attrs)
			release.push(:end_sup)
		elsif /\bvertical-align\s*:\s*sub\b/iu =~ style
			start_sub(attrs)
			release.push(:end_sub)
		end
	else
		klass = fetch_attribute('class', attrs)
		if klass && /\bpreformatted\b/iu =~ klass
			start_pre(attrs)
			release.push(:end_pre)
		end
	end
end

#end_div ⇒ Object



142
143
144
# File 'ext/chapterparse/src/chaptparser.rb', line 142

# Closing </div>: runs the release handlers that the matching #start_div
# registered through #register_release_tag.
def end_div
	release_tag
end

#end_font ⇒ Object



145
146
147
# File 'ext/chapterparse/src/chaptparser.rb', line 145

# Closing </font>: runs the release handlers that the matching #start_font
# registered through #register_release_tag.
def end_font
	release_tag
end

#end_h2 ⇒ Object



148
149
150
# File 'ext/chapterparse/src/chaptparser.rb', line 148

# Closing </h2>: delegates to end_i, mirroring #start_h2 which delegates to
# start_i.
def end_h2
	end_i
end

#end_pre ⇒ Object



151
152
153
154
155
156
157
158
# File 'ext/chapterparse/src/chaptparser.rb', line 151

# Leaves one level of preformatted text.
#
# The @nofill counter is decremented but never drops below zero; once it
# reaches zero the current paragraph is ended. The font pushed by
# #start_pre is popped in every case.
def end_pre
	@nofill = [@nofill - 1, 0].max
	@formatter.end_paragraph(1) if @nofill.zero?
	@formatter.pop_font
end

#end_span ⇒ Object



159
160
161
# File 'ext/chapterparse/src/chaptparser.rb', line 159

# Closing </span>: runs the release handlers that the matching #start_span
# registered through #register_release_tag.
def end_span
	release_tag
end

#end_sub ⇒ Object



162
163
164
# File 'ext/chapterparse/src/chaptparser.rb', line 162

# Ends subscript text by popping the font handler that #start_sub pushed
# onto the formatter.
def end_sub
	@formatter.pop_fonthandler
end

#end_sup ⇒ Object



165
166
167
# File 'ext/chapterparse/src/chaptparser.rb', line 165

# Ends superscript text by popping the font handler that #start_sup pushed
# onto the formatter.
def end_sup
	@formatter.pop_fonthandler
end

#fetch_attribute(name, attrs) ⇒ Object



168
169
170
171
172
173
174
175
# File 'ext/chapterparse/src/chaptparser.rb', line 168

# Looks up an attribute value by name.
#
# +attrs+ is a collection of [key, value] pairs. It is scanned back to
# front, so when a name occurs more than once the LAST occurrence wins.
# reverse_each is used instead of reverse.each so that no reversed copy of
# the list is allocated per lookup; it also lets any Enumerable of pairs
# (e.g. a Hash) be passed.
#
# name::  attribute name to look for (compared with ==)
# attrs:: enumerable of [key, value] pairs
#
# Returns the value of the last matching pair, or nil when there is none.
def fetch_attribute(name, attrs)
	attrs.reverse_each { |key, value|
		return value if key == name
	}
	nil
end

#register_release_tag(&block) ⇒ Object



183
184
185
186
187
# File 'ext/chapterparse/src/chaptparser.rb', line 183

# Opens a new release frame for an element that has just been started.
#
# A fresh array is handed to the block, which fills it with the names of
# the handler methods to run when the element is closed. The array is then
# pushed onto @release_stack, where #release_tag will find it.
#
# Returns @release_stack.
def register_release_tag(&block)
	handlers = []
	block.call(handlers)
	@release_stack << handlers
end

#release_tag ⇒ Object



176
177
178
179
180
181
182
# File 'ext/chapterparse/src/chaptparser.rb', line 176

# Closes the most recently opened element: pops its release frame from
# @release_stack and invokes every handler named in it.
#
# Handlers are invoked in REVERSE registration order (last registered,
# first released) so that pushes and pops on the formatter unwind
# symmetrically. This matters when #suspend_pre has registered
# :restart_pre (which pushes a font via #start_pre) ahead of handlers
# registered later for the same element: run in registration order, a later
# pop would remove the font that restart_pre had just pushed
# (presumably end_b/end_i pop the same font stack as #end_pre -- they are
# defined in the superclass).
#
# Does nothing when the stack is empty (unbalanced closing tag).
#
# Returns the released frame, or nil when there was none.
def release_tag
	release = @release_stack.pop
	return unless release
	release.reverse_each { |symbol|
		send(symbol)
	}
end

#restart_pre ⇒ Object



188
189
190
# File 'ext/chapterparse/src/chaptparser.rb', line 188

# Release handler registered by #suspend_pre: re-enters preformatted mode
# by calling #start_pre with an empty attribute hash.
def restart_pre
	no_attributes = {}
	start_pre(no_attributes)
end

#start_div(attrs) ⇒ Object



191
192
193
194
195
196
# File 'ext/chapterparse/src/chaptparser.rb', line 191

# Opening <div>: registers a release frame filled in by
# #analyse_attributes, then asks the formatter for a line break.
#
# attrs:: attribute list of the tag
def start_div(attrs)
	register_release_tag do |handlers|
		analyse_attributes(attrs, handlers)
	end
	@formatter.add_line_break
end

#start_font(attrs) ⇒ Object



197
198
199
200
201
202
203
204
205
206
207
208
# File 'ext/chapterparse/src/chaptparser.rb', line 197

# Opening <font>: registers a release frame based on the 'face' attribute.
#
# * a face containing the word mono/monospace starts a pre block and
#   schedules :end_pre;
# * otherwise a face containing sans-serif suspends a running pre block
#   (see #suspend_pre);
# * without a 'face' attribute the frame stays empty.
#
# attrs:: attribute list of the tag
def start_font(attrs)
	register_release_tag do |handlers|
		face = fetch_attribute('face', attrs)
		if face
			if /\bmono(space)?\b/iu =~ face
				start_pre(attrs)
				handlers.push(:end_pre)
			elsif /\bsans-serif\b/iu =~ face
				suspend_pre(handlers)
			end
		end
	end
end

#start_h2(attrs) ⇒ Object



209
210
211
# File 'ext/chapterparse/src/chaptparser.rb', line 209

# Opening <h2>: delegates to start_i with the tag's attributes; #end_h2
# undoes it with end_i.
#
# attrs:: attribute list of the tag
def start_h2(attrs)
	start_i(attrs)
end

#start_pre(attrs) ⇒ Object



212
213
214
215
216
217
218
# File 'ext/chapterparse/src/chaptparser.rb', line 212

# Enters one level of preformatted text.
#
# The current paragraph is ended only when no pre block is active yet
# (@nofill not above zero). A font is then pushed on the formatter with
# only the last of the four arguments set, and @nofill is incremented.
#
# attrs:: attribute list of the tag (not used here)
#
# Returns the new value of @nofill.
def start_pre(attrs)
	@formatter.end_paragraph(1) unless @nofill > 0
	@formatter.push_font(nil, nil, nil, 1)
	@nofill += 1
end

#start_span(attrs) ⇒ Object



219
220
221
222
223
# File 'ext/chapterparse/src/chaptparser.rb', line 219

# Opening <span>: registers a release frame that #analyse_attributes fills
# according to the tag's attributes.
#
# attrs:: attribute list of the tag
def start_span(attrs)
	register_release_tag do |handlers|
		analyse_attributes(attrs, handlers)
	end
end

#start_sub(attrs) ⇒ Object



224
225
226
# File 'ext/chapterparse/src/chaptparser.rb', line 224

# Starts subscript text by pushing a 'vertical-align' => 'subscript' font
# handler onto the formatter; #end_sub pops it again.
#
# attrs:: attribute list of the tag (not used here)
def start_sub(attrs)
	handler = [['vertical-align', 'subscript']]
	@formatter.push_fonthandler(handler)
end

#start_sup(attrs) ⇒ Object



227
228
229
# File 'ext/chapterparse/src/chaptparser.rb', line 227

# Starts superscript text by pushing a 'vertical-align' => 'superscript'
# font handler onto the formatter; #end_sup pops it again.
#
# attrs:: attribute list of the tag (not used here)
def start_sup(attrs)
	handler = [['vertical-align', 'superscript']]
	@formatter.push_fonthandler(handler)
end

#suspend_pre(release) ⇒ Object



230
231
232
233
234
235
# File 'ext/chapterparse/src/chaptparser.rb', line 230

# Temporarily leaves preformatted mode for the duration of one element.
#
# Only acts while a pre block is active (@nofill above zero): it ends that
# block right away and schedules :restart_pre in +release+ so the block is
# re-entered when the element is closed.
#
# release:: array collecting the handler names to run on the closing tag
#
# Returns +release+ when a pre block was suspended, nil otherwise.
def suspend_pre(release)
	return unless @nofill > 0
	end_pre
	release.push(:restart_pre)
end

#unknown_charref(ref) ⇒ Object



236
237
238
239
240
241
242
243
244
# File 'ext/chapterparse/src/chaptparser.rb', line 236

# Handles a numeric character reference that the superclass could not
# resolve.
#
# When +ref+ is a key of SYMBOL_ENTITIES, the mapped character is passed to
# handle_data while a 'face' => 'Symbol' font handler is active on the
# formatter. Any other reference is replaced by a literal question mark.
#
# ref:: the reference, as used for the SYMBOL_ENTITIES lookup
def unknown_charref(ref)
	char = SYMBOL_ENTITIES[ref]
	return handle_data("?") unless char
	@formatter.push_fonthandler([['face', 'Symbol']])
	handle_data(char)
	@formatter.pop_fonthandler
end