Class: Maruku

Inherits:
MDElement show all
Includes:
REXML
Defined in:
lib/maruku/parse_span.rb,
lib/maruku/to_html.rb,
lib/maruku/to_latex.rb,
lib/maruku/structures.rb,
lib/maruku/parse_block.rb,
lib/maruku/string_utils.rb

Overview

The Maruku class holds static data for the document

Constant Summary collapse

Definition =

Example:

^:blah blah
^: blah blah
^   : blah blah
%r{ 
	^ # begin of line
	[ ]{0,3} # up to 3 spaces
	: # colon
	\s* # whitespace
	(\S.*) # the text    = $1
	$ # end of line
}x
Abbreviation =

Example:

*[HTML]: Hyper Text Markup Language
%r{
	^  # begin of line
	\* # one asterisk
	\[ # opening bracket
	([^\]]+) # any non-closing bracket:  id = $1
	\] # closing bracket
	:  # colon
	\s* # whitespace
	(\S.*\S)* #           definition=$2
	\s* # strip this whitespace
	$   # end of line
}x
FootnoteText =
%r{
	^\s*\[(\^.+)\]: # id = $1 (including '^')
	\s*(\S.*)?$    # text = $2 (not obb.)
}x
LinkRegex =

This regex is taken from the BlueCloth sources. Link definitions are in the form: ^[id]: \n? url “optional title”

%r{
		^[ ]*\[(.+)\]:		# id = $1
 [ ]*
		<?(\S+)>?				# url = $2
 [ ]*
		(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?)			# title = $3
[")']			# Matching ) or "
\s*(.+)?   # stuff = $4
		)?	# title is optional
}x
IncompleteLink =
%r{^\s*\[(.+)\]:\s*$}
HeaderWithId =
/^(.*)\{\#([\w_-]+)\}\s*$/
TabSize =
4
MightBeTableHeader =

If a line contains a pipe, it could be a table header.

%r{\|}
Sep =

————-:

/\s*(\:)?\s*-+\s*(\:)?\s*/
TableSeparator =

| ————-:| —————————— |

%r{^(\|?#{Sep}\|?)+\s*$}
EMailAddress =
/<([^:]+@[^:]+)>/
URL =
/^<http:/

Constants inherited from MDElement

MDElement::DEFAULT_CODE_COLOR, MDElement::TexHeaders

Instance Attribute Summary collapse

Attributes inherited from MDElement

#children, #doc, #meta, #node_type

Instance Method Summary collapse

Methods inherited from MDElement

#array_to_html, #array_to_latex, #children_to_html, #children_to_latex, #create_html_element, #define_color_if_necessary, #each_element, #get_setting, #latex_color, #latex_escape, #map_match, #match_couple_of, #obfuscate, #replace_each_string, #source2html, #to_html_abbreviation, #to_html_cell, #to_html_code, #to_html_definition, #to_html_definition_data, #to_html_definition_list, #to_html_definition_term, #to_html_email_address, #to_html_emphasis, #to_html_footnote_reference, #to_html_head_cell, #to_html_header, #to_html_hrule, #to_html_image, #to_html_immediate_link, #to_html_inline_code, #to_html_li, #to_html_li_span, #to_html_linebreak, #to_html_link, #to_html_ol, #to_html_paragraph, #to_html_quote, #to_html_raw_html, #to_html_strong, #to_html_table, #to_html_ul, #to_latex_abbreviation, #to_latex_cell, #to_latex_code, #to_latex_definition, #to_latex_definition_list, #to_latex_email_address, #to_latex_emphasis, #to_latex_footnote_reference, #to_latex_head_cell, #to_latex_header, #to_latex_hrule, #to_latex_image, #to_latex_immediate_link, #to_latex_inline_code, #to_latex_li, #to_latex_li_span, #to_latex_linebreak, #to_latex_link, #to_latex_ol, #to_latex_paragraph, #to_latex_quote, #to_latex_raw_html, #to_latex_strong, #to_latex_table, #to_latex_ul, #wrap_as_element, #wrap_as_environment, #wrap_as_span

Constructor Details

#initialize(s = nil) ⇒ Maruku

Returns a new instance of Maruku.



3
4
5
6
7
8
9
10
11
12
# File 'lib/maruku/parse_block.rb', line 3

# Builds an empty document node and, when source text is given,
# parses it right away.
#
# s:: source text handed to parse_doc; nothing is parsed when it is
#     nil (the default) or false.
def initialize(s=nil)
	# The document is the root node and is its own owning document.
	@doc       = self
	@node_type = :document

	# Lookup tables filled in while parsing; all start out empty.
	@abbreviations = {}
	@footnotes     = {}
	@refs          = {}

	return unless s
	parse_doc(s)
end

Instance Attribute Details

#abbreviationsObject

Returns the value of attribute abbreviations.



30
31
32
# File 'lib/maruku/structures.rb', line 30

# Reader for @abbreviations, the hash created empty by #initialize and
# filled by #read_abbreviation (abbreviation => description).
def abbreviations
  @abbreviations
end

#footnotesObject

Returns the value of attribute footnotes.



29
30
31
# File 'lib/maruku/structures.rb', line 29

# Reader for @footnotes, the hash created empty by #initialize and
# filled by #read_footnote_text (footnote id => :footnote element).
def footnotes
  @footnotes
end

#refsObject

Returns the value of attribute refs.



28
29
30
# File 'lib/maruku/structures.rb', line 28

# Reader for @refs, the hash created empty by #initialize that maps a
# link/image reference id to a hash holding at least :url
# (filled by #read_ref and by #parse_lines_as_span for inline URLs).
def refs
  @refs
end

Instance Method Details

#create_md_element(node_type, children = []) ⇒ Object



194
195
196
197
198
199
200
# File 'lib/maruku/parse_block.rb', line 194

# Creates an MDElement of the given type that belongs to this document.
#
# node_type:: value stored in the element's node_type.
# children::  value stored in the element's children
#             (default: a fresh empty array).
#
# Returns the new element, whose doc is set to self.
def create_md_element(node_type, children=[])
	MDElement.new.tap do |element|
		element.node_type = node_type
		element.children  = children
		element.doc       = self
	end
end

#create_next_stringObject

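Returns the types of the upcoming lines encoded as a string, one character per line ("t" text, "e" empty, "d" definition, "o" other). Scanning stops at the first :definition line or at the second empty line.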



515
516
517
518
519
520
521
522
523
524
525
526
527
528
# File 'lib/maruku/parse_block.rb', line 515

# Encodes the upcoming lines (the ones returned by +top+) as a string
# with one character per line:
#
#   "t" text, "e" empty, "d" definition, "o" anything else
#
# Scanning stops right after the first definition line, or as soon as
# a second empty line has been seen.
def create_next_string
	codes = ""
	empties = 0
	top.each do |line|
		code =
			case line_node_type(line)
			when :text
				"t"
			when :empty
				empties += 1
				"e"
			when :definition
				"d"
			else
				"o"
			end
		codes += code
		break if code == "d" || empties > 1
	end
	codes
end

#cur_lineObject



204
# File 'lib/maruku/parse_block.rb', line 204

# Returns the first line of the line buffer returned by +top+,
# or nil when that buffer is empty.
def cur_line
	return nil if top.empty?
	top.first
end

#cur_line_node_typeObject



203
# File 'lib/maruku/parse_block.rb', line 203

# Classifies the first line of the buffer returned by +top+ with
# line_node_type. No emptiness check is made here: callers test
# cur_line first.
def cur_line_node_type
	line_node_type(top.first)
end

#dbg_describe_ary(a, prefix = '') ⇒ Object



130
131
132
133
134
135
# File 'lib/maruku/string_utils.rb', line 130

# Debug helper: writes every entry of +a+ to $stderr, one per line, as
#
#   <prefix> (<1-based position>)#<entry>#
#
# The surrounding '#' marks make leading/trailing whitespace visible.
# Returns +a+.
def dbg_describe_ary(a, prefix='')
	a.each_with_index do |entry, position|
		$stderr.puts "#{prefix} (#{position + 1})##{entry}#"
	end
end

#debug(s) ⇒ Object



126
127
128
# File 'lib/maruku/string_utils.rb', line 126

# Writes a debugging message to standard error.
#
# s:: the message, printed with $stderr.puts.
def debug(s)
	$stderr.puts(s)
end

#eventually_comes_a_def_listObject

If the current line is text, a definition list is coming when the upcoming lines are: one or more text lines, an optional empty line, then a definition line.



506
507
508
509
510
511
# File 'lib/maruku/parse_block.rb', line 506

# Tells whether the upcoming lines start a definition list: one or
# more text lines, at most one empty line, then a definition line
# (see create_next_string for the "t"/"e"/"d" encoding).
#
# Returns the match position (0, truthy) or nil.
def eventually_comes_a_def_list
	create_next_string =~ /^t+e?d/x
end

#force_linebreak?(l) ⇒ Boolean

Returns:

  • (Boolean)


137
138
139
# File 'lib/maruku/string_utils.rb', line 137

# A line that ends with two spaces forces a line break.
#
# Returns the position of the two trailing spaces (truthy) or nil.
def force_linebreak?(l)
	/  $/ =~ l
end

#line_node_type(l) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/maruku/string_utils.rb', line 141

# Classifies one source line and returns a symbol naming its block type.
#
# The rules are tried top to bottom and the first match wins, so their
# order is significant. Anything that matches no rule is :text.
def line_node_type(l)
	# Four or more columns of indentation always mean code; this is
	# tested first, so it also applies to whitespace-only lines that long.
	return :code     if number_of_leading_spaces(l)>=4
	return :footnote_text      if l =~ FootnoteText
	return :ref if l =~ LinkRegex or l=~ IncompleteLink
	return :abbreviation if l =~ Abbreviation
	return :definition if l =~ Definition
	# I had a bug with emails and urls at the beginning of the 
	# line that were mistaken for raw_html
	return :text if l=~EMailAddress or l=~ URL
	# raw html is like PHP Markdown Extra: at most three spaces before
	return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
	return :ulist    if l =~ /^\s?(\*|-)\s+.*\w+/
	return :olist    if l =~ /^\s?\d\..*\w+/
	return :empty    if l.strip.size == 0
	return :header1  if l =~ /^(=)+/ 
	# Any non-empty line made only of hyphens and whitespace.
	return :header2  if l =~ /^([-\s])+$/ 
	return :header3  if l =~ /^(#)+\s*\S+/ 
	# at least three asterisks on a line, and only whitespace
	return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/ 
	# NOTE(review): every line this pattern accepts is already caught by
	# the :header2 rule above, so this rule looks unreachable.
	return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
	return :quote    if l =~ /^>/
	return :metadata if l =~ /^@/
	return :text
end

#next_lineObject



205
# File 'lib/maruku/parse_block.rb', line 205

# Returns the second line of the buffer returned by +top+, or nil when
# there is none.
def next_line
	return nil if top.empty?
	top[1]
end

#next_line_node_typeObject



206
# File 'lib/maruku/parse_block.rb', line 206

# Classifies the second line of the buffer returned by +top+ with
# line_node_type; returns nil when fewer than two lines are left.
def next_line_node_type
	return nil if top.size < 2
	line_node_type(top[1])
end

#num_leading_hashes(s) ⇒ Object

Counts the number of leading ‘#’ in the string



88
89
90
91
92
# File 'lib/maruku/string_utils.rb', line 88

# Counts the '#' characters at the start of +s+.
#
# The last character of the string is never counted, so a string made
# only of hashes yields its length minus one ("###" => 2, "#" => 0).
def num_leading_hashes(s)
	limit = s.size - 1
	count = 0
	count += 1 while count < limit && s[count, 1] == '#'
	count
end

#number_of_leading_spaces(s) ⇒ Object

Returns the number of leading spaces, considering that a tab counts as `TabSize` spaces.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/maruku/string_utils.rb', line 32

# Measures the indentation of +s+ in columns: every leading space
# counts as 1 and every leading tab as TabSize. Counting stops at the
# first character that is neither a space nor a tab.
def number_of_leading_spaces(s)
	indent = s[/\A[ \t]*/]
	indent.count(' ') + indent.count("\t") * TabSize
end

#parse_doc(s) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
# File 'lib/maruku/parse_block.rb', line 14

# Parses a whole document.
#
# The header block (if any) is split off by parse_email_headers and
# kept in @meta; the remaining text, @meta[:data], is parsed into
# @children. Afterwards abbreviations are marked up and markdown
# embedded in raw HTML is expanded.
#
# s:: the complete source text.
def parse_doc(s)
	# start with an empty stack of line buffers
	@stack = []

	@meta     = parse_email_headers(s)
	@children = parse_lines_as_markdown(split_lines(@meta[:data]))

	# post-processing passes over the finished tree
	search_abbreviations
	substitute_markdown_inside_raw_html
end

#parse_email_headers(s) ⇒ Object

Parses email-style headers at the start of the text. Returns a hash of the headers; hash[:data] is the message body.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/maruku/string_utils.rb', line 14

# Splits an optional block of email-style headers off the start of +s+.
#
# A header block is one or more "Name: value" lines followed by an
# empty line, and it only counts when it begins at the very first
# character of the text.
#
# s:: the source text.
#
# Returns a hash. :data holds the text after the header block (or all
# of +s+ when there is no header block); every header is stored under
# its stripped, downcased name as a symbol, with the stripped value.
def parse_email_headers(s)
	keys = {}
	match = /((\w+: .*\n)+)\n/.match(s)

	# No header block, or one that does not start the text: all is data.
	unless match && match.begin(0) == 0
		keys[:data] = s
		return keys
	end

	keys[:data] = match.post_match
	match[1].split("\n").each do |line|
		# Split on the first colon only, so that values may themselves
		# contain colons (URLs, times, ...).
		name, value = line.split(':', 2)
		keys[name.strip.downcase.to_sym] = value.strip
	end
	keys
end

#parse_lines_as_markdown(lines) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/maruku/parse_block.rb', line 78

# Block-level parser: turns an array of lines into an array of elements.
#
# +lines+ is pushed on @stack for the duration of the call and is
# consumed through cur_line / next_line / shift_line. Each iteration
# looks at the type of the current line and delegates to the matching
# read_* method.
#
# Metadata lines produce no element of their own: the hash read by
# read_metadata is kept for one iteration and merged into the meta of
# the last output element at the end of the following iteration.
#
# Before returning, lists and definition lists in which no item asked
# for its own paragraph are flattened (list items become :li_span).
#
# lines:: array of strings; it is modified (lines are shifted off).
#
# Returns the array of parsed elements.
def parse_lines_as_markdown(lines)
	@stack.push lines
	output = []; current_metadata = just_read_metadata = nil
	# run state machine
	while cur_line
		
#  Prints detected type
#			puts "#{cur_line_node_type}|#{cur_line}"
		case cur_line_node_type
			when :empty; 
				shift_line; 
			when :text
				if cur_line =~ MightBeTableHeader and 
					(next_line && next_line =~ TableSeparator)
					output << read_table
				elsif [:header1,:header2].include? next_line_node_type
					# Setext-style header: a text line underlined by '=' or '-'
					e = create_md_element(:header)
					line = shift_line.strip
					if line =~ HeaderWithId 
						line = $1.strip
						e.meta[:id] = $2
					end
					e.children = parse_lines_as_span [ line ]

					# the current line is now the underline
					e.meta[:level] = cur_line_node_type == :header2 ? 2 : 1
					shift_line

					output << e
				elsif eventually_comes_a_def_list
				 	definition = read_definition
					# append to the current definition list if we have one
					if output.last && output.last.node_type == :definition_list
						output.last.children << definition
					else
						output << create_md_element(:definition_list, [definition])
					end
				
				else # Start of a paragraph
					output << read_paragraph
				end
			when :header2, :hrule
				# hrule
				shift_line
				output << create_md_element(:hrule)
			when :header3
				# ATX-style header: the level is the number of leading hashes
				e = create_md_element(:header)
				line = shift_line.strip
				if line =~ HeaderWithId 
					line = $1.strip
					e.meta[:id] = $2
				end
				
				e.meta[:level] = num_leading_hashes(line)
				e.children =  parse_lines_as_span [strip_hashes(line)] 
				output << e
			when :ulist, :olist
				list_type = cur_line_node_type == :ulist ? :ul : :ol
				li = read_list_item
				# append to current list if we have one
				if output.last && output.last.node_type == list_type
					output.last.children << li
				else
					output << create_md_element(list_type, [li])
				end
			when :quote;    output << read_quote
			when :code;     e = read_code; output << e if e
			when :raw_html; output << read_raw_html

			# these do not produce output
			when :footnote_text; read_footnote_text
			when :ref;           read_ref
			when :abbreviation;  read_abbreviation
			when :metadata;      just_read_metadata = read_metadata
				
			# warn if we forgot something
			else
				node_type = cur_line_node_type
				$stderr.puts "Ignoring line '#{shift_line}' type = #{node_type}"
		end
		
		# Metadata read during the previous iteration is attached to the
		# last element of the output.
		if current_metadata and output.last
			output.last.meta.merge! current_metadata
			current_metadata = nil
#				puts "meta for #{output.last.node_type}\n #{output.last.meta.inspect}"
		end
		# Metadata read during this iteration becomes pending.
		current_metadata = just_read_metadata
		just_read_metadata = nil
	end
	# pop the stack
	@stack.pop
	
	# See for each list if we can omit the paragraphs and use li_span
	output.each do |c| 
		# Remove paragraphs that we can get rid of
		if [:ul,:ol].include? c.node_type 
			if c.children.all? {|li| !li.meta[:want_my_paragraph]} then
				c.children.each do |d|
					d.node_type = :li_span
					d.children = d.children[0].children 
				end
			end
		end 
		if c.node_type == :definition_list
			if c.children.all?{|defi| !defi.meta[:want_my_paragraph]} then
				c.children.each do |definition| 
					dds = definition.meta[:definitions] 
					dds.each do |dd|
						dd.children = dd.children[0].children 
					end
				end
			end
		end 
	end
	
	output
end

#parse_lines_as_span(lines) ⇒ Object

Takes care of all span-level formatting, links, images, etc.

Lines must not contain block-level elements.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'lib/maruku/parse_span.rb', line 11

# Span-level parser: turns an array of lines (which must not contain
# block-level constructs) into an array of strings and span elements.
#
# The lines are first joined by resolve_linebreaks and put inside a
# temporary MDElement; successive passes then replace pieces of its
# string children with elements. The order of the passes is
# significant (see below). Markdown escapes are encoded before the
# first pass and decoded after the last one.
#
# lines:: array of strings.
#
# Returns the children of the temporary element.
def parse_lines_as_span(lines)
	
	# first, get rid of linebreaks
	res = resolve_linebreaks(lines)

	span = MDElement.new
	span.children = res

	# encode all escapes
	span.replace_each_string { |s| s.escape_md_special }


# The order of processing is significant: 
# 1. inline code
# 2. immediate links
# 3. inline HTML 
# 4. everything else

	# search for ``code`` markers
	span.match_couple_of('``') { |children, match1, match2| 
		e = create_md_element(:inline_code)
		e.meta[:raw_code] = children.join('') # this is now opaque to processing
		e
	}

	# Search for `single tick`  code markers
	span.match_couple_of('`') { |children, match1, match2|
		e = create_md_element(:inline_code)
		e.meta[:raw_code] = children.join('').unescape_md_special 
		# this is now opaque to processing
		e
	}
	
	# Detect any immediate link: <http://www.google.com>
	# we expect an http: or something: at the beginning
	span.map_match( /<(\w+:[^\>]+)>/) { |match| 
		url = match[1]
		
		e = create_md_element(:immediate_link, [])
		e.meta[:url] = url
		e
	}
	
	# Search for inline HTML (the support is pretty basic for now)
	
	# this searches for a matching block
	inlineHTML1 = %r{
		(   # put everything in 1 
		<   # open
		(\w+) # opening tag in 2
		>   # close
		.*  # anything
		</\2> # match closing tag
		)
	}x

	# this searches for only one block
	inlineHTML2 = %r{
		(   # put everything in 1 
		<   # open
		\w+ # 
		    # close
		[^<>]*  # anything except
		/> # closing tag
		)
	}x
	
	for reg in [inlineHTML1, inlineHTML2]
		span.map_match(reg) { |match| 
			raw_html = (match[1] || raise("No html?"))
			e = create_md_element(:raw_html)
			e.meta[:raw_html]  = raw_html
			begin
				e.meta[:parsed_html] = Document.new(raw_html)
			rescue 
				# malformed HTML is kept as raw text, without a parsed tree
				$stderr.puts "Malformed HTML:\n#{raw_html}"
			end
			e
		}
	end
	
	# Detect footnotes references: [^1]
	span.map_match(/\[(\^[^\]]+)\]/) { |match| 
		id = match[1].strip.downcase
		e = create_md_element(:footnote_reference)
		e.meta[:footnote_id] = id
		e
	}

	# Detect any image like ![Alt text][url]
	span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
		alt = match[1]
		id = match[2].strip.downcase
		
		# an empty id means that the alt text itself is the reference id
		if id.size == 0
			id = alt.strip.downcase
		end
		
		e = create_md_element(:image)
		e.meta[:ref_id] = id
		e
	}

	# Detect any image with immediate url: ![Alt](url "title")
	# a dummy ref is created and put in the symbol table
	link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
	span.map_match(link1) { |match|
		alt = match[1]
		url = match[2]
		title = match[3]
		
		url = url.strip
		# create a dummy id
		id="dummy_#{@refs.size}"
		@refs[id] = {:url=>url, :title=>title}
		
		e = create_md_element(:image)
		e.meta[:ref_id] = id
		e
	}

	# an id reference: "[id]",  "[ id  ]"
	reg_id_ref = %r{
		\[ # opening bracket 
		([^\]]*) # 0 or more non-closing bracket (this is too permissive)
		\] # closing bracket
		}x
		
	
	# validates a url, only $1 is set to the url
	# (this stricter pattern is immediately replaced by the permissive
	# one on the next statement)
		reg_url = 
		/((?:\w+):\/\/(?:\w+:{0,1}\w*@)?(?:\S+)(?::[0-9]+)?(?:\/|\/([\w#!:.?+=&%@!\-\/]))?)/
	reg_url = %r{([^\s\]\)]+)}
	
	# A string enclosed in quotes.
	reg_title = %r{
		" # opening
		[^"]*   # anything = 1
		" # closing
		}x
	
	# (http://www.google.com "Google.com"), (http://www.google.com),
	reg_url_and_title = %r{
		\(  # opening
		\s* # whitespace 
		#{reg_url}  # url = 1 
		(?:\s+["'](.*)["'])? # optional title  = 2
		\s* # whitespace 
		\) # closing
	}x
	
	# Detect a link like [Link text][id]
	span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
		text = match[1]
		id = match[2].strip.downcase
		
		# an empty id means that the link text itself is the reference id
		if id.size == 0
			id = text.strip.downcase
		end

		# the link text is a single line: wrap it, as an array of lines
		# is expected
		children = parse_lines_as_span([text])
		e = create_md_element(:link, children)
		e.meta[:ref_id] = id
		e
	}
	
	# NOTE(review): this pass reuses the image pattern (leading '!') but
	# builds a :link; images were already replaced by the pass above, so
	# it is unclear whether this can still match anything -- confirm the
	# intended pattern.
	link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
	span.map_match(link1) { |match|
		text = match[1]
		# wrapped in an array for the same reason as above
		children = parse_lines_as_span([text])
		
		url = match[2]
		title = match[3]
		
		url = url.strip
		# create a dummy id
		id="dummy_#{@refs.size}"
		@refs[id] = {:url=>url, :title=>title}
		@refs[id][:title] = title if title
		
		e = create_md_element(:link, children)
		e.meta[:ref_id] = id
		e
	}
	

	# Detect any link like [Google engine][google]
	span.match_couple_of('[',  # opening bracket
		%r{\]                   # closing bracket
		[ ]?                    # optional whitespace
		#{reg_id_ref} # ref id, with $1 being the reference 
		}x
			) { |children, match1, match2| 
		id = match2[1]
		id = id.strip.downcase
		
		if id.size == 0
			id = children.join.strip.downcase
		end
		
		e = create_md_element(:link, children)
		e.meta[:ref_id] = id
		e
	}

	# Detect any link with immediate url: [Google](http://www.google.com)
	# a dummy ref is created and put in the symbol table
	span.match_couple_of('[',  # opening bracket
			%r{\]                   # closing bracket
			[ ]?                    # optional whitespace
			#{reg_url_and_title}    # ref id, with $1 being the url and $2 being the title
			}x
				) { |children, match1, match2| 
		
		url   = match2[1]
		title = match2[3] # XXX? Is it a bug? I would use [2]
		 
		# create a dummy id
		id="dummy_#{@refs.size}"
		@refs[id] = {:url=>url}
		@refs[id][:title] = title if title

		e = create_md_element(:link, children)
		e.meta[:ref_id] = id
		e
	}

	# Detect an email address <[email protected]>
	span.map_match(EMailAddress) { |match| 
		email = match[1]
		e = create_md_element(:email_address, [])
		e.meta[:email] = email
		e
	}


	# And now the easy stuff

	# search for **strong**
	span.match_couple_of('**') { |children,m1,m2|  create_md_element(:strong,   children) }

	# search for __strong__
	span.match_couple_of('__') { |children,m1,m2|  create_md_element(:strong,   children) }

	# search for *emphasis*
	span.match_couple_of('*')  { |children,m1,m2|  create_md_element(:emphasis, children) }
	
	# search for _emphasis_
	span.match_couple_of('_')  { |children,m1,m2|  create_md_element(:emphasis, children) }
	
	# finally, unescape the special characters
	span.replace_each_string { |s|  s.unescape_md_special}
	
	span.children
end

#parse_metadata(l) ⇒ Object

parse one metadata line



416
417
418
419
420
421
422
423
424
425
426
427
# File 'lib/maruku/parse_block.rb', line 416

# Parses one metadata line of the form
#
#   @ key: value; other_key: other value; flag
#
# l:: the line, starting with '@'.
#
# Returns a hash mapping each key (as a symbol) to its stripped value.
# A key without a value gets the string 'true'.
def parse_metadata(l)
	hash = {}
	# remove leading '@'
	body = l[1, l.size].strip
	body.split(';').each do |kv|
		# tolerate empty entries such as "a: 1;; b: 2"
		next if kv.strip.empty?
		# split on the first colon only, so values may contain ':'
		k, v = kv.split(':', 2)
		v = 'true' if v.nil? || v.empty?
		hash[k.strip.to_sym] = v.strip
	end
	hash
end

#parse_text_as_markdown(text) ⇒ Object

Splits the string and calls parse_lines_as_markdown



73
74
75
76
# File 'lib/maruku/parse_block.rb', line 73

# Parses a block of text as block-level markdown.
#
# text:: source string; it is cut into lines with split_lines.
#
# Returns whatever parse_lines_as_markdown returns for those lines.
def parse_text_as_markdown(text)
	parse_lines_as_markdown(split_lines(text))
end

#read_abbreviationObject



280
281
282
283
284
285
286
# File 'lib/maruku/parse_block.rb', line 280

# Consumes the current line, an abbreviation definition such as
#
#   *[HTML]: Hyper Text Markup Language
#
# and records it in @abbreviations (abbreviation => description).
# The description is nil when the line gives none.
def read_abbreviation
	match = Abbreviation.match(shift_line)
	abbrev      = match && match[1]
	description = match && match[2]

	@abbreviations[abbrev] = description
end

#read_codeObject



383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/maruku/parse_block.rb', line 383

# Reads an indented code block starting at the current line.
#
# All consecutive :code and :empty lines are consumed, with up to four
# columns of indentation stripped from each; trailing empty lines are
# then dropped.
#
# Returns a :code element whose meta[:raw_code] holds the lines joined
# by newlines, or nil when nothing but empty lines was read.
def read_code
	element = create_md_element(:code)

	# collect all indented lines
	body = []
	body << strip_indent(shift_line, 4) while cur_line && [:code, :empty].include?(cur_line_node_type)

	# drop the empty lines at the end of the block
	body.pop while body.last && line_node_type(body.last) == :empty

	return nil if body.empty?

	element.meta[:raw_code] = body.join("\n")
	element
end

#read_definitionObject



530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
# File 'lib/maruku/parse_block.rb', line 530

# Reads one entry of a definition list: one or more term lines, an
# optional empty line, then one or more definitions.
#
# Returns a :definition element. Its meta holds :terms and
# :definitions (its children are terms + definitions) and
# :want_my_paragraph, which is truthy when an empty line separated the
# terms from the definitions or when read_indented_content reported
# it for one of the definitions.
#
# Raises RuntimeError ("Bug!Bug") when the terms are not followed by a
# definition line; callers are expected to check with
# eventually_comes_a_def_list first.
def read_definition
	# Read one or more terms
	terms = []
	while  cur_line &&  cur_line_node_type == :text
		terms << create_md_element(:definition_term, parse_lines_as_span([shift_line]))
	end
#		dbg_describe_ary(terms, 'DT')

	want_my_paragraph = false

	raise "Bug!Bug" if not cur_line
				
	# one optional empty
	if cur_line_node_type == :empty
		want_my_paragraph = true
		shift_line
	end
	
	raise "Bug!Bug" if cur_line_node_type != :definition
	
	# Read one or more definitions
	definitions = []
	while cur_line && cur_line_node_type == :definition
		first = shift_line
		# keep only the text after the colon
		first =~ Definition
		first = $1
		
		# I know, it's ugly!!!

		lines, w_m_p = 
			read_indented_content(4, [:definition], :definition)
		want_my_paragraph ||= w_m_p
	
		lines.unshift first
		
#			dbg_describe_ary(lines, 'DD')
		
		children = parse_lines_as_markdown(lines)
		definitions << create_md_element(:definition_data, children)
	end
	
	definition = create_md_element(:definition)
	definition.meta[:terms] = terms
	definition.meta[:definitions] = definitions
	definition.children = terms + definitions
	definition.meta[:want_my_paragraph] = want_my_paragraph
	definition
end

#read_footnote_textObject



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/maruku/parse_block.rb', line 288

# Reads a footnote definition ("[^id]: text" plus its continuation
# lines) and stores the resulting :footnote element in @footnotes
# under the id, which includes the leading '^'.
def read_footnote_text
	opening = shift_line

	opening =~ FootnoteText
	id, text = $1, $2

	# Ugly things going on inside `read_indented_content`:
	# continuation lines use a fixed indentation of 4 columns.
	lines, _want_my_paragraph =
		read_indented_content(4, [:footnote_text], :footnote_text)

	# the text on the opening line, when there is any, comes first
	lines.unshift(text) if text && text.strip != ""

	children = parse_lines_as_markdown(lines)
	@footnotes[id] = create_md_element(:footnote, children)
end

#read_indented_content(indentation, break_list, item_type) ⇒ Object

This is the only ugly function in the code base. It is used to read list items, descriptions, footnote text



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/maruku/parse_block.rb', line 318

# Collects the continuation lines of a list item, definition or
# footnote, starting at the current line.
#
# indentation:: number of columns stripped from each collected line;
#               after an empty line, a line indented less than this
#               ends the content.
# break_list::  line types that end the content, as long as no empty
#               line has been seen yet.
# item_type::   line type of the item being read; used only to compute
#               the second return value.
#
# Returns [lines, want_my_paragraph]. +lines+ has trailing empty lines
# removed. +want_my_paragraph+ is truthy when content followed an
# empty line, or when an empty line was seen and the line on which
# reading stopped is itself of type +item_type+.
def read_indented_content(indentation, break_list, item_type)
	lines =[]
	# collect all indented lines
	saw_empty = false; saw_anything_after = false
	while cur_line 
		# empty lines are kept for now (trailing ones are removed below)
		if cur_line_node_type == :empty
			saw_empty = true
			lines << shift_line
			next
		end
	
		# after a white line
		if saw_empty
			# we expect things to be properly aligned
			if number_of_leading_spaces(cur_line) < indentation
#						debug "breaking for spaces: #{cur_line}"
				break
			end
			saw_anything_after = true
		else
			break if break_list.include? cur_line_node_type
	#				break if cur_line_node_type != :text
		end
	
	#			debug "Accepted '#{cur_line}'"

		stripped = strip_indent(shift_line, indentation)
		lines << stripped
	
		# You are only required to indent the first line of 
		# a child paragraph.
		if line_node_type(stripped) == :text
			while cur_line && (cur_line_node_type == :text)
				lines << strip_indent(shift_line, indentation)
			end
		end
	end

	want_my_paragraph = saw_anything_after || 
		(saw_empty && (cur_line  && (cur_line_node_type == item_type))) 

	#		dbg_describe_ary(lines, 'LI')
	# create a new context 

	# drop the empty lines collected at the end
	while lines.last && (line_node_type(lines.last) == :empty)
		lines.pop
	end
	
	return lines, want_my_paragraph
end

#read_list_itemObject

Reads one list item, either ordered or unordered.



259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/maruku/parse_block.rb', line 259

# Reads one list item, ordered or unordered, starting at the current
# line.
#
# Returns a :li element whose children are the parsed content of the
# item. meta[:want_my_paragraph] is truthy when read_indented_content
# asked for it or when the item has more than one child.
def read_list_item
	item_type  = cur_line_node_type
	first_line = shift_line

	# Ugly things going on inside `read_indented_content`:
	# the content is indented as far as the text of the first line.
	indentation = spaces_before_first_char(first_line)
	lines, want_my_paragraph =
		read_indented_content(indentation, [:ulist, :olist], item_type)

	# the first line goes in front, without its '*', '-' or number
	lines.unshift(first_line[indentation, first_line.size-1])

	item = create_md_element(:li)
	item.children = parse_lines_as_markdown(lines)
	item.meta[:want_my_paragraph] = want_my_paragraph || (item.children.size>1)
	item
end

#read_metadataObject

Reads a series of metadata lines with empty lines in between



403
404
405
406
407
408
409
410
411
412
413
# File 'lib/maruku/parse_block.rb', line 403

# Reads a run of metadata lines, skipping the empty lines in between,
# and stops at the first line of any other type.
#
# Returns a single hash with the result of parse_metadata for every
# metadata line merged in; later lines override earlier ones.
def read_metadata
	hash = {}
	while cur_line 
		case cur_line_node_type
			when :empty;  shift_line
			when :metadata; hash.merge! parse_metadata(shift_line)
			else break
		end
	end
	hash
end

#read_paragraphObject



245
246
247
248
249
250
251
252
253
254
# File 'lib/maruku/parse_block.rb', line 245

# Reads a paragraph: the current line and every following :text line.
#
# Returns a :paragraph element whose children are the span-level
# parse of those lines.
def read_paragraph
	lines = []
	lines << shift_line while cur_line && cur_line_node_type == :text

	create_md_element(:paragraph, parse_lines_as_span(lines))
end

#read_quoteObject



370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/maruku/parse_block.rb', line 370

# Reads a block quote: every consecutive :quote line, passed through
# unquote, is parsed again as block-level markdown.
#
# Returns a :quote element holding the parsed content.
def read_quote
	# collect all quoted lines
	quoted = []
	quoted << unquote(shift_line) while cur_line && line_node_type(cur_line) == :quote

	element = create_md_element(:quote)
	element.children = parse_lines_as_markdown(quoted)
	element
end

#read_raw_htmlObject



210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/maruku/parse_block.rb', line 210

# Reads a block of raw HTML starting at the current line.
#
# Lines are consumed until a line closes the tag that opened the
# block, until an unindented line that is neither :raw_html nor
# :empty, or until the input ends.
#
# Returns a :raw_html element. meta[:raw_html] holds the source text;
# meta[:parsed_html] holds the parsed Document and is left unset when
# the block cannot be parsed (a message is then written to $stderr).
def read_raw_html
	lines = []
	
	# Name of the opening tag. The pattern accepts no leading spaces, so
	# +tag+ is nil when the first line is indented.
	cur_line =~ %r{^<(\w+)}
	tag = $1 
#		puts "Start tag = #{tag} "
	
	while cur_line 
		break if (number_of_leading_spaces(cur_line) == 0) &&
			(not [:raw_html, :empty].include?  cur_line_node_type)

		lines << shift_line
		# check for a closing tag
		# ($1 comes from whichever of the two patterns matched)
		if (lines.last =~ %r{^</(\w+)}|| 
			lines.last =~ %r{</(\w+)>\s*$})  && $1 == tag
			break
		end
	end
	
#		dbg_describe_ary(lines, 'HTML')

	raw_html = lines.join("\n")

	e = create_md_element(:raw_html)

	begin
		e.meta[:parsed_html] = Document.new(raw_html)
	rescue 
		# best effort: the raw text is still stored below
		$stderr.puts "Malformed block of HTML:\n#{raw_html}"
	end
	
	e.meta[:raw_html] = raw_html
	e
end

#read_refObject



429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
# File 'lib/maruku/parse_block.rb', line 429

# Reads a link reference definition such as
#
#   [id]: http://example.com "Optional title" key=value key2="value2"
#
# and stores it in refs under the stripped, downcased id as a hash
# with :url, :title and one entry per trailing key=value attribute.
#
# A definition may continue on following lines indented by one to
# three spaces; those lines are joined to the first with a space.
#
# Raises RuntimeError when the joined line does not match LinkRegex.
def read_ref
	line = shift_line
	
	# if link is incomplete, shift next line
	while cur_line && (cur_line_node_type != :ref) && 
		([1,2,3].include? number_of_leading_spaces(cur_line) )
		line += " "+ shift_line
	end
	
#		puts "total= #{line}"
	
	match = LinkRegex.match(line)
	raise "Link does not respect format: '#{line}'" unless match

	id = match[1].strip.downcase
	hash = refs[id] = {:url=>match[2], :title=>match[3]}

	# whatever follows the title: whitespace-separated key=value couples
	stuff = match[4]
	if stuff
		stuff.split.each do |couple|
			# split on the first '=' only, so values may contain '='
			k, v = couple.split('=', 2)
			# a bare word without '=' carries no value: ignore it
			next if v.nil?
			# remove the quotes around a quoted value
			if v[0,1]=='"' then v = v[1, v.size-2] end
			hash[k.to_sym] = v
		end
	end
end

#read_tableObject



464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
# File 'lib/maruku/parse_block.rb', line 464

# Reads a table: a header line, a separator line, then every following
# line that contains a pipe.
#
# Returns a :table element whose children are the head cells followed
# by the body cells, in reading order, and whose meta[:align] lists
# the alignment of every column (:left, :right or :center).
#
# When the header or any row does not have as many cells as the
# separator line has columns, a message is written to $stderr and a
# :linebreak element is returned instead.
def read_table
	
	# Cuts a line into its stripped, non-empty cells.
	def split_cells(s)
		s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
	end
	
	head = split_cells(shift_line).map{|s|
		create_md_element(:head_cell, parse_lines_as_span([s]))}
		
	separator=split_cells(shift_line)

	# a colon on both sides centers the column, a colon on the right
	# aligns it to the right; everything else is aligned to the left
	align = separator.map { |s|  s =~ Sep
		if $1 and $2 then :center elsif $2 then :right else :left end }
			
	num_columns = align.size
	
	if head.size != num_columns
		$stderr.puts "Head does not have #{num_columns} columns: \n#{head.inspect}"
		return create_md_element(:linebreak)
	end
			
	rows = []
	
	while cur_line && cur_line =~ /\|/
		row = split_cells(shift_line).map{|s|
			create_md_element(:cell, parse_lines_as_span([s]))}
		# every row must be as wide as the separator line
		if row.size != num_columns
			$stderr.puts "Row does not have #{num_columns} columns: \n#{row.inspect}"
			return create_md_element(:linebreak)
		end
		rows << row
	end

	e = create_md_element(:table)
	e.meta[:align] = align
	e.children = (head+rows).flatten
	e
end

#render_footnotesObject



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/maruku/to_html.rb', line 93

# Builds the footnotes section of the HTML output.
#
# Returns a <div class="footnotes"> holding a rule and an ordered list
# with one item per id in @doc.meta[:footnotes_used], numbered from 1
# in that order. Every item gets the id "fn:N" and a trailing link
# back to "#fnref:N". Ids with no entry in @footnotes are reported on
# $stderr and skipped (the numbering still advances).
def render_footnotes
	container = Element.new('div')
	container.attributes['class'] = 'footnotes'
	container << Element.new('hr')

	list = Element.new('ol')
	@doc.meta[:footnotes_used].each_with_index do |fid, index|
		footnote = @footnotes[fid]
		unless footnote
			$stderr.puts "Could not find footnote '#{fid}'"
			next
		end

		num = index + 1
		item = footnote.wrap_as_element('li')
		item.attributes['id'] = "fn:#{num}"

		# link back to the place where the footnote is referenced
		backlink = Element.new('a')
		backlink.attributes['href'] = "#fnref:#{num}"
		backlink.attributes['rev'] = 'footnote'
		backlink << Text.new('&#8617;', true, nil, true)

		item.children.last << backlink
		list << item
	end

	container << list
	container
end

#resolve_linebreaks(lines) ⇒ Object

returns array containing Strings or :linebreak elements



270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/maruku/parse_span.rb', line 270

# Joins the lines of a span into strings, breaking only where a line
# forces it (see force_linebreak?).
#
# Lines are stripped and joined with a single space. After every line
# that forces a break, the text gathered so far is emitted, followed
# by a :linebreak element.
#
# Returns an array of Strings and :linebreak elements.
def resolve_linebreaks(lines)
	pieces  = []
	pending = ""
	lines.each do |line|
		pending += " " unless pending.size == 0
		pending += line.strip
		next unless force_linebreak?(line)

		pieces << pending
		pieces << create_md_element(:linebreak)
		pending = ""
	end
	pieces << pending if pending.size > 0
	pieces
end

#search_abbreviationsObject



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/maruku/parse_block.rb', line 26

# Marks up every occurrence of a known abbreviation in the document.
#
# For each entry of @abbreviations, map_match is called with a pattern
# matching the abbreviation literally; the block it is given builds an
# :abbreviation element whose only child is a copy of the abbreviation
# and whose meta[:title] is a copy of the description, when there is
# one.
def search_abbreviations
	@abbreviations.each do |abbrev, title|
		literal = Regexp.new(Regexp.escape(abbrev))
		map_match(literal) do
			element = create_md_element(:abbreviation)
			element.children = [abbrev.dup]
			element.meta[:title] = title.dup if title
			element
		end
	end
end

#shift_lineObject



207
# File 'lib/maruku/parse_block.rb', line 207

# Removes the first line from the buffer returned by +top+ and returns
# it (nil when the buffer is empty).
def shift_line
	top.shift
end

#spaces_before_first_char(s) ⇒ Object

This returns the position of the first real char in a list item

For example:

'*Hello' # => 1
'* Hello' # => 2
' * Hello' # => 3
' *   Hello' # => 5
'1.Hello' # => 2
' 1.  Hello' # => 5


57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/maruku/string_utils.rb', line 57

# Returns the position of the first character of the text of a list
# item, i.e. the width of everything that precedes it: leading
# whitespace, the item marker and the whitespace after the marker.
#
# The marker is a single character for unordered items and a run of
# digits plus one more character (the dot) for ordered items.
#
# Returns nil when +s+ is neither a :ulist nor an :olist line.
def spaces_before_first_char(s)
	kind = line_node_type(s)
	return nil unless [:ulist, :olist].include?(kind)

	pos = 0
	# skip whitespace
	pos += 1 while s[pos, 1] =~ /\s/
	if kind == :ulist
		# skip indicator
		pos += 1
	else
		# skip digits
		pos += 1 while s[pos, 1] =~ /\d/
		# skip dot
		pos += 1
	end
	# skip whitespace
	pos += 1 while s[pos, 1] =~ /\s/
	pos
end

#split_cells(s) ⇒ Object



466
467
468
# File 'lib/maruku/parse_block.rb', line 466

# Splits a table row on '|' and returns the trimmed, non-blank cells.
def split_cells(s)
	s.strip.split('|').map(&:strip).reject(&:empty?)
end

#split_lines(s) ⇒ Object

Split a string into lines, and chomps the newline



4
5
6
7
8
9
10
11
# File 'lib/maruku/string_utils.rb', line 4

# Returns the lines of +s+ as an array, each with its line terminator removed.
def split_lines(s)
	s.each_line.map(&:chomp)
end

#strip_hashes(s) ⇒ Object

Strips initial and final hashes



95
96
97
98
99
100
# File 'lib/maruku/string_utils.rb', line 95

# Strips initial and final hashes.
#
# Drops the first num_leading_hashes(s) characters, then cuts everything
# after the last character that is neither '#' nor whitespace (the character
# at index 0 is always kept by the cut), and finally trims whitespace.
def strip_hashes(s)
	body = s[num_leading_hashes(s), s.size]
	# index of the last meaningful character; 0 when there is none
	cut = body.rindex(/[^#\s]/) || 0
	body[0, cut + 1].strip
end

#strip_indent(s, n) ⇒ Object

Removes at most n columns of leading indentation (a space counts as one column, a tab as TabSize)



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/maruku/string_utils.rb', line 109

# Removes at most +n+ columns of leading indentation from +s+.
#
# A space consumes one column and a tab consumes TabSize columns; scanning
# stops at the first character that is neither, or once the budget is spent.
#
# Returns the remainder of the string, or "" when nothing is left.
def strip_indent(s, n)
	i = 0
	while i < s.size && n > 0
		case s[i, 1]
		when ' '  then n -= 1
		when "\t" then n -= TabSize
		else break
		end
		i += 1
	end
	# Slice to the end of the string. The previous `s[i, s.size-1]` passed a
	# *length*, which dropped the last character whenever nothing was
	# stripped (i == 0) and returned nil for an empty string.
	s[i..-1]
end

#substitute_markdown_inside_raw_htmlObject

(PHP Markdown extra) Search for elements that have markdown=1 or markdown=block defined



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/maruku/parse_block.rb', line 40

# (PHP Markdown extra) Looks inside every :raw_html element whose HTML was
# parsed successfully for nodes carrying a 'markdown' attribute, removes
# that attribute, and replaces each of the node's text children with the
# HTML rendering of that text parsed as markdown.
#
# Text is parsed block-level when markdown="block" or when the tag is a
# 'div'; otherwise it is parsed span-level.
def substitute_markdown_inside_raw_html
	# tags whose content is always treated as block-level markdown
	block_level_tags = ['div']

	each_element(:raw_html) do |raw|
		tree = raw.meta[:parsed_html]
		next unless tree # not valid html

		# use xpath to find elements with a 'markdown' attribute
		tree.elements.to_a( "//*[attribute::markdown]" ).each do |node|
			# should we parse block-level or span-level?
			as_blocks = (node.attributes['markdown'] == 'block') ||
			            block_level_tags.include?(node.name)
			node.delete_attribute 'markdown'

			node.texts.each do |text_node|
				source = text_node.to_s.strip # XXX
				parsed = as_blocks ? parse_text_as_markdown(source) :
				                     parse_lines_as_span(source)
				wrapper = create_md_element(:dummy, parsed)
				# put the rendered nodes where the raw text was, then drop the text
				wrapper.children_to_html.each do |rendered|
					node.insert_before(text_node, rendered)
				end
				node.delete(text_node)
			end
		end
	end
end

#to_htmlObject

Render as an HTML fragment (no head, just the content of BODY). (returns a string)



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/maruku/to_html.rb', line 12

# Render as an HTML fragment (no head, just the content of BODY).
# Returns a string.
#
# The rendered children (plus footnotes, when @doc.meta[:footnotes_used] is
# set) are collected under a throwaway element whose children are then
# serialized.
def to_html
	container = Element.new 'dummy'
	children_to_html.each { |node| container << node }

	# render footnotes
	container << render_footnotes if @doc.meta[:footnotes_used]

	# REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
	# containing code.
	# Arguments: indent = -1, transitive = false, ie_hack = true
	output = ""
	container.write_children(output, -1, false, true)
	output
end

#to_html_documentObject

Render to a complete HTML document (returns a string)



31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/maruku/to_html.rb', line 31

# Render to a complete HTML document (returns a string): the XML declaration
# and XHTML 1.0 Strict doctype followed by the serialized document tree.
def to_html_document
	tree = to_html_document_tree

	# REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
	# containing code.
	# Arguments: indent = -1, transitive = false, ie_hack = true
	markup = ""
	tree.write(markup, -1, false, true)

	prolog = "<?xml version='1.0'?>
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>\n"
	prolog + markup
end

#to_html_document_treeObject

Render to a complete HTML document (returns a REXML document tree)



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/maruku/to_html.rb', line 45

# Render to a complete HTML document (returns a REXML document tree).
#
# Reads from @meta: :lang (default 'en', written to both 'lang' and
# 'xml:lang'), :title or :subject (default ""), and :css (optional
# stylesheet link). Footnotes are appended to the body when
# @doc.meta[:footnotes_used] is set.
def to_html_document_tree
	doc = Document.new(nil,{:respect_whitespace =>:all})
#	doc << XMLDecl.new
	
	# Passing a parent to Element.new attaches the new element to it
	root = Element.new('html', doc)
	root.add_namespace('http://www.w3.org/1999/xhtml')
	
	lang = @meta[:lang] || 'en'
	root.attributes['lang'] = lang
	root.attributes['xml:lang'] = lang
	
	# <head> becomes a child of <html> right here, ahead of <body>
	head = Element.new 'head', root
	
		# Create title element
		doc_title = @meta[:title] || @meta[:subject] || ""
		title = Element.new 'title'
			title << Text.new(doc_title)
		head << title
		
		css = @meta[:css]
		if css
			# <link type="text/css" rel="stylesheet" href="..." />
			link = Element.new 'link'
			link.attributes['type'] = 'text/css'
			link.attributes['rel'] = 'stylesheet'
			link.attributes['href'] = css
			head << link
		end
		
	body = Element.new 'body'
	
		children_to_html.each do |e|
			body << e
		end

		# render footnotes
		if @doc.meta[:footnotes_used]
			body << render_footnotes
		end
	
	# Only <body> still needs attaching: <head> was attached at construction,
	# and adding it a second time would list it twice among <html>'s children.
	root << body
	
	doc
end

#to_latexObject

Render as a LaTeX fragment



6
7
8
# File 'lib/maruku/to_latex.rb', line 6

# Render as a LaTeX fragment: simply the result of children_to_latex.
def to_latex
	children_to_latex()
end

#to_latex_documentObject

Render as a complete LaTeX document



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/maruku/to_latex.rb', line 11

# Render as a complete LaTeX document (returns a string).
#
# The preamble gains a \usepackage{listings} line when
# @doc.meta[:latex_use_listings] is set; the body is the output of to_latex.
def to_latex_document
	preamble = @doc.meta[:latex_use_listings] ? "\\usepackage{listings}\n" : ""

	content = to_latex

	# Disabled lines from the original author, kept for reference
	# ("I found a fix!! :-)"):
	# ## xxx only if `...` is used in footnotes
	# preamble += "\\usepackage{fancyvrb}\n"
	# content = "\\VerbatimFootnotes\n"+content
	
"\\documentclass{article}

#{preamble}
\\usepackage{hyperref}
%\\usepackage[x11names]{xcolor}
\\usepackage[usenames,dvipsnames]{color}
\\hypersetup{colorlinks=true}
\\begin{document} 
#{content}
\\end{document}
"	
end

#topObject



202
# File 'lib/maruku/parse_block.rb', line 202

# Returns the last entry of @stack (nil when @stack is empty).
def top
	@stack.last
end

#unquote(s) ⇒ Object

removes initial quote



104
105
106
# File 'lib/maruku/string_utils.rb', line 104

# Removes the initial quote marker: a '>' at the start of any line of +s+,
# together with at most one whitespace character following it.
def unquote(s)
	quote_marker = /^>\s?/
	s.gsub(quote_marker, '')
end