Module: MaRuKu::In::Markdown::BlockLevelParser

Includes:
Helpers, SpanLevelParser, Strings
Defined in:
lib/omf-web/widget/text/maruku/input/parse_block.rb

Defined Under Namespace

Classes: BlockContext

Instance Method Summary collapse

Methods included from Helpers

#md_abbr, #md_abbr_def, #md_ald, #md_br, #md_code, #md_codeblock, #md_el, #md_em, #md_email, #md_emstrong, #md_entity, #md_foot_ref, #md_footnote, #md_header, #md_hrule, #md_html, #md_ial, #md_im_image, #md_im_link, #md_image, #md_li, #md_link, #md_par, #md_quote, #md_ref_def, #md_strong, #md_url, #md_xml_instr

Instance Method Details

#eventually_comes_a_def_list(src) ⇒ Object

If the current line is text, a definition list is coming when the upcoming lines are: one or more text lines, an optional empty line, then a definition line.



557
558
559
560
561
562
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 557

# Tells whether a definition list starts at the current position.
#
# src:: must respond to +tell_me_the_future+; the value it returns is matched
#       against "one or more t, an optional e, then d" (presumably one letter
#       per upcoming line type: text, empty, definition).
#
# Returns the match offset (truthy) when the pattern matches, nil otherwise.
def eventually_comes_a_def_list(src)
  upcoming = src.tell_me_the_future
  upcoming =~ %r{^t+e?d}x
end

#parse_blocks(src) ⇒ Object

Input is a LineSource



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 45

# Runs the block-level state machine over a line source.
#
# src:: the line source to consume; it is read through +cur_line+,
#       +shift_line+ and +ignore_line+ until +cur_line+ returns nil.
#
# Returns the BlockContext that collected the parsed elements, after :ial
# elements and :empty markers have been removed and lists / definition
# lists whose items do not want paragraphs have been unwrapped.
def parse_blocks(src)
  output = BlockContext.new

  # run state machine
  while src.cur_line

    # Block extensions get the first chance at the line; a truthy result
    # means the line was handled and the loop restarts.
    next if check_block_extensions(src, output, src.cur_line)

#  Prints detected type (useful for debugging)
    #puts "PARSE>>>> #{src.cur_line.md_type}|#{src.cur_line}"
    case src.cur_line.md_type
      when :empty;
        # Keep a marker in the output; markers are deleted after the loop.
        output.push :empty
        src.ignore_line
      when :ial
        # Inline attribute list: its captured content (or "") is re-read
        # through a CharSource and handed to interpret_extension.
        m =  InlineAttributeList.match src.shift_line
        content = m[1] ||  ""
#					puts "Content: #{content.inspect}"
        src2 = CharSource.new(content, src)
        interpret_extension(src2, output, [nil])
      when :ald
        # NOTE(review): read_ald returns nil on its error path, so nil can
        # end up in output here -- confirm the post-processing tolerates it.
        output.push read_ald(src)
      when :text
        # paragraph, or table, or definition list
        read_text_material(src, output)
      when :header2, :hrule
        # A :header2 line reaching this branch is emitted as a horizontal
        # rule, exactly like :hrule.
        src.shift_line
        output.push md_hrule()
      when :header3
        output.push read_header3(src)
      when :ulist, :olist
        list_type = src.cur_line.md_type == :ulist ? :ul : :ol
        li = read_list_item(src)
        # append to current list if we have one
        if output.last.kind_of?(MDElement) &&
          output.last.node_type == list_type then
          output.last.children << li
        else
          output.push md_el(list_type, [li])
        end
      when :quote;    output.push read_quote(src)
      # read_code / read_raw_html results are only appended when truthy.
      when :code;     e = read_code(src); output << e if e
      when :raw_html; e = read_raw_html(src); output << e if e

      when :footnote_text;   output.push read_footnote_text(src)
      when :ref_definition
        # On the first line of a nested source (one with a parent) the line
        # is handled as text material instead of a reference definition.
        if src.parent && (src.cur_index == 0)
          read_text_material(src, output)
        else
          read_ref_definition(src, output)
        end
      when :abbreviation;    output.push read_abbreviation(src)
      when :xml_instr;       read_xml_instruction(src, output)
      when :metadata;
        # Old-style metadata lines are reported and skipped.
        maruku_error "Please use the new meta-data syntax: \n"+
        "  http://maruku.rubyforge.org/proposal.html\n", src
        src.ignore_line
      else # warn if we forgot something
        md_type = src.cur_line.md_type
        line = src.cur_line
        maruku_error "Ignoring line '#{line}' type = #{md_type}", src
        src.shift_line
    end
  end

  # merge_ial runs while the :ial elements are still present; they are
  # deleted right afterwards.
  merge_ial(output, src, output)
  output.delete_if {|x| x.kind_of?(MDElement) &&
    x.node_type == :ial}

  # get rid of empty line markers
  output.delete_if {|x| x == :empty}
  # See for each list if we can omit the paragraphs and use li_span
  # TODO: do this after
  output.each do |c|
    # Remove paragraphs that we can get rid of
    if [:ul,:ol].include? c.node_type
      # Only when no item wants its paragraph: each item becomes :li_span
      # and takes over the children of its first child.
      if c.children.all? {|li| !li.want_my_paragraph} then
        c.children.each do |d|
          d.node_type = :li_span
          d.children = d.children[0].children
        end
      end
    end
    if c.node_type == :definition_list
      # Same unwrapping for definition lists: every definition data element
      # takes over the children of its first child.
      if c.children.all?{|defi| !defi.want_my_paragraph} then
        c.children.each do |definition|
          definition.definitions.each do |dd|
            dd.children = dd.children[0].children
          end
        end
      end
    end
  end

  output
end

#parse_text_as_markdown(text) ⇒ Object

Splits the string into lines, wraps them in a LineSource and calls parse_blocks



38
39
40
41
42
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 38

# Parses a Markdown string into block-level elements.
#
# The text is split with +split_lines+, wrapped in a LineSource and handed
# to +parse_blocks+, whose result is returned unchanged.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end

#read_abbreviation(src) ⇒ Object



308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 308

# Reads one abbreviation definition line from +src+.
#
# The line is matched against +Abbreviation+; its first capture is the
# abbreviation and its second the description. Problems (no match, or an
# empty abbreviation) are reported through +maruku_error+, after which
# processing continues with whatever was captured.
#
# The pair is recorded in +abbreviations+ and returned wrapped by
# +md_abbr_def+.
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end

  # Captures of the match above (both nil when it failed).
  abbr, desc = $1, $2

  if abbr.nil? || abbr.empty?
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end

  abbreviations[abbr] = desc
  md_abbr_def(abbr, desc)
end

#read_ald(src) ⇒ Object



162
163
164
165
166
167
168
169
170
171
172
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 162

# Reads one attribute definition list (ALD) line from +src+.
#
# The line is matched against +AttributeDefinitionList+: capture 1 is the
# id, capture 2 the raw attribute text, which is parsed through
# +read_attribute_list+. The parsed list is stored in +ald+ under the id.
#
# Returns the +md_ald+ element, or nil (after reporting through
# +maruku_error+) when the line does not match.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  id, raw_attributes = $1, $2
  attributes = read_attribute_list(CharSource.new(raw_attributes, src), nil, [nil])
  ald[id] = attributes
  md_ald(id, attributes)
end

#read_code(src) ⇒ Object



434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 434

# Reads an indented code block from +src+.
#
# Consecutive :code and :empty lines are consumed, each passed through
# +strip_indent+ with an indentation of 4. Blank lines at both ends of the
# collected text are then dropped.
#
# Returns the +md_codeblock+ element for the lines joined with "\n", or nil
# when nothing but blank lines was collected.
def read_code(src)
  collected = []
  while (current = src.cur_line) && [:code, :empty].include?(current.md_type)
    collected << strip_indent(src.shift_line, 4)
  end

  # Trim blank lines from the tail, then from the head.
  collected.pop while collected.last && collected.last.strip.size == 0
  collected.shift while collected.first && collected.first.strip.size == 0

  return nil if collected.empty?

  md_codeblock(collected.join("\n"))
end

#read_definition(src) ⇒ Object



565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 565

# Reads one definition (its terms followed by its definitions) from +src+.
#
# Layout consumed, in order:
# 1. every leading :text line, each becoming a :definition_term element;
# 2. at most one :empty line (its presence makes the entry want paragraphs);
# 3. one or more :definition lines, each with its indented continuation
#    (read through +read_indented_content+) parsed recursively with
#    +parse_blocks+ into a :definition_data element.
#
# Raises RuntimeError ("Chunky Bacon!") when the source ends after the terms
# or when the terms are not followed by a :definition line.
#
# Returns a :definition element whose children are the terms followed by the
# definitions, with :terms, :definitions and :want_my_paragraph in its meta.
def read_definition(src)
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    term_spans = parse_lines_as_span([src.shift_line])
    terms << md_el(:definition_term, term_spans)
  end

  raise "Chunky Bacon!" unless src.cur_line

  # one optional empty line between the terms and the first definition
  paragraphs_wanted = (src.cur_line.md_type == :empty)
  src.shift_line if paragraphs_wanted

  raise "Chunky Bacon!" unless src.cur_line.md_type == :definition

  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    start_index = src.cur_index

    # Keep only what the Definition pattern captures from the opening line.
    opening = src.shift_line
    opening =~ Definition
    first_line = $1

    body, separated = read_indented_content(src, 4, [:definition], :definition)
    paragraphs_wanted ||= separated
    body.unshift first_line

    nested = LineSource.new(body, src, start_index)
    definitions << md_el(:definition_data, parse_blocks(nested))
  end

  md_el(:definition, terms + definitions, {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => paragraphs_wanted})
end

#read_footnote_text(src) ⇒ Object



325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 325

# Reads a footnote definition and its indented body from +src+.
#
# The opening line is matched against +FootnoteText+ (capture 1: footnote
# id, capture 2: text on the opening line); a mismatch is reported through
# +maruku_error+. The continuation is collected by +read_indented_content+
# with a fixed indentation of 4, and the whole body is parsed recursively
# with +parse_blocks+.
#
# The resulting +md_footnote+ element is registered in +footnotes+ under
# its id and returned.
def read_footnote_text(src)
  start_index = src.cur_index
  opening = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless opening =~ FootnoteText
  id, first_text = $1, $2

  # Ugly things going on inside `read_indented_content`
  body, _wants_paragraph = read_indented_content(
    src, 4, [:footnote_text, :ref_definition, :definition, :abbreviation], :footnote_text)

  # The text on the opening line is part of the body unless it is blank.
  body.unshift first_text if first_text && first_text.strip != ""

  nested = LineSource.new(body, src, start_index)
  footnote = md_footnote(id, parse_blocks(nested))
  footnotes[id] = footnote
  footnote
end

#read_header12(src) ⇒ Object

Reads a header underlined with ----- or =====



175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 175

# Reads a two-line header: a title line followed by an underline line.
#
# When +new_meta_data?+ is true and the title ends in "{...}", the braces'
# content is parsed as an attribute list and removed from the title.
# The level is 2 when the underline line is of type :header2, 1 otherwise.
# Both lines are consumed.
#
# Returns the +md_header+ element built from the level, the title spans,
# the attribute list (or nil) and the source index after the header.
def read_header12(src)
  title = src.shift_line.strip
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? && title =~ /^(.*)\{(.*)\}\s*$/
    title, raw_ial = $1.strip, $2
    attributes = read_attribute_list(CharSource.new(raw_ial, src), nil, [nil])
  end

  spans = parse_lines_as_span([title])
  level = src.cur_line.md_type == :header2 ? 2 : 1
  src.shift_line
  md_header(level, spans, attributes, src.cur_index)
end

#read_header3(src) ⇒ Object

reads a header like ‘#### header ####’



191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 191

# Reads a single-line header written with leading hashes.
#
# When +new_meta_data?+ is true and the line ends in "{...}", the braces'
# content is parsed as an attribute list and removed from the line.
# The level comes from +num_leading_hashes+ and the title from
# +strip_hashes+.
#
# Returns the +md_header+ element built from the level, the title spans,
# the attribute list (or nil) and the source index after the header.
def read_header3(src)
  heading = src.shift_line.strip
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? && heading =~ /^(.*)\{(.*)\}\s*$/
    heading, raw_ial = $1.strip, $2
    attributes = read_attribute_list(CharSource.new(raw_ial, src), nil, [nil])
  end

  level = num_leading_hashes(heading)
  spans = parse_lines_as_span([strip_hashes(heading)])
  md_header(level, spans, attributes, src.cur_index)
end

#read_indented_content(src, indentation, break_list, item_type) ⇒ Object

This is the only ugly function in the code base. It is used to read the indented content of list items, definitions and footnote text.



362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 362

# Collects the lines that belong to an indented item (the callers in this
# file are read_list_item, read_definition and read_footnote_text).
#
# src::         line source, consumed as lines are accepted
# indentation:: number of leading spaces a line must have once an empty
#               line has been seen; also the amount given to strip_indent
# break_list::  line types that end the item as long as no empty line has
#               been seen yet
# item_type::   line type of the item being read; used only to decide
#               want_my_paragraph
#
# Returns [lines, want_my_paragraph]: the accepted lines (indentation
# stripped, trailing :empty lines removed) and a flag that is truthy when
# content followed an empty line, or when an empty line was seen and the
# line that stopped the scan is another item of +item_type+.
def read_indented_content(src, indentation, break_list, item_type)
  lines =[]
  # collect all indented lines
  # saw_empty is never reset: once an empty line is seen, every later line
  # goes through the indentation check.
  saw_empty = false; saw_anything_after = false
  while src.cur_line
#			puts "Reading indent = #{indentation} #{src.cur_line.inspect}"
    #puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
    # Empty lines are always accepted (trailing ones are removed below).
    if src.cur_line.md_type == :empty
      saw_empty = true
      lines << src.shift_line
      next
    end

    # after a white line
    if saw_empty
      # we expect things to be properly aligned
      if (ns=number_of_leading_spaces(src.cur_line)) < indentation
        #puts "breaking for spaces, only #{ns}: #{src.cur_line}"
        break
      end
      saw_anything_after = true
    else
      # No empty line yet: only a line type from break_list ends the item.
#				if src.cur_line[0] != ?\
        break if break_list.include? src.cur_line.md_type
#				end
#				break if src.cur_line.md_type != :text
    end


    stripped = strip_indent(src.shift_line, indentation)
    lines << stripped

    #puts "Accepted as #{stripped.inspect}"

    # You are only required to indent the first line of
    # a child paragraph.
    # Following :text lines are absorbed without any indentation check.
    if stripped.md_type == :text
      while src.cur_line && (src.cur_line.md_type == :text)
        lines << strip_indent(src.shift_line, indentation)
      end
    end
  end

  # src.cur_line is now the line that stopped the scan (nil at end of input).
  want_my_paragraph = saw_anything_after ||
    (saw_empty && (src.cur_line  && (src.cur_line.md_type == item_type)))

#		dbg_describe_ary(lines, 'LI')
  # create a new context

  # Drop the empty lines collected at the end of the item.
  while lines.last && (lines.last.md_type == :empty)
    lines.pop
  end

  return lines, want_my_paragraph
end

#read_list_item(src) ⇒ Object

Reads one list item, either ordered or unordered.



281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 281

# Reads one list item, either ordered or unordered, from +src+.
#
# +spaces_before_first_char+ gives the width of the item marker and an
# optional inline attribute list found on the first line. The continuation
# lines are collected by +read_indented_content+ and, together with the
# first line minus its marker, parsed recursively with +parse_blocks+.
#
# The item keeps its paragraphs when +read_indented_content+ says so or
# when it has more than one child.
#
# Returns the +md_li+ element.
def read_list_item(src)
  start_index = src.cur_index
  item_type = src.cur_line.md_type
  first_line = src.shift_line

  marker_width, raw_ial = spaces_before_first_char(first_line)
  attributes = raw_ial ? read_attribute_list(CharSource.new(raw_ial, src), nil, [nil]) : nil

  # Ugly things going on inside `read_indented_content`
  body, paragraphs_wanted =
    read_indented_content(src, marker_width, [:ulist, :olist, :ial], item_type)

  # Put the first line back, without its '*', '-', '+' (or number) marker.
  body.unshift first_line[marker_width, first_line.size - 1]

  children = parse_blocks(LineSource.new(body, src, start_index))
  with_par = paragraphs_wanted || (children.size > 1)

  md_li(children, with_par, attributes)
end

#read_metadata(src) ⇒ Object

Reads a series of metadata lines with empty lines in between



460
461
462
463
464
465
466
467
468
469
470
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 460

# Reads a series of metadata lines, with empty lines in between, from +src+.
#
# :empty lines are skipped, every :metadata line is parsed and merged into
# the result, and reading stops at the first line of any other type (which
# is left unconsumed).
#
# Returns a Hash with everything that was merged (empty when the source
# does not start with metadata).
def read_metadata(src)
  hash = {}
  while src.cur_line
    case src.cur_line.md_type
      when :empty
        src.shift_line
      when :metadata
        # NOTE(review): the listing had lost both the method name and this
        # callee; "parse_metadata" is reconstructed -- confirm against the
        # real file.
        hash.merge!(parse_metadata(src.shift_line))
      else
        break
    end
  end
  hash
end

#read_paragraph(src) ⇒ Object



258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 258

# Reads a paragraph: the current line plus every following line that does
# not end it.
#
# A following line ends the paragraph when
# * its type is :quote, :header3, :empty, :ref_definition or :ial;
# * it is a list line (:olist / :ulist) and the line after it has the same
#   type (a lone list-looking line does not break the paragraph);
# * it is blank once stripped;
# * the line after it is a :header1 / :header2 underline;
# * a block extension matches it.
#
# Returns the +md_par+ element built from the span-parsed lines and the
# source index after the paragraph.
def read_paragraph(src)
  gathered = [src.shift_line]

  while (current = src.cur_line)
    kind = current.md_type
    break if [:quote, :header3, :empty, :ref_definition, :ial].include?(kind)
    # :olist does not break, unless another item of the same kind follows
    break if [:olist, :ulist].include?(kind) && src.next_line.md_type == kind
    break if current.strip.size == 0
    break if [:header1, :header2].include?(src.next_line.md_type)
    break if any_matching_block_extension?(current)

    gathered << src.shift_line
  end

  md_par(parse_lines_as_span(gathered, src), nil, src.cur_index)
end

#read_quote(src) ⇒ Object



419
420
421
422
423
424
425
426
427
428
429
430
431
432
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 419

# Reads a block quote from +src+.
#
# Every consecutive :quote line is consumed and passed through +unquote+;
# the resulting lines are parsed recursively with +parse_blocks+ (as a
# LineSource nested in +src+ at the quote's starting index).
#
# Returns the +md_quote+ element wrapping the parsed children.
def read_quote(src)
  start_index = src.cur_index

  quoted = []
  quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote

  nested = LineSource.new(quoted, src, start_index)
  md_quote(parse_blocks(nested))
end

#read_raw_html(src) ⇒ Object



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 235

# Reads a block of raw HTML from +src+.
#
# Lines are fed to an HTMLHelper (joined with "\n") until it reports
# +is_finished?+ or the source runs out. Any exception raised while feeding
# is reported through +maruku_error+ together with its backtrace, and
# whatever was read so far is still used. Non-blank text left after the
# HTML (+rest+) is reported as well.
#
# Returns the +md_html+ element for everything the helper read.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    while src.cur_line && !helper.is_finished?
      helper.eat_this("\n" + src.shift_line)
    end
  # NOTE(review): deliberately broad rescue kept as in the original; check
  # which exception classes HTMLHelper raises before narrowing it.
  rescue Exception => error
    details = error.inspect + error.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{add_tabs(details,1,'|')}\n", src
  end

  leftover = helper.rest
  unless leftover =~ /^\s*$/
    maruku_error "Could you please format this better?\n"+
      "I see that #{leftover.inspect} is left after the raw HTML.", src
  end

  md_html(helper.stuff_you_read)
end

#read_ref_definition(src, out) ⇒ Object



473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 473

# Reads a link reference definition from +src+ and pushes it onto +out+.
#
# When the next line is indented by 1 to 3 spaces and is not itself a
# footnote, reference, definition or abbreviation line, it is treated as
# the continuation of the definition and joined to the first line with a
# space.
#
# The joined line is matched against +LinkRegex+: capture 1 is the id
# (normalised with +sanitize_ref_id+), capture 2 the URL, capture 3 the
# title, and capture 4 optional whitespace-separated key=value pairs.
# URL, title and every pair are stored in +refs+ under the id.
#
# Pair handling:
# * the pair is split on the first "=" only, so values may contain "=";
# * pairs with an empty key (e.g. a stray "=") are ignored;
# * a value starting with a double quote loses that quote and, if present,
#   the closing one.
#
# Returns the result of pushing the +md_ref_def+ element onto +out+, or nil
# (after reporting through +maruku_error+) when the line does not match.
def read_ref_definition(src, out)
  line = src.shift_line

  # if link is incomplete, shift next line
  if src.cur_line &&
     ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(src.cur_line.md_type) &&
     [1, 2, 3].include?(number_of_leading_spaces(src.cur_line))
    line += " " + src.shift_line
  end

  match = LinkRegex.match(line)
  unless match
    maruku_error "Link does not respect format: '#{line}'"
    return
  end

  id = sanitize_ref_id(match[1])
  url = match[2]
  title = match[3]

  hash = self.refs[id] = {:url=>url,:title=>title}

  extra = match[4]
  if extra
    extra.split.each do |couple|
      # Limit of 2: everything after the first '=' belongs to the value.
      key, value = couple.split('=', 2)
      next if key.nil? || key.empty?
      value ||= ""
      # Drop the opening quote and the closing one only when it is there.
      value = value[1..-1].chomp('"') if value[0, 1] == '"'
      hash[key.to_sym] = value
    end
  end

  out.push md_ref_def(id, url, {:title=>title})
end

#read_table(src) ⇒ Object



519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 519

# Reads a table: a header line, a separator line, then every following
# line that contains a "|".
#
# Column alignment comes from matching each separator cell against +Sep+:
# both captures set gives :center, only the second gives :right, anything
# else :left.
#
# The header and every row must have exactly as many cells as the separator
# line. On a mismatch the problem is reported through +maruku_error+ and
# +tell_user+, and +md_br+ is returned instead of a table (lines consumed
# up to that point stay consumed).
#
# Returns a :table element whose children are the head cells followed by
# the row cells, flattened, with the alignment list in its meta.
def read_table(src)
  head = split_cells(src.shift_line).map { |s| md_el(:head_cell, parse_lines_as_span([s])) }

  separator = split_cells(src.shift_line)

  align = separator.map do |s|
    s =~ Sep
    if $1 and $2 then :center elsif $2 then :right else :left end
  end

  num_columns = align.size

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end

  rows = []

  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map { |s| md_el(:cell, parse_lines_as_span([s])) }
    # Check the row itself: the original compared head.size here, which had
    # already been validated above, so bad rows were never detected.
    if row.size != num_columns
      maruku_error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end
    rows << row
  end

  children = (head+rows).flatten
  return md_el(:table, children, {:align => align})
end

#read_text_material(src, output) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 142

# Dispatches a :text line to the right reader and stores the result in
# +output+.
#
# Checked in this order:
# 1. table: the current line matches +MightBeTableHeader+ and the next one
#    matches +TableSeparator+;
# 2. two-line header: the next line is of type :header1 or :header2;
# 3. definition list: +eventually_comes_a_def_list+ is truthy; the
#    definition is appended to the :definition_list at the end of +output+
#    when there is one, otherwise a new list is pushed;
# 4. otherwise a paragraph.
def read_text_material(src, output)
  upcoming = src.next_line

  if src.cur_line =~ MightBeTableHeader && upcoming && upcoming =~ TableSeparator
    return output.push(read_table(src))
  end
  return output.push(read_header12(src)) if [:header1, :header2].include?(upcoming.md_type)
  # Start of a paragraph
  return output.push(read_paragraph(src)) unless eventually_comes_a_def_list(src)

  definition = read_definition(src)
  current = output.last
  if current.kind_of?(MDElement) && current.node_type == :definition_list
    current.children << definition
  else
    output.push md_el(:definition_list, [definition])
  end
end

#read_xml_instruction(src, output) ⇒ Object



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 206

# Reads an XML processing instruction ("<?target code ?>") from +src+.
#
# The first line yields the optional target and the start of the code;
# further lines are appended (joined with "\n") until the code contains
# "?>". Text after the closing "?>" is reported through +maruku_error+ and
# left in the code; otherwise the closing marker is removed.
#
# When the target is 'mrk' and MaRuKu::Globals[:unsafe_features] is set,
# the code is run through +safe_execute_code+ and a truthy, non-String
# result is pushed onto +output+ element by element. In every other case an
# +md_xml_instr+ element is pushed.
#
# Raises RuntimeError "BugBug" when the first line is not an instruction
# and "Not expected" when the executed code returns a String.
def read_xml_instruction(src, output)
  match = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" unless match

  target = match[2] || ''
  code = match[3]
  code += "\n" + src.shift_line until code =~ /\?>/

  unless code =~ (/\?>\s*$/)
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
      add_tabs(code, 1, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')

  unless target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    return output.push(md_xml_instr(target, code))
  end

  # NOTE(review): this executes code embedded in the document; it is only
  # reached when the :unsafe_features global is enabled.
  result = safe_execute_code(self, code)
  return unless result
  raise "Not expected" if result.kind_of? String

  output.push(*result)
end

#split_cells(s) ⇒ Object



513
514
515
516
517
# File 'lib/omf-web/widget/text/maruku/input/parse_block.rb', line 513

# Splits one table line into its cells.
#
# The line is stripped and split on "|". Zero-length pieces (such as the
# one produced by a leading "|") are dropped, while whitespace-only pieces
# are kept and become empty cells. Every remaining cell is stripped.
#
# Returns an Array of Strings.
def split_cells(s)
  pieces = s.strip.split('|')
  # changed to allow empty cells: only truly empty pieces are discarded
  pieces.reject { |piece| piece.size == 0 }.map { |piece| piece.strip }
end