Module: MaRuKu::In::Markdown::BlockLevelParser

Includes:: Helpers, SpanLevelParser, Strings

Included in:: MDDocument

Defined in:: lib/maruku.rb,
lib/maruku/input/parse_doc.rb,
lib/maruku/input/linesource.rb,
lib/maruku/input/parse_block.rb

Defined Under Namespace

Constant Summary

Constants included from SpanLevelParser

SpanLevelParser::CharSource, SpanLevelParser::EscapedCharInInlineCode, SpanLevelParser::EscapedCharInQuotes, SpanLevelParser::EscapedCharInText, SpanLevelParser::IgnoreWikiLinks

Constants included from Strings

Strings::TAB_SIZE

Instance Method Summary collapse

#count_columns(row) ⇒ Object

count the actual number of elements in a row taking into account colspans.
#element_is_non_inline_html?(elem) ⇒ Boolean

Is the given element an HTML element whose root is not an inline element?
#eventually_comes_a_def_list(src) ⇒ Object

If current line is text, a definition list is coming if 1) text,empty,*,definition.
#execute_code_blocks ⇒ Object
#expand_attribute_list(al, result) ⇒ Object

Expands an attribute list into a Hash.
#find_colspan(al) ⇒ Object

Search an attribute list looking for a colspan.
#parse_blocks(src) ⇒ Object

Input is a LineSource.
#parse_doc(s) ⇒ Object
#parse_text_as_markdown(text) ⇒ Object

Splits the string and calls parse_lines_as_markdown.
#pick_apart_non_inline_html(children) ⇒ Object

If there are non-inline HTML tags in the paragraph, break them out into their own elements and make paragraphs out of everything else.
#read_abbreviation(src) ⇒ Object
#read_ald(src) ⇒ Object
#read_code(src) ⇒ Object
#read_definition(src) ⇒ Object
#read_footnote_text(src) ⇒ Object
#read_header12(src) ⇒ Object

Reads a header (underlined with ----- or =====).
#read_header3(src) ⇒ Object

reads a header like ‘#### header ####’.
#read_indented_content(src, indentation, break_list, item_type, ial_offset = 0) ⇒ Object

This is the only ugly function in the code base.
#read_list_item(src) ⇒ Object

Reads one list item, either ordered or unordered.
#read_paragraph(src) ⇒ Object
#read_quote(src) ⇒ Object
#read_raw_html(src) ⇒ Object
#read_ref_definition(src, out) ⇒ Object
#read_table(src) ⇒ Object
#read_text_material(src, output) ⇒ Object
#read_xml_instruction(src, output) ⇒ Object
#safe_execute_code(object, code) ⇒ Object
#search_abbreviations ⇒ Object
#split_cells(s, allowBlank = false) ⇒ Object
#substitute_markdown_inside_raw_html ⇒ Object

(PHP Markdown extra) Search for elements that have markdown=1 or markdown=block defined.

Instance Method Details

#count_columns(row) ⇒ `Object`

count the actual number of elements in a row taking into account colspans

# File 'lib/maruku/input/parse_block.rb', line 662

# Computes how many table columns a row occupies. A cell whose attribute
# list contains a "colspan" entry contributes that entry's integer value;
# every other cell contributes exactly one column.
#
# @param row [Array] cells, each responding to +al+ (attribute list or nil)
# @return [Integer] the column count including spans
def count_columns(row)
  row.inject(0) do |total, cell|
    attrs = cell.al
    span = find_colspan(attrs) if attrs && attrs.size > 0
    total + (span.nil? ? 1 : span[1].to_i)
  end
end

#element_is_non_inline_html?(elem) ⇒ `Boolean`

Is the given element an HTML element whose root is not an inline element?

Returns:

(Boolean)

# File 'lib/maruku/input/parse_block.rb', line 326

# Tells whether +elem+ is a raw-HTML element whose root node is not one of
# the inline HTML elements listed in HTML_INLINE_ELEMS.
#
# Always answers with a real boolean: when the parsed HTML has no root node
# name the result is +false+ (never +nil+).
#
# @param elem [Object] any child of a paragraph (MDElement, String, ...)
# @return [Boolean]
def element_is_non_inline_html?(elem)
  return false unless elem.is_a?(MDElement) && elem.node_type == :raw_html && elem.parsed_html

  # Look the root name up once and reuse it for both checks.
  root_name = elem.parsed_html.first_node_name
  root_name ? !HTML_INLINE_ELEMS.include?(root_name) : false
end

#eventually_comes_a_def_list(src) ⇒ `Object`

If current line is text, a definition list is coming if 1) text,empty,*,definition



690
691
692

# File 'lib/maruku/input/parse_block.rb', line 690

# Checks the look-ahead signature reported by +src.tell_me_the_future+:
# a definition list is coming when a line of that signature starts with one
# or more "t", an optional "e", and then a "d".
#
# @param src [#tell_me_the_future] the line source being parsed
# @return [Integer, nil] match offset when a definition list follows, else nil
def eventually_comes_a_def_list(src)
  upcoming = src.tell_me_the_future
  upcoming =~ %r{^t+e?d}x
end

#execute_code_blocks ⇒ `Object`

# File 'lib/maruku/input/parse_doc.rb', line 123

# Runs the code of every :xml_instr element whose target is 'maruku' through
# safe_execute_code, using the element itself as the evaluation receiver.
# String results are echoed to standard output; any other result is ignored.
def execute_code_blocks
  each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'

    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end

#expand_attribute_list(al, result) ⇒ `Object`

Expands an attribute list into a Hash

# File 'lib/maruku/input/parse_doc.rb', line 72

# Expands an attribute list into the +result+ Hash.
#
# * +:class+ values accumulate, separated by a single space.
# * +:id+ overwrites any previous id.
# * +:ref+ is looked up in +self.ald+; a known label is expanded
#   recursively (labels already expanded into +result+ are reported as a
#   circular reference), an unknown label is recorded under
#   +:unresolved_references+ and also set as a boolean attribute.
# * any other key is stored under its symbolized name.
#
# Accumulated strings are rebuilt rather than appended to in place, so the
# string objects held by +al+ (possibly shared through an attribute
# definition list) are never modified.
#
# @param al [#each] pairs of [key, value]
# @param result [Hash] destination hash, modified in place
def expand_attribute_list(al, result)
  al.each do |k, v|
    case k
    when :class
      # Never use << here: result[:class] may be the very object stored in al.
      result[:class] = result[:class] ? "#{result[:class]} #{v}" : v
    when :id
      result[:id] = v
    when :ref
      if self.ald[v]
        already = (result[:expanded_references] ||= [])
        if !already.include?(v)
          already << v
          expand_attribute_list(self.ald[v], result)
        else
          already << v
          maruku_error "Circular reference between labels.\n\n" +
          "Label #{v.inspect} calls itself via recursion.\nThe recursion is " +
            already.map(&:inspect).join(' => ')
        end
      else
        # Same aliasing concern as for :class above.
        result[:unresolved_references] =
          result[:unresolved_references] ? "#{result[:unresolved_references]} #{v}" : v

        # $stderr.puts "Unresolved reference #{v.inspect} (avail: #{self.ald.keys.inspect})"
        result[v.to_sym] = true
      end
    else
      result[k.to_sym] = v
    end
  end
end

#find_colspan(al) ⇒ `Object`

Search an attribute list looking for a colspan



684
685
686

# File 'lib/maruku/input/parse_block.rb', line 684

# Looks through an attribute list for the first entry whose key (element 0)
# is the string "colspan".
#
# @param al [Enumerable] attribute entries, indexable by position
# @return [Object, nil] the matching entry, or nil when there is none
def find_colspan(al)
  al.find do |entry|
    entry[0] == "colspan"
  end
end

#parse_blocks(src) ⇒ `Object`

Input is a LineSource

# File 'lib/maruku/input/parse_block.rb', line 24

# Parses block-level Markdown from a LineSource.
#
# Runs a loop over +src+ until +src.cur_line+ is nil. On each pass it first
# offers the current line to +check_block_extensions+; otherwise it
# dispatches on the line's +md_type+ to the matching +read_*+ helper, which
# is responsible for consuming the lines it handles. Afterwards it merges
# inline attribute lists, removes :ial elements and :empty markers, and
# unwraps the leading paragraph of list items / definitions when no child
# of the list asks to keep its paragraph.
#
# @param src [LineSource] the lines to parse
# @return [BlockContext] the parsed block elements
def parse_blocks(src)
  output = BlockContext.new

  # run state machine
  while src.cur_line
    # A block extension that accepts the line is expected to advance src itself.
    next if check_block_extensions(src, output, src.cur_line)

    md_type = src.cur_line.md_type

    # Prints detected type (useful for debugging)
    #puts "parse_blocks #{md_type}|#{src.cur_line}"
    case md_type
    when :empty
      # Keep a marker so later passes can see blank lines; stripped below.
      output << :empty
      src.ignore_line
    when :ial
      m = InlineAttributeList.match src.shift_line
      content = m[1] || ""
      src2 = CharSource.new(content, src)
      interpret_extension(src2, output)
    when :ald
      output << read_ald(src)
    when :text
      # paragraph, or table, or definition list
      read_text_material(src, output)
    when :header2, :hrule
      # hrule
      src.shift_line
      output << md_hrule
    when :header3
      output << read_header3(src)
    when :ulist, :olist
      list_type = (md_type == :ulist) ? :ul : :ol
      li = read_list_item(src)
      # append to current list if we have one
      if output.last.kind_of?(MDElement) &&
          output.last.node_type == list_type then
        output.last.children << li
      else
        output << md_el(list_type, li)
      end
    when :quote
      output << read_quote(src)
    when :code
      # read_code returns nil when the block contains only blank lines.
      e = read_code(src)
      output << e if e
    when :raw_html
      # More extra hacky stuff - if there's more than just HTML, we either wrap it
      # in a paragraph or break it up depending on whether it's an inline element or not
      e = read_raw_html(src)
      unless e.empty?
        # Inline root elements (except svg and math) are wrapped in a paragraph
        # together with the children of the trailing element, if any.
        if e.first.parsed_html &&
            (first_node_name = e.first.parsed_html.first_node_name) &&
            HTML_INLINE_ELEMS.include?(first_node_name) &&
            !%w(svg math).include?(first_node_name)
          content = [e.first]
          if e.size > 1
            content.concat(e[1].children)
          end
          output << md_par(content)
        else
          output.concat(e)
        end
      end
    when :footnote_text
      output << read_footnote_text(src)
    when :ref_definition
      # On the first line of a nested source this is read as ordinary text.
      if src.parent && src.cur_index == 0
        read_text_material(src, output)
      else
        read_ref_definition(src, output)
      end
    when :abbreviation
      output << read_abbreviation(src)
    when :xml_instr
      read_xml_instruction(src, output)
    else # unhandled line type at this level
      # Just treat it as raw text
      read_text_material(src, output)
    end
  end

  merge_ial(output, src, output)
  output.delete_if do |x|
    # Strip out IAL
    (x.kind_of?(MDElement) && x.node_type == :ial) ||
    # get rid of empty line markers
    x == :empty
  end

  # See for each list if we can omit the paragraphs
  # TODO: do this after
  output.each do |c|
    # Remove paragraphs that we can get rid of
    if [:ul, :ol].include?(c.node_type) && c.children.none?(&:want_my_paragraph)
      c.children.each do |d|
        if d.children.first && d.children.first.node_type == :paragraph
          d.children = d.children.first.children + d.children[1..-1]
        end
      end
    elsif c.node_type == :definition_list && c.children.none?(&:want_my_paragraph)
      c.children.each do |definition|
        definition.definitions.each do |dd|
          # NOTE(review): unlike the list branch above, this does not guard
          # against dd.children being empty - confirm that cannot happen.
          if dd.children.first.node_type == :paragraph
            dd.children = dd.children.first.children + dd.children[1..-1]
          end
        end
      end
    end
  end

  output
end

#parse_doc(s) ⇒ `Object`

# File 'lib/maruku/input/parse_doc.rb', line 6

# Parses a whole document from the string +s+ into this document object.
#
# Steps, in order: strip a UTF-8 byte order mark, split off the email-style
# headers (merged into +attributes+), convert the body to UTF-8 when an
# +encoding+ attribute other than utf-8 was given, parse the body as
# Markdown into +@children+, run the Markdown Extra post-processing steps,
# build the table of contents, default the +title+ attribute from the TOC
# header element, expand attribute lists on every element and, only when
# the global +unsafe_features+ setting is on, execute embedded code blocks.
#
# @param s [String] the raw document text
def parse_doc(s)
  # Remove BOM if it is present
  s = s.sub(/^\xEF\xBB\xBF/u, '')
  meta2 = parse_email_headers(s)
  data = meta2.delete :data

  self.attributes.merge! meta2

=begin maruku_doc
Attribute: encoding
Scope:     document
Summary:   Encoding for the document.

If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
=end

  enc = self.attributes.delete(:encoding) || 'utf-8'
  if enc.downcase != 'utf-8'
    # Switch to ruby 1.9 String#encode
    # with backward 1.8 compatibility
    if data.respond_to?(:encode!)
      data.encode!('UTF-8', enc)
    else
      require 'iconv'
      data = Iconv.new('utf-8', enc).iconv(data)
    end
  end

  @children = parse_text_as_markdown(data)

  if markdown_extra?
    self.search_abbreviations
    self.substitute_markdown_inside_raw_html
  end

  self.toc = create_toc

  # use title if not set
  self.attributes[:title] ||= toc.header_element.children.join if toc.header_element

  # Now do the attributes magic
  each_element do |e|
    # default attribute list
    if default = self.ald[e.node_type.to_s]
      expand_attribute_list(default, e.attributes)
    end
    expand_attribute_list(e.al, e.attributes)
#     puts "#{e.node_type}: #{e.attributes.inspect}"
  end

=begin maruku_doc
Attribute: unsafe_features
Scope:     global
Summary:   Enables execution of XML instructions.

Disabled by default because of security concerns.
=end

  # Read the setting through the MaRuKu module, the same constant that
  # read_xml_instruction consults for this flag.
  if MaRuKu::Globals[:unsafe_features]
    self.execute_code_blocks
    # TODO: remove executed code blocks
  end
end

#parse_text_as_markdown(text) ⇒ `Object`

Splits the string and calls parse_lines_as_markdown

# File 'lib/maruku/input/parse_block.rb', line 17

# Splits +text+ into lines, wraps them in a fresh LineSource and hands that
# to parse_blocks.
#
# @param text [String] Markdown text
# @return [Object] whatever parse_blocks returns for the new source
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end

#pick_apart_non_inline_html(children) ⇒ `Object`

If there are non-inline HTML tags in the paragraph, break them out into their own elements and make paragraphs out of everything else.

# File 'lib/maruku/input/parse_block.rb', line 296

# Splits paragraph children around non-inline HTML elements. Every run of
# ordinary children becomes its own paragraph (via md_par); each non-inline
# HTML element is emitted on its own between those paragraphs.
#
# @param children [Array] span-level children of a would-be paragraph
# @return [Array] paragraphs and stand-alone HTML elements, in order
def pick_apart_non_inline_html(children)
  result = []
  pending = []

  children.each do |node|
    unless element_is_non_inline_html?(node)
      pending << node
      next
    end

    unless pending.empty?
      # Fix up paragraphs before non-inline elements having an extra space:
      # the last character of a trailing string is dropped in place.
      tail = pending.last
      tail.replace(tail[0..-2]) if tail.is_a?(String) && !tail.empty?

      result << md_par(pending)
      pending = []
    end
    result << node
  end

  result << md_par(pending) unless pending.empty?
  result
end

#read_abbreviation(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 363

# Consumes one line from +src+, matches it against Abbreviation and records
# the abbreviation (capture 1) with its description (capture 2) in
# +self.abbreviations+. Errors are reported through maruku_error when the
# line does not match or the abbreviation is missing/empty; processing
# continues afterwards if maruku_error returns.
#
# @param src [#shift_line] the line source
# @return [Object] the element built by md_abbr_def
def read_abbreviation(src)
  line = src.shift_line
  matched = line =~ Abbreviation
  maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}" unless matched

  abbr = Regexp.last_match(1)
  desc = Regexp.last_match(2)

  missing = !abbr || abbr.empty?
  maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}" if missing

  self.abbreviations[abbr] = desc

  md_abbr_def(abbr, desc)
end

#read_ald(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 158

# Consumes one attribute-definition-list line from +src+. On a match against
# AttributeDefinitionList the label (capture 1) is bound in +self.ald+ to the
# attribute list parsed from capture 2; otherwise an error is reported.
#
# @param src [#shift_line] the line source
# @return [Object, nil] the md_ald element, or nil when the line is malformed
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  label = $1
  attrs = read_attribute_list(CharSource.new($2, src))
  self.ald[label] = attrs
  md_ald(label, attrs)
end

#read_code(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 499

# Reads an indented code block: consumes consecutive :code and :empty lines,
# removing up to four columns of indentation from each, then drops blank
# lines from both ends of the block.
#
# @param src [#cur_line, #shift_line] the line source
# @return [Object, nil] the md_codeblock element, or nil if nothing is left
def read_code(src)
  collected = []
  while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
    collected << strip_indent(src.shift_line, 4)
  end

  # trim blank lines at the end, then at the beginning
  collected.pop while collected.last && collected.last.strip.size == 0
  collected.shift while collected.first && collected.first.strip.size == 0

  return nil if collected.empty?

  md_codeblock(collected.join("\n"))
end

#read_definition(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 694

# Reads one definition-list entry: one or more :text lines as terms, an
# optional blank line, then one or more :definition lines, each followed by
# its indented continuation content (parsed recursively as blocks).
#
# @param src [LineSource] the line source
# @return [Object] a :definition element carrying :terms, :definitions and
#   :want_my_paragraph
# @raise [RuntimeError] "Chunky Bacon!" when the source ends after the terms
#   or the line after the optional blank is not a :definition
def read_definition(src)
  # Read one or more terms
  terms = []
  terms << md_el(:definition_term, parse_span(src.shift_line)) while src.cur_line && src.cur_line.md_type == :text

  raise "Chunky Bacon!" unless src.cur_line

  # one optional empty; seeing it means the definitions keep their paragraphs
  want_my_paragraph = src.cur_line.md_type == :empty
  src.shift_line if want_my_paragraph

  raise "Chunky Bacon!" unless src.cur_line.md_type == :definition

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    offset = src.cur_index

    src.shift_line =~ Definition
    opening = $1

    body, paragraph_wanted = read_indented_content(src, 4, :definition, :definition)
    want_my_paragraph ||= paragraph_wanted

    body.unshift opening

    nested = LineSource.new(body, src, offset)
    definitions << md_el(:definition_data, parse_blocks(nested))
  end

  md_el(:definition, terms + definitions, {
          :terms => terms,
          :definitions => definitions,
          :want_my_paragraph => want_my_paragraph
        })
end

#read_footnote_text(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 380

# Reads a footnote definition: the opening line is matched against
# FootnoteText (capture 1 = id, capture 2 = text on the same line), the
# following indented content is collected, and everything is parsed as
# blocks. The resulting footnote is registered in +self.footnotes+.
#
# @param src [LineSource] the line source
# @return [Object] the element built by md_footnote
def read_footnote_text(src)
  offset = src.cur_index
  opening = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless opening =~ FootnoteText

  id = $1
  inline_text = $2 || ''

  # continuation lines use a fixed indentation of four columns
  stoppers = [:footnote_text, :ref_definition, :definition, :abbreviation]
  body, _ = read_indented_content(src, 4, stoppers, :footnote_text)

  # add first line
  body.unshift inline_text unless inline_text.strip.empty?

  nested = LineSource.new(body, src, offset)
  footnote = md_footnote(id, parse_blocks(nested))
  self.footnotes[id] = footnote
  footnote
end

#read_header12(src) ⇒ `Object`

Reads a header (underlined with ----- or =====)

# File 'lib/maruku/input/parse_block.rb', line 171

# Reads a two-line ("underlined") header: the title line, then the
# underline line whose md_type decides the level (:header2 gives 2,
# anything else 1). A trailing "{...}" on the title is parsed as an inline
# attribute list when new_meta_data? is on; if nothing but that list remains,
# the braces are used as the header text and the attributes are dropped.
#
# @param src [LineSource] the line source
# @return [Object] the element built by md_header
def read_header12(src)
  title = src.shift_line.strip
  al = nil
  ial = nil
  # Check if there is an IAL
  if new_meta_data? && title =~ /^(.*?)\{(.*?)\}\s*$/
    ial = Regexp.last_match(2)
    title = Regexp.last_match(1).strip
    al = read_attribute_list(CharSource.new(ial, src))
  end

  text = parse_span title
  if text.empty?
    text = "{#{ial}}"
    al = nil
  end

  underline_type = src.cur_line.md_type
  src.shift_line
  md_header(underline_type == :header2 ? 2 : 1, text, al)
end

#read_header3(src) ⇒ `Object`

reads a header like ‘#### header ####’

# File 'lib/maruku/input/parse_block.rb', line 191

# Reads an ATX-style header such as '#### header ####'. The number of
# leading '#' characters is the level; more than six turns the whole line
# into a paragraph instead. A trailing "{...}" is parsed as an inline
# attribute list when new_meta_data? is on; if the header text is empty the
# braces become the text and the attributes are dropped.
#
# @param src [LineSource] the line source
# @return [Object] the element built by md_header, or by md_par for level > 6
def read_header3(src)
  raw = src.shift_line.strip
  al = nil
  ial = nil
  # Check if there is an IAL
  if new_meta_data? && raw =~ /^(.*?)\{(.*?)\}\s*$/
    ial = Regexp.last_match(2)
    raw = Regexp.last_match(1).strip
    al = read_attribute_list(CharSource.new(ial, src))
  end

  level = raw[/^#+/].size
  return md_par(parse_span(raw), al) if level > 6

  # strip the leading and trailing runs of '#'
  text = parse_span raw.gsub(/\A#+|#+\z/, '')
  if text.empty?
    text = "{#{ial}}"
    al = nil
  end
  md_header(level, text, al)
end

#read_indented_content(src, indentation, break_list, item_type, ial_offset = 0) ⇒ `Object`

This is the only ugly function in the code base. It is used to read list items, descriptions, footnote text

# File 'lib/maruku/input/parse_block.rb', line 414

# Collects the continuation lines that belong to an indented construct
# (the callers in this file are list items, definitions and footnote text).
#
# Lines are consumed from +src+ and appended, with +indentation+ columns
# stripped, until the source is exhausted or one of the break conditions
# below fires.
#
# @param src [LineSource] the line source; consumed lines are shifted off
# @param indentation [Integer] number of columns stripped from each line
# @param break_list [Array<Symbol>, Symbol] md_types that end the content
#   as long as no blank line has been seen yet (coerced with Array())
# @param item_type [Symbol] md_type of the enclosing item; used only for
#   the want_my_paragraph computation
# @param ial_offset [Integer] subtracted from +indentation+ to obtain the
#   minimum leading-space count used in the alignment checks
# @return [Array] two values: the collected lines (trailing :empty lines
#   removed) and the want_my_paragraph flag
def read_indented_content(src, indentation, break_list, item_type, ial_offset=0)
  lines = []
  # collect all indented lines
  saw_empty = false
  saw_anything_after = false
  break_list = Array(break_list)
  len = indentation - ial_offset

  while src.cur_line
    num_leading_spaces = src.cur_line.number_of_leading_spaces
    # An under-indented line ends the content unless it is text, empty or code.
    break if num_leading_spaces < len && ![:text, :empty, :code].include?(src.cur_line.md_type)

    # The md_type tested from here on is that of the line *after* stripping.
    line = strip_indent(src.cur_line, indentation)
    md_type = line.md_type

    if md_type == :empty
      saw_empty = true
      lines << line
      src.shift_line
      next
    end

    # Unquestioningly grab anything that's deeper-indented
    if md_type != :code && num_leading_spaces > len
      lines << line
      src.shift_line
      next
    end

    # after a white line
    if saw_empty
      # we expect things to be properly aligned
      break if num_leading_spaces < len
      saw_anything_after = true
    else
      break if break_list.include?(md_type)
    end

    # Very deeply indented code keeps all but four of its leading spaces.
    if md_type == :code && num_leading_spaces > len+6
      lines << strip_indent(src.cur_line, num_leading_spaces-4)
      src.shift_line
      next
    end

    lines << line
    src.shift_line

    # You are only required to indent the first line of
    # a child paragraph.
    if md_type == :text
      while src.cur_line && src.cur_line.md_type == :text
        lines << strip_indent(src.shift_line, indentation)
      end
    end
  end

  # TODO fix this
  # True when content followed a blank line, or when a blank line is
  # immediately followed by another item of the same type.
  want_my_paragraph = saw_anything_after ||
    (saw_empty && src.cur_line && src.cur_line.md_type == item_type)

  # create a new context

  # Drop trailing blank lines from the collected content.
  while lines.last && lines.last.md_type == :empty
    lines.pop
  end

  return lines, want_my_paragraph
end

#read_list_item(src) ⇒ `Object`

Reads one list item, either ordered or unordered.

# File 'lib/maruku/input/parse_block.rb', line 336

# Reads one list item, either ordered or unordered. The first line supplies
# the indentation (and an optional inline attribute list) through
# spaces_before_first_char; the indented continuation is collected with
# read_indented_content and the whole item is parsed recursively as blocks.
#
# @param src [LineSource] the line source, positioned on the item's first line
# @return [Object] the element built by md_li
def read_list_item(src)
  offset = src.cur_index

  item_type = src.cur_line.md_type
  head_line = src.shift_line

  indentation, ial = spaces_before_first_char(head_line)
  al = ial ? read_attribute_list(CharSource.new(ial, src)) : nil
  ial_offset = ial ? ial.length + 3 : 0
  body, want_my_paragraph = read_indented_content(src, indentation, [], item_type, ial_offset)

  # in case there is a second line and this line starts a new list, format it.
  opens_nested_list = !body.empty? && [:ulist, :olist].include?(MaRuKu::MDLine.new(body.first).md_type)
  body.unshift "" if opens_nested_list

  # add first line
  # Tabs are expanded to the next TAB_SIZE stop, then the list marker
  # ('*', '-', '+', ...) and its indentation are cut off.
  expanded = head_line.gsub(/([^\t]*)(\t)/) { $1 + " " * (TAB_SIZE - $1.length % TAB_SIZE) }
  body.unshift expanded[indentation, expanded.size - 1]

  nested = LineSource.new(body, src, offset)
  md_li(parse_blocks(nested), want_my_paragraph, al)
end

#read_paragraph(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 275

# Reads a paragraph: the current line plus every following line until one
# of these ends it - a line of type :quote, :header3, :empty,
# :ref_definition, :ial or :xml_instr; a line that is blank after
# stripping; a line whose successor is a :header1/:header2 underline; or a
# line claimed by a block extension. The lines are span-parsed and any
# non-inline HTML is split out into its own elements.
#
# @param src [LineSource] the line source
# @return [Array] the elements produced by pick_apart_non_inline_html
def read_paragraph(src)
  collected = [src.shift_line]
  while (candidate = src.cur_line)
    # :olist does not break
    break if [:quote, :header3, :empty, :ref_definition, :ial, :xml_instr].include?(candidate.md_type)
    break if candidate.strip.empty?

    following = src.next_line
    break if following && [:header1, :header2].include?(following.md_type)
    break if any_matching_block_extension?(candidate)

    collected << src.shift_line
  end

  pick_apart_non_inline_html(parse_span(collected, src))
end

#read_quote(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 484

# Reads a block quote. Consecutive :text and :quote lines belong to the
# quote, and so does an :empty line when the line right after it is a
# :quote. Each line is passed through unquote and the result is parsed
# recursively as blocks.
#
# @param src [LineSource] the line source
# @return [Object] the element built by md_quote
def read_quote(src)
  offset = src.cur_index
  quoted = []

  while (current = src.cur_line)
    kind = current.md_type
    belongs = [:text, :quote].include?(kind) ||
      (kind == :empty && src.next_line && src.next_line.md_type == :quote)
    break unless belongs

    quoted << unquote(src.shift_line)
  end

  nested = LineSource.new(quoted, src, offset)
  md_quote(parse_blocks(nested))
end

#read_raw_html(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 242

# Reads a block of raw HTML: lines are fed to an HTMLHelper until it reports
# being finished or the source runs out (parse errors are reported through
# maruku_error). Non-blank text left over after the HTML is parsed as spans
# when the first tag is an inline element, and as Markdown blocks otherwise.
#
# @param src [LineSource] the line source
# @return [Array] the raw HTML element, followed by either one paragraph
#   (inline first tag) or the parsed blocks of the leftover text
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    chunk = src.shift_line
    helper.eat_this(chunk)
    #     puts "\nBLOCK:\nhtml -> #{chunk.inspect}"
    while src.cur_line && !helper.is_finished?
      chunk = src.shift_line
      #       puts "html -> #{chunk.inspect}"
      helper.eat_this "\n" + chunk
    end
  rescue => e
    maruku_error "Bad block-level HTML:\n#{e.inspect.gsub(/^/, '|')}\n", src
  end

  # only non-blank leftovers count as an extra line
  extra_line = helper.rest =~ /^\s*$/ ? nil : helper.rest
  raw_html = helper.stuff_you_read
  is_inline = HTML_INLINE_ELEMS.include?(helper.first_tag)

  return [md_html(raw_html)] unless extra_line

  remainder = is_inline ? parse_span(extra_line) : parse_text_as_markdown(extra_line)
  # keep the separating space in front of a leading string
  if extra_line.start_with?(' ') && remainder[0].is_a?(String)
    remainder[0] = ' ' + remainder[0]
  end

  if is_inline
    [md_html(raw_html), md_par(remainder)]
  else
    [md_html(raw_html)] + remainder
  end
end

#read_ref_definition(src, out) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 522

# Reads a link reference definition and registers it.
#
# The current line is consumed; if the next line is indented by one to
# three spaces and is not itself a footnote/reference/definition/
# abbreviation line, it is treated as a continuation and appended. The
# joined line is matched against LinkRegex. On success the reference is
# stored in +self.refs+ under its sanitized id (url, title and any extra
# key=value attributes) and an md_ref_def element is appended to +out+.
#
# When the line does not match, the problem is reported through
# maruku_error and the method returns nil without touching +out+,
# whatever maruku_error itself returns.
#
# @param src [LineSource] the line source
# @param out [#<<] the output collection
# @return [Object, nil] the result of +out <<+, or nil for a malformed line
def read_ref_definition(src, out)
  line = src.shift_line

  # if link is incomplete, shift next line
  if src.cur_line &&
      ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(src.cur_line.md_type) &&
      (1..3).include?(src.cur_line.number_of_leading_spaces)
    line << " " << src.shift_line
  end

  match = LinkRegex.match(line)
  unless match
    # Report and bail out as two separate statements: chaining them with
    # `and` would skip the return whenever maruku_error yields nil/false.
    maruku_error "Link does not respect format: '#{line}'"
    return
  end

  id = match[1]
  url = match[2]
  title = match[3] || match[4] || match[5]
  id = sanitize_ref_id(id)

  hash = self.refs[id] = {
    :url => url,
    :title => title
  }

  stuff = (match[6] || '')
  stuff.split.each do |couple|
    k, v = couple.split('=')
    next unless k # a token made only of '=' carries no key
    v ||= ""
    v = v[1..-2] if v.start_with?('"') # strip quotes
    hash[k.to_sym] = v
  end

  out << md_ref_def(id, url, :title => title)
end

#read_table(src) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 572

# Reads a pipe table: a header row, a separator row that fixes the number
# of columns and their alignment, and then every following line that
# contains a '|' as a body row.
#
# In body rows an empty cell does not create a cell of its own; it
# increases the "colspan" attribute of the preceding cell. If the header or
# any body row does not add up to the expected number of columns, an error
# is reported and a line break element (md_br) is returned in place of the
# table.
#
# @param src [LineSource] the line source, positioned on the header row
# @return [Object] a :table element (rows, with :align metadata) or md_br
def read_table(src)
  head = split_cells(src.shift_line).map do |s|
    md_el(:head_cell, parse_span(s))
  end

  separator = split_cells(src.shift_line)

  align = separator.map do |s|
    # ex: :-------------------:
    # If the separator starts and ends with a colon,
    # center the cell. If it's on the right, right-align,
    # otherwise left-align.
    starts = s.start_with? ':'
    ends = s.end_with? ':'
    if s.empty? # blank
      nil
    elsif starts && ends
      :center
    elsif ends
      :right
    else
      :left
    end
  end

  align.pop if align[-1].nil? # trailing blank
  num_columns = align.size

  head.pop if head.size == num_columns + 1 && head[-1].al.size == 0 # trailing blank

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br
  end

  rows = []
  while src.cur_line && src.cur_line.include?('|')
    row = []
    colCount = 0 # NOTE(review): never read in this method
    colspan = 1
    currElem = nil
    currIdx = 0
    # map is used only for its side effects; its return value is discarded.
    split_cells(src.shift_line, true).map do |s|
      if s.empty?
        # empty cells increase the colspan of the previous cell
        found = false
        colspan +=  1
        # NOTE(review): when there is no previous cell (or its al is nil) a
        # fresh AttributeList is updated here but not attached to any cell
        # in this method - confirm whether that is intended.
        al = (currElem &&currElem.al) || AttributeList.new
        if al.size > 0
          elem = find_colspan(al)
          if elem != nil
            elem[1] = colspan.to_s
            found = true
          end
        end
        al.push(["colspan", colspan.to_s]) unless found # also handles the case of and empty attribute list
      else
        colspan = 1
        row[currIdx] = md_el(:cell, parse_span(s))
        currElem = row[currIdx]
        currIdx += 1
      end
    end

    #
    # sanity check - make sure the current row has the right number of columns (including spans)
    #                If not, dump the table and return a break
    #
    # From here on num_columns holds the width of the current row.
    num_columns = count_columns(row)
    if num_columns == head.size + 1 && row[-1].al.size == 0 #trailing blank cell
      row.pop
      num_columns -= 1
    end
    if head.size != num_columns
      maruku_error  "Row does not have #{head.size} columns: \n#{row.inspect} - #{num_columns}"
      tell_user "I will ignore this table."
      # XXX need to recover
      return md_br
    end
    rows << row
  end
  rows.unshift(head) # put the header row on the processed table
  md_el(:table, rows, { :align => align })
end

#read_text_material(src, output) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 138

# Decides what a text line starts and appends the result to +output+:
# a table (the line contains '|' and the next line, right-stripped, matches
# TableSeparator), an underlined header (the next line is :header1 or
# :header2), a definition-list entry (appended to the definition list at
# the end of +output+ when there is one), or otherwise a paragraph.
#
# @param src [LineSource] the line source
# @param output [#<<, #concat, #last] the collection receiving the elements
def read_text_material(src, output)
  # if the line contains a pipe, it could be a table header
  table_ahead = src.cur_line.include?('|') &&
    src.next_line &&
    src.next_line.rstrip =~ TableSeparator
  return output << read_table(src) if table_ahead

  header_ahead = src.next_line && [:header1, :header2].include?(src.next_line.md_type)
  return output << read_header12(src) if header_ahead

  # Start of a paragraph
  return output.concat(read_paragraph(src)) unless eventually_comes_a_def_list(src)

  definition = read_definition(src)
  current = output.last
  if current.kind_of?(MDElement) && current.node_type == :definition_list
    current.children << definition
  else
    output << md_el(:definition_list, definition)
  end
end

#read_xml_instruction(src, output) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 213

# Reads an XML processing instruction ("<?target code ?>"), which may span
# several lines, and appends the result to +output+.
#
# Lines are consumed until one contains the closing '?>'. If the input ends
# first, the instruction is reported as unterminated through maruku_error
# and whatever was read is used as its code (instead of failing on a nil
# line). Text after '?>' on the last line is reported as trailing garbage.
#
# When the target is 'mrk' and the global +unsafe_features+ setting is on,
# the code is executed with safe_execute_code and its non-nil result is
# pushed onto +output+; otherwise an md_xml_instr element is appended.
#
# @param src [LineSource] the line source, positioned on the '<?' line
# @param output [#<<, #push] the collection receiving the elements
# @raise [RuntimeError] "BugBug" if the first line does not start an
#   instruction; "Not expected" if executed code returns a String
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" unless m
  target = m[2] || ''
  code = m[3]
  # Stop at end of input: shift_line would hand back nil from there on.
  while !code.include?('?>') && src.cur_line
    code << "\n" << src.shift_line
  end

  if !code.include?('?>')
    maruku_error "Unterminated XML instruction (missing '?>'):\n" +
      code.gsub(/^/, '|'), src
  elsif code !~ /\?>\s*$/
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
      code.gsub(/^/, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      if result.kind_of? String
        raise "Not expected"
      else
        output.push(*result)
      end
    end
  else
    output << md_xml_instr(target, code)
  end
end

#safe_execute_code(object, code) ⇒ `Object`

# File 'lib/maruku/input/parse_doc.rb', line 111

# Evaluates +code+ in the context of +object+ and returns its value.
# StandardError and ScriptError (syntax errors included) raised by the code
# are reported through maruku_error together with the offending code, the
# exception and the exception's own backtrace; nil is returned in that case.
#
# NOTE(security): instance_eval executes arbitrary Ruby. The callers in this
# file only reach it when the global +unsafe_features+ setting is enabled;
# never enable that for untrusted documents.
#
# @param object [Object] receiver for instance_eval
# @param code [String] Ruby source to evaluate
# @return [Object, nil] the value of the code, or nil after an error
def safe_execute_code(object, code)
  begin
    object.instance_eval(code)
  rescue StandardError, ScriptError => e
    # Use the exception's backtrace: Kernel#caller would describe this
    # rescue site rather than the place where the error happened.
    trace = (e.backtrace || []).join("\n")
    maruku_error "Exception while executing this:\n" +
      code.gsub(/^/, ">") +
      "\nThe error was:\n" +
      (e.inspect + "\n" + trace).gsub(/^/, "|")
    nil
  end
end

#search_abbreviations ⇒ `Object`

# File 'lib/maruku/input/parse_doc.rb', line 134

# Replaces every occurrence of each registered abbreviation inside the
# document's strings with an md_abbr element. Each string handed over by
# replace_each_string is split into plain text pieces and abbreviation
# elements, returned in their original order.
def search_abbreviations
  abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    replace_each_string do |s|
      # bug if many abbreviations are present (agorf)
      scanner = StringScanner.new(s)
      pieces = []
      until scanner.eos?
        plain = ''
        # copy characters until the abbreviation matches here or input ends
        until scanner.scan(pattern) || scanner.eos?
          plain << scanner.getch
        end
        pieces << plain unless plain.empty?
        pieces << md_abbr(abbrev.dup, title ? title.dup : nil) if scanner.matched == abbrev
      end
      pieces
    end
  end
end

#split_cells(s, allowBlank = false) ⇒ `Object`

# File 'lib/maruku/input/parse_block.rb', line 558

# Splits one table line into its cells at the '|' characters.
#
# Without +allowBlank+ empty cells are dropped and the remaining cells are
# stripped of surrounding whitespace. With +allowBlank+ cells are returned
# untouched (blank ones included); only the empty piece produced by a
# leading and/or trailing pipe is left out.
#
# @param s [String] the table line
# @param allowBlank [Boolean] keep blank cells and original whitespace
# @return [Array<String>] the cells
def split_cells(s, allowBlank = false)
  return s.split('|').reject(&:empty?).map(&:strip) unless allowBlank

  pieces = s.split('|', -1)
  if /^[|].*[|]$/ =~ s # handle the simple and decorated table cases
    pieces[1..-2]   # pipes on both sides: keep only the inner cells
  elsif /^.*[|]$/ =~ s
    pieces[0..-2]   # trailing pipe only: drop the last, empty piece
  else
    pieces
  end
end

#substitute_markdown_inside_raw_html ⇒ `Object`

(PHP Markdown extra) Search for elements that have markdown=1 or markdown=block defined

# File 'lib/maruku/input/parse_doc.rb', line 154

# (PHP Markdown extra) Visits every :raw_html element and, when it has
# parsed HTML, asks that parsed HTML to process the Markdown inside its
# elements, passing this document along.
def substitute_markdown_inside_raw_html
  each_element(:raw_html) do |raw|
    parsed = raw.parsed_html
    parsed.process_markdown_inside_elements(self) if parsed
  end
end

Module: MaRuKu::In::Markdown::BlockLevelParser

Defined Under Namespace

Constant Summary

Constants included from SpanLevelParser

Constants included from Strings

Instance Method Summary collapse

Methods included from SpanLevelParser

Methods included from Helpers

Methods included from Strings

Instance Method Details

#count_columns(row) ⇒ Object

#element_is_non_inline_html?(elem) ⇒ Boolean

#eventually_comes_a_def_list(src) ⇒ Object

#execute_code_blocks ⇒ Object

#expand_attribute_list(al, result) ⇒ Object

#find_colspan(al) ⇒ Object

#parse_blocks(src) ⇒ Object

#parse_doc(s) ⇒ Object

#parse_text_as_markdown(text) ⇒ Object

#pick_apart_non_inline_html(children) ⇒ Object

#read_abbreviation(src) ⇒ Object

#read_ald(src) ⇒ Object

#read_code(src) ⇒ Object

#read_definition(src) ⇒ Object

#read_footnote_text(src) ⇒ Object

#read_header12(src) ⇒ Object

#read_header3(src) ⇒ Object

#read_indented_content(src, indentation, break_list, item_type, ial_offset = 0) ⇒ Object

#read_list_item(src) ⇒ Object

#read_paragraph(src) ⇒ Object

#read_quote(src) ⇒ Object

#read_raw_html(src) ⇒ Object

#read_ref_definition(src, out) ⇒ Object

#read_table(src) ⇒ Object

#read_text_material(src, output) ⇒ Object

#read_xml_instruction(src, output) ⇒ Object

#safe_execute_code(object, code) ⇒ Object

#search_abbreviations ⇒ Object

#split_cells(s, allowBlank = false) ⇒ Object

#substitute_markdown_inside_raw_html ⇒ Object