Module: MaRuKu::Strings

Overview

Utility functions for dealing with strings.

Constant Summary collapse

TAB_SIZE =
4
AttributeDefinitionList =

$1 = id $2 = attribute list

/^\s{0,3}\{([\w\d\s]+)\}:\s*(.*?)\s*$/
InlineAttributeList =
/^\s{0,3}\{([:#\.].*?)\}\s*$/
Definition =

Example:

^:blah blah
^: blah blah
^   : blah blah
%r{ 
  ^ # begin of line
  [ ]{0,3} # up to 3 spaces
  : # colon
  \s* # whitespace
  (\S.*) # the text    = $1
  $ # end of line
}x
Abbreviation =

Example:

*[HTML]: Hyper Text Markup Language
%r{
  ^  # begin of line
  [ ]{0,3} # up to 3 spaces
  \* # one asterisk
  \[ # opening bracket
  ([^\]]+) # any non-closing bracket:  id = $1
  \] # closing bracket
  :  # colon
  \s* # whitespace
  (\S.*\S)* #           definition=$2
  \s* # strip this whitespace
  $   # end of line
}x
FootnoteText =
%r{
  ^  # begin of line
  [ ]{0,3} # up to 3 spaces
  \[(\^.+)\]: # id = $1 (including '^')
  \s*(\S.*)?$    # text = $2 (not obb.)
}x
LinkRegex =

This regex is taken from BlueCloth sources. Link defs are in the form: ^[id]: \n? url "optional title"

%r{
    ^[ ]{0,3}\[([^\[\]]+)\]:    # id = $1
 [ ]*
    <?([^>\s]+)>?       # url = $2
 [ ]*
    (?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?)     # title = $3
[")']     # Matching ) or "
\s*(.+)?   # stuff = $4
    )?  # title is optional
}x
IncompleteLink =
%r{^[ ]{0,3}\[([^\[\]]+?)\]:\s*$}
HeaderWithId =
/^(.*?)\{\#([\w_-]+)\}\s*$/
HeaderWithAttributes =
/^(.*?)\{(.*?)\}\s*$/
MightBeTableHeader =

if contains a pipe, it could be a table header

%r{\|}
Sep =

-------------:

/\s*(\:)?\s*-+\s*(\:)?\s*/
TableSeparator =

| -------------:| ------------------------------ |

%r{^(\|?#{Sep}\|?)+?\s*$}
EMailAddress =
/<([^:]+?@[^:]+?)>/

Instance Method Summary collapse

Instance Method Details

#line_md_type(l) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/maruku/input/type_detection.rb', line 36

# Classifies a single line of Markdown source and returns a Symbol naming
# its type: :text, :code, :empty, :footnote_text, :ref_definition,
# :abbreviation, :definition, :xml_instr, :raw_html, :ulist, :olist,
# :header1, :header2, :header3, :hrule, :quote, :metadata, :ald or :ial.
#
# The checks run top to bottom and the first one that matches wins, so the
# order of the statements is part of the behaviour.
#
# l - the line to classify; it is matched with =~ and handed to
#     number_of_leading_spaces.
def line_md_type(l)
  # The order of evaluation is important (:text is a catch-all)
  # Fast path: a line starting with an ASCII letter is plain text.
  return :text   if l =~ /^[a-zA-Z]/
  # Four or more leading spaces (as counted by number_of_leading_spaces)
  # mark a code line.
  return :code             if number_of_leading_spaces(l)>=4
  # Nothing but whitespace.
  return :empty    if l =~ /^\s*$/
  return :footnote_text    if l =~ FootnoteText
  # Either a complete link definition or one with only the "[id]:" part.
  return :ref_definition   if l =~ LinkRegex or l=~ IncompleteLink
  return :abbreviation     if l =~ Abbreviation
  return :definition       if l =~ Definition
  # I had a bug with emails and urls at the beginning of the 
  # line that were mistaken for raw_html
  return :text if l=~ /^[ ]{0,3}#{EMailAddress}/
  return :text if l=~ /^[ ]{0,3}<http:/
  # raw html is like PHP Markdown Extra: at most three spaces before
  # "<?" after any amount of whitespace is an XML processing instruction.
  return :xml_instr if l =~ %r{^\s*<\?}
  # An opening or closing tag: "<" or "</" followed by word characters.
  return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
  # An HTML comment opener "<!--".
  return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
  # Something is wrong with how we parse lists! :-(
  #return :ulist    if l =~ /^[ ]{0,3}([\*\-\+])\s+.*\w+/
  #return :olist    if l =~ /^[ ]{0,3}\d+\..*\w+/
  # The active list patterns allow at most one leading space (the disabled
  # ones above allowed up to three) and require a word character later on
  # the line.
  return :ulist    if l =~ /^[ ]{0,1}([\*\-\+])\s+.*\w+/
  return :olist    if l =~ /^[ ]{0,1}\d+\..*\w+/
  # A line beginning with "=".
  return :header1  if l =~ /^(=)+/ 
  # A line made only of hyphens and whitespace.
  return :header2  if l =~ /^([-\s])+$/ 
  # One or more "#" followed by some non-space text.
  return :header3  if l =~ /^(#)+\s*\S+/ 
  # at least three asterisks on a line, and only whitespace
  return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/ 
  # NOTE(review): a line of only hyphens and whitespace already matches the
  # :header2 pattern above, so this branch looks unreachable for
  # single-line input -- confirm before relying on it.
  return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
  return :hrule    if l =~ /^(\s*_\s*){3,1000}$/ # or underscores  
  # A line beginning with ">".
  return :quote    if l =~ /^>/
  # A line beginning with "@".
  return :metadata if l =~ /^@/
#   if @@new_meta_data?
    return :ald   if l =~ AttributeDefinitionList
    return :ial   if l =~ InlineAttributeList
#   end
#   return :equation_end if l =~ EquationEnd
  return :text # else, it's just text
end

#number_of_leading_spaces(s) ⇒ Fixnum

Returns the number of leading spaces, considering that a tab counts as TAB_SIZE spaces.

Parameters:

Returns:

  • (Fixnum)


71
72
73
74
# File 'lib/maruku/string_utils.rb', line 71

# Measures the indentation at the start of `s`.
#
# Every space in the leading whitespace counts as one column and every tab
# counts as TAB_SIZE columns; other whitespace characters are not counted.
#
# s - the String to inspect.
#
# Returns the indentation width as an Integer.
def number_of_leading_spaces(s)
  indent = s[/^\s*/]
  tab_count = indent.count("\t")
  indent.count(" ") + tab_count * TAB_SIZE
end

#parse_email_headers(s) ⇒ Symbol => String

Parses email headers, returning a hash. `hash[:data]` is the message; that is, anything past the headers.

Keys are downcased and converted to symbols; spaces become underscores. For example:

My key: true

becomes:

{:my_key => true}

Parameters:

Returns:

  • (Symbol => String)

    The header values



53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/maruku/string_utils.rb', line 53

# Reads "Key: value" header lines from the start of `s`.
#
# Each header line is passed through normalize_key_and_value and stored
# under the symbolized key. Scanning stops at the first position that is
# not a header line; everything from there on is stored under :data.
#
# s - the String holding the headers followed by the message.
#
# Returns a Hash of Symbol keys to header values, plus :data.
def parse_email_headers(s)
  header_line = /(\w[\w\s\-]+): +(.*)\n/
  cursor = StringScanner.new(s)
  parsed = {}

  until cursor.scan(header_line).nil?
    key, value = normalize_key_and_value(cursor[1], cursor[2])
    parsed[key.to_sym] = value
  end

  parsed.merge!(:data => cursor.rest)
  parsed
end

#parse_yaml_headers(s) ⇒ Symbol => String

Parses yaml headers, returning a hash. `hash[:data]` is the message; that is, anything past the headers.

Keys are downcased and converted to symbols; spaces become underscores. For example:


My key: true


becomes:

{:my_key => true}

Parameters:

  • s (String)

    the entire contents

Returns:

  • (Symbol => String)

    The header values



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/maruku/ext/yaml.rb', line 23

# Parses a YAML header block delimited by two "---" lines.
#
# Each key/value pair of the YAML mapping is passed through
# normalize_key_and_value and stored under the symbolized key. The text
# following the closing "---" line is stored under :data.
#
# When `s` has no such block, the whole input is returned as :data. When
# the block is empty, is not a mapping, or fails to parse, no header keys
# are added (a parse failure is additionally reported with puts).
#
# s - the String with the entire contents.
#
# Returns a Hash of Symbol keys to header values, plus :data.
def parse_yaml_headers(s)
  headers = {}
  front_matter = /^(---\s*\n.*?\n?)^(---\s*$\n?)/m.match(s)

  # No header block: there is nothing to parse and the whole input is
  # the message.
  unless front_matter
    headers[:data] = s
    return headers
  end

  begin
    # NOTE(review): YAML.load may build arbitrary objects on older Ruby
    # versions; consider YAML.safe_load if `s` can be untrusted.
    hash = YAML.load(front_matter[1])
  rescue => e
    puts "YAML Exception reading headers: #{e.message}"
    hash = {}
  end

  # An empty block loads as nil/false and a sequence loads as an Array;
  # neither carries key/value pairs.
  hash = {} unless hash.respond_to?(:each_pair)

  hash.each_pair do |yamlkey, yamlval|
    k, v = normalize_key_and_value(yamlkey, yamlval)
    headers[k.to_sym] = v
  end

  headers[:data] = front_matter.post_match
  headers
end

#sanitize_ref_id(s) ⇒ String

Replace spaces with underscores and remove non-word characters.

Parameters:

Returns:



105
106
107
# File 'lib/maruku/string_utils.rb', line 105

# Turns `s` into a reference id: surrounding whitespace is stripped, the
# text is downcased, spaces become underscores, and every remaining
# non-word character is dropped.
#
# s - the String to sanitize.
#
# Returns the sanitized String.
def sanitize_ref_id(s)
  normalized = s.strip.downcase
  underscored = normalized.gsub(' ', '_')
  underscored.gsub(/[^\w]/, '')
end

#shellescape(str) ⇒ String

Escapes a string so that it can be safely used in a Bourne shell command line.

Note that a resulted string should be used unquoted and is not intended for use in double quotes nor in single quotes.

This is a copy of the Shellwords.shellescape function in Ruby 1.8.7. It’s included for Ruby 1.8.6 compatibility.

Parameters:

Returns:



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/maruku/string_utils.rb', line 146

# Escapes `str` for use as one unquoted word on a Bourne shell command
# line.
#
# str - the String to escape.
#
# Returns a new String; the argument itself is not modified.
def shellescape(str)
  if str.empty?
    # The shell would skip an empty argument, so hand back empty quotes.
    "''"
  else
    # Matched byte-wise (/n) because not every shell implementation is
    # multibyte aware: each character outside the safe set gets a
    # backslash in front of it.
    escaped = str.gsub(/([^A-Za-z0-9_\-.,:\/@\n])/n, "\\\\\\1")

    # Backslash + LF is a line continuation that the shell drops, so a
    # line feed is wrapped in single quotes instead.
    escaped.gsub(/\n/, "'\n'")
  end
end

#spaces_before_first_char(s) ⇒ Fixnum

This returns the position of the first non-list character in a list item.

    spaces_before_first_char('*Hello')       #=> 1
    spaces_before_first_char('* Hello')      #=> 2
    spaces_before_first_char(' * Hello')     #=> 3
    spaces_before_first_char(' *   Hello')   #=> 5
    spaces_before_first_char('1.Hello')      #=> 2
    spaces_before_first_char(' 1.  Hello')   #=> 5

Parameters:

Returns:

  • (Fixnum)


89
90
91
92
93
94
95
96
97
98
99
# File 'lib/maruku/string_utils.rb', line 89

# Locates the first character of a list item's content.
#
# The list marker is matched together with the whitespace around it and an
# optional "{...}" group that may follow the marker.
#
# s - a list line that responds to `md_type` (returning :ulist or :olist)
#     and to `match`.
#
# Returns a two-element Array: the offset just past the matched prefix and
# the matched prefix text. Returns [0, nil] (after reporting through
# tell_user) when `s` is not a list line.
def spaces_before_first_char(s)
  match =
    case s.md_type
    when :ulist
      # leading whitespace, the bullet character, optional "{...}"
      s.match(/\s*.(\s*\{(.*?)\})?\s*/)
    when :olist
      # leading whitespace, the number and the character after it,
      # optional "{...}". The leading `\s*` makes the matched text
      # include the indentation, as in the :ulist branch.
      s.match(/\s*\d+.(\s*\{(.*?)\})?\s*/)
    else
      tell_user "MARUKU BUG: '#{s.inspect}' is not a list"
      nil
    end
  match ? [match.end(0), match[0]] : [0, nil]
end

#split_lines(s) ⇒ String

Split a string into multiple lines, on line feeds and/or carriage returns.

Parameters:

Returns:



33
34
35
# File 'lib/maruku/string_utils.rb', line 33

# Breaks `s` into lines at CRLF, lone CR, or lone LF.
#
# s - the String to split.
#
# Returns an Array of the line Strings, without their line terminators.
def split_lines(s)
  line_break = /\r\n|\r|\n/
  s.split(line_break)
end

#strip_indent(s, n) ⇒ String

Removes indentation from the beginning of `s`, up to at most `n` spaces. Tabs are counted as TAB_SIZE spaces.

Parameters:

Returns:



124
125
126
127
128
129
130
131
132
133
134
# File 'lib/maruku/string_utils.rb', line 124

# Drops leading indentation from `s` until `n` columns have been consumed
# or a character that is neither a space nor a tab is reached.
#
# A space consumes one column and a tab consumes TAB_SIZE columns. A tab is
# removed whole even when fewer than TAB_SIZE columns remain.
#
# s - the String to strip.
# n - the number of columns of indentation to remove.
#
# Returns the remaining String.
def strip_indent(s, n)
  remaining = n
  rest = s
  while remaining > 0
    lead = rest[0]
    if lead == ?\s
      remaining -= 1
    elsif lead == ?\t
      remaining -= TAB_SIZE
    else
      break
    end
    rest = rest[1..-1]
  end
  rest
end

#unquote(s) ⇒ String

Remove line-initial `>` characters for a quotation.

Parameters:

Returns:



113
114
115
# File 'lib/maruku/string_utils.rb', line 113

# Strips the quotation marker from `s`: each ">" at the start of a line is
# removed together with at most one whitespace character after it.
#
# s - the quoted String.
#
# Returns a new String without the markers.
def unquote(s)
  quote_marker = /^>\s?/
  s.gsub(quote_marker, '')
end