Module: Citrus

Defined in:
lib/citrus.rb,
lib/citrus/file.rb,
lib/citrus/version.rb

Overview

Citrus is a compact and powerful parsing library for Ruby that combines the elegance and expressiveness of the language with the simplicity and power of parsing expressions.

mjackson.github.io/citrus

Defined Under Namespace

Modules: Grammar, GrammarMethods, ModuleNameHelpers, Nonterminal, Proxy, Rule

Classes: Alias, AndPredicate, ButPredicate, Choice, Error, Input, LoadError, Match, MemoizedInput, NotPredicate, ParseError, Repeat, Sequence, StringTerminal, Super, SyntaxError, Terminal

Constant Summary

DOT =

A pattern to match any character, including newline.

/./mu
Infinity =
1.0 / 0
CLOSE =
-1
File =

A grammar for Citrus grammar files. This grammar is used in Citrus.eval to parse and evaluate Citrus grammars and serves as a prime example of how to create a complex grammar complete with semantic interpretation in pure Ruby.

Grammar.new do #:nodoc:

  ## Hierarchical syntax

  # Top-level rule: leading space followed by any number of require
  # statements and grammar definitions. Its value first loads every required
  # file and then returns an array holding the value of each captured
  # grammar.
  rule :file do
    all(:space, zero_or_more(any(:require, :grammar))) {
      captures[:require].each do |req|
        file = req.value
        begin
          # Try a regular Ruby require first.
          require file
        rescue ::LoadError => e
          begin
            # Fall back to looking the name up as a Citrus grammar file.
            Citrus.require(file)
          # NOTE(review): the unqualified LoadError presumably resolves to
          # Citrus::LoadError here (as opposed to ::LoadError above) — confirm.
          rescue LoadError
            # Re-raise the original LoadError.
            raise e
          end
        end
      end

      captures[:grammar].map {|g| g.value }
    }
  end

  # A grammar definition: the grammar keyword, a module name, any number of
  # include/root/rule statements and the end keyword.
  rule :grammar do
    mod all(:grammar_keyword, :module_name, zero_or_more(any(:include, :root, :rule)), :end_keyword) do
      # Presumably supplies module_namespace and module_basename used below —
      # TODO confirm against ModuleNameHelpers.
      include ModuleNameHelpers

      # Creates a new Grammar, assigns it to a constant, and populates it from
      # the captured include, rule and root statements. Returns the grammar.
      def value
        grammar = module_namespace.const_set(module_basename, Grammar.new)

        captures[:include].each {|inc| grammar.include(inc.value) }
        captures[:rule].each {|r| grammar.rule(r.rule_name.value, r.value) }

        # A root statement is optional; only set the root when one was given.
        root = capture(:root)
        grammar.root(root.value) if root

        grammar
      end
    end
  end

  # A rule definition: the rule keyword, a rule name, an optional expression
  # and the end keyword.
  rule :rule do
    mod all(:rule_keyword, :rule_name, zero_or_one(:expression), :end_keyword) do
      # Returns the capture for the rule's name (not its string value).
      def rule_name
        capture(:rule_name)
      end

      # Returns the value of the rule's expression, or a rule built from the
      # empty string when the definition has no expression.
      def value
        # An empty rule definition matches the empty string.
        expr = capture(:expression)
        expr ? expr.value : Rule.for('')
      end
    end
  end

  # One or more sequences separated by "|". The value is a Choice over the
  # sequence values when there are several, otherwise the single value itself.
  rule :expression do
    all(:sequence, zero_or_more([['|', zero_or_one(:space)], :sequence])) {
      rules = captures[:sequence].map {|s| s.value }
      rules.length > 1 ? Choice.new(rules) : rules.first
    }
  end

  # One or more labelled expressions. The value is a Sequence of their values
  # when there are several, otherwise the single value itself.
  rule :sequence do
    one_or_more(:labelled) {
      rules = captures[:labelled].map {|l| l.value }
      rules.length > 1 ? Sequence.new(rules) : rules.first
    }
  end

  # An extended expression with an optional label in front. When a label is
  # present it is assigned to the resulting rule.
  rule :labelled do
    all(zero_or_one(:label), :extended) {
      label = capture(:label)
      rule = capture(:extended).value
      rule.label = label.value if label
      rule
    }
  end

  # A prefix expression with an optional extension (tag or block) behind it.
  # When an extension is present it is assigned to the resulting rule.
  rule :extended do
    all(:prefix, zero_or_one(:extension)) {
      extension = capture(:extension)
      rule = capture(:prefix).value
      rule.extension = extension.value if extension
      rule
    }
  end

  # A suffix expression with an optional predicate in front. The predicate's
  # value takes the rule as an argument and its result replaces the rule.
  rule :prefix do
    all(zero_or_one(:predicate), :suffix) {
      predicate = capture(:predicate)
      rule = capture(:suffix).value
      rule = predicate.value(rule) if predicate
      rule
    }
  end

  # A primary expression with an optional repeat operator behind it. The
  # repeat's value takes the rule as an argument and its result replaces the
  # rule.
  rule :suffix do
    all(:primary, zero_or_one(:repeat)) {
      repeat = capture(:repeat)
      rule = capture(:primary).value
      rule = repeat.value(rule) if repeat
      rule
    }
  end

  # A primary expression is a grouping, a proxy or a terminal.
  rule :primary do
    any(:grouping, :proxy, :terminal)
  end

  # A parenthesized expression. The value is that of the inner expression.
  rule :grouping do
    all(['(', zero_or_one(:space)], :expression, [')', zero_or_one(:space)]) {
      capture(:expression).value
    }
  end

  ## Lexical syntax

  # The require keyword followed by a quoted string. The value is the value
  # of that string.
  rule :require do
    all(:require_keyword, :quoted_string) {
      capture(:quoted_string).value
    }
  end

  # The include keyword followed by a module name.
  rule :include do
    mod all(:include_keyword, :module_name) do
      # Presumably supplies module_namespace and module_basename used below —
      # TODO confirm against ModuleNameHelpers.
      include ModuleNameHelpers

      # Looks the named constant up and returns it.
      def value
        module_namespace.const_get(module_basename)
      end
    end
  end

  # The root keyword followed by a rule name. The value is the rule name's
  # value.
  rule :root do
    all(:root_keyword, :rule_name) {
      capture(:rule_name).value
    }
  end

  # Rule names may contain letters, numbers, underscores, and dashes. They
  # MUST start with a letter.
  rule :rule_name do
    all(/[a-zA-Z][a-zA-Z0-9_-]*/, :space) {
      # NOTE(review): +first+ is presumably the match of the name pattern,
      # i.e. the name without the trailing space — confirm.
      first.to_s
    }
  end

  # A proxy is either the super keyword or an alias for another rule.
  rule :proxy do
    any(:super, :alias)
  end

  # The super keyword. The value is a new Super.
  rule :super do
    ext(:super_keyword) {
      Super.new
    }
  end

  # A rule name that is not the end keyword. The value is a new Alias built
  # from that name.
  rule :alias do
    all(notp(:end_keyword), :rule_name) {
      Alias.new(capture(:rule_name).value)
    }
  end

  # Any of the terminal forms. A String primitive becomes a StringTerminal
  # (with the flags of the matched string form), anything else becomes a
  # Terminal.
  rule :terminal do
    any(:quoted_string, :case_insensitive_string, :regular_expression, :character_class, :dot) {
      # NOTE(review): super() presumably returns the value of whichever
      # alternative matched — confirm.
      primitive = super()

      if String === primitive
        # +flags+ is defined by the :quoted_string and
        # :case_insensitive_string rules below.
        StringTerminal.new(primitive, flags)
      else
        Terminal.new(primitive)
      end
    }
  end

  # A single- or double-quoted string, optionally containing backslash
  # escapes, followed by space.
  rule :quoted_string do
    mod all(/(["'])(?:\\?.)*?\1/, :space) do
      # Evaluates the matched literal as Ruby code and returns the result.
      def value
        eval(first.to_s)
      end

      # Quoted strings carry no flags.
      def flags
        0
      end
    end
  end

  # A backtick-delimited string, optionally containing backslash escapes,
  # followed by space.
  rule :case_insensitive_string do
    mod all(/`(?:\\?.)*?`/, :space) do
      # Swaps the delimiting backticks for double quotes, then evaluates the
      # result as Ruby code.
      def value
        eval(first.to_s.gsub(/^`|`$/, '"'))
      end

      # Flags used for the StringTerminal built from this string.
      def flags
        Regexp::IGNORECASE
      end
    end
  end

  # A slash-delimited regular expression literal with optional trailing
  # flag letters. The value is the result of evaluating the literal as Ruby
  # code.
  rule :regular_expression do
    all(/\/(?:\\?.)*?\/[imxouesn]*/, :space) {
      eval(first.to_s)
    }
  end

  # A bracketed character class. The matched text is wrapped in slashes
  # (escaping any slash it contains) and evaluated as Ruby code.
  rule :character_class do
    all(/\[(?:\\?.)*?\]/, :space) {
      eval("/#{first.to_s.gsub('/', '\\/')}/")
    }
  end

  # A single "." whose value is the DOT constant.
  rule :dot do
    all('.', :space) {
      DOT
    }
  end

  # A label: letters, digits and underscores followed by a colon. The value
  # is the label text as a Symbol.
  rule :label do
    all(/[a-zA-Z0-9_]+/, :space, ':', :space) {
      first.to_str.to_sym
    }
  end

  # An extension is either a tag or a block.
  rule :extension do
    any(:tag, :block)
  end

  # A module name enclosed in angle brackets.
  rule :tag do
    mod all(
      ['<', zero_or_one(:space)],
      :module_name,
      ['>', zero_or_one(:space)]
    ) do
      # Presumably supplies module_namespace and module_basename used below —
      # TODO confirm against ModuleNameHelpers.
      include ModuleNameHelpers

      # Looks the named constant up and returns it.
      def value
        module_namespace.const_get(module_basename)
      end
    end
  end

  # A brace-delimited block that may contain nested blocks. The value is
  # either a Proc or a Module built from the block's text (see below).
  rule :block do
    all(
      '{',
      zero_or_more(any(:block, /[^{}]+/)),
      ['}', zero_or_one(:space)]
    ) {
      # The matched text, braces included, is evaluated as the body of a
      # Proc using the top-level binding.
      proc = eval("Proc.new #{to_s}", TOPLEVEL_BINDING)

      # Attempt to detect if this is a module block using some
      # extremely simple heuristics.
      if to_s =~ /\b(def|include) /
        Module.new(&proc)
      else
        proc
      end
    }
  end

  # A predicate is one of the "&", "!" or "~" prefix operators.
  rule :predicate do
    any(:and, :not, :but)
  end

  # The "&" operator. Its value takes the rule it precedes and returns an
  # AndPredicate built from that rule.
  rule :and do
    all('&', :space) { |rule|
      AndPredicate.new(rule)
    }
  end

  # The "!" operator. Its value takes the rule it precedes and returns a
  # NotPredicate built from that rule.
  rule :not do
    all('!', :space) { |rule|
      NotPredicate.new(rule)
    }
  end

  # The "~" operator. Its value takes the rule it precedes and returns a
  # ButPredicate built from that rule.
  rule :but do
    all('~', :space) { |rule|
      ButPredicate.new(rule)
    }
  end

  # A repeat is one of the "?", "+" or "*" suffix operators.
  rule :repeat do
    any(:question, :plus, :star)
  end

  # The "?" operator. Its value takes the rule it follows and returns a
  # Repeat with bounds 0 and 1.
  rule :question do
    all('?', :space) { |rule|
      Repeat.new(rule, 0, 1)
    }
  end

  # The "+" operator. Its value takes the rule it follows and returns a
  # Repeat with bounds 1 and Infinity.
  rule :plus do
    all('+', :space) { |rule|
      Repeat.new(rule, 1, Infinity)
    }
  end

  # The "*" operator with optional digits on either side (e.g. "2*3"). The
  # digits before the star give the minimum (default 0) and the digits after
  # it give the maximum (default Infinity).
  rule :star do
    all(/[0-9]*/, '*', /[0-9]*/, :space) { |rule|
      # NOTE(review): indices 1 and 3 presumably address the two digit
      # patterns, with index 2 being the "*" itself — confirm.
      min = captures[1] == '' ? 0 : captures[1].to_str.to_i
      max = captures[3] == '' ? Infinity : captures[3].to_str.to_i
      Repeat.new(rule, min, max)
    }
  end

  # One or more constants, each optionally preceded by "::", followed by
  # space.
  rule :module_name do
    all(one_or_more([ zero_or_one('::'), :constant ]), :space) {
      first.to_s
    }
  end

  # Keywords: each is a whole word followed by space.
  rule :require_keyword,  [ /\brequire\b/, :space ]
  rule :include_keyword,  [ /\binclude\b/, :space ]
  rule :grammar_keyword,  [ /\bgrammar\b/, :space ]
  rule :root_keyword,     [ /\broot\b/, :space ]
  rule :rule_keyword,     [ /\brule\b/, :space ]
  rule :super_keyword,    [ /\bsuper\b/, :space ]
  rule :end_keyword,      [ /\bend\b/, :space ]

  # Basic tokens: a constant starts with an uppercase letter; a comment runs
  # from "#" to the end of the line; space is any run of whitespace
  # characters and comments (possibly empty).
  rule :constant,         /[A-Z][a-zA-Z0-9_]*/
  rule :white,            /[ \t\n\r]/
  rule :comment,          /#.*/
  rule :space,            zero_or_more(any(:white, :comment))
end
VERSION =

The current version of Citrus as [major, minor, patch].

[3, 0, 2]

Class Method Summary

Class Method Details

.cache ⇒ Object

Returns a map of paths of files that have been loaded via #load to the result of #eval on the code in that file.

Note: These paths are not absolute unless you pass an absolute path to #load. That means that if you change the working directory and try to #require the same file with a different relative path, it will be loaded twice.



29
30
31
# File 'lib/citrus.rb', line 29

# Returns the Hash that maps the path of each file loaded via #load to the
# result of evaluating that file. The Hash is created on first access and
# the same object is returned on every later call.
def self.cache
  @cache = {} unless @cache
  @cache
end

.eval(code, options = {}) ⇒ Object

Evaluates the given Citrus parsing expression grammar code and returns an array of any grammar modules that are created. Accepts the same options as GrammarMethods#parse.

Citrus.eval("grammar MyGrammar\n  rule abc\n    \"abc\"\n  end\nend\n")
# => [MyGrammar]


46
47
48
# File 'lib/citrus.rb', line 46

# Parses +code+ with the File grammar, passing +options+ through to the
# parser, and returns the value of the resulting match.
def self.eval(code, options={})
  parsed = File.parse(code, options)
  parsed.value
end

.load(file, options = {}) ⇒ Object

Loads the grammar(s) from the given file. Accepts the same options as #eval, plus the following:

force

Normally this method will not reload a file that is already in the #cache. However, if this option is true the file will be loaded, regardless of whether or not it is in the cache. Defaults to false.

Citrus.load('mygrammar')
# => [MyGrammar]


71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/citrus.rb', line 71

# Loads the grammar(s) from +file+, appending the ".citrus" suffix when it is
# missing, and stores the result in #cache under that (suffixed) path.
#
# +options+ are forwarded to #eval, except for <tt>:force</tt>, which is
# consumed here: when it is truthy the file is read and evaluated again even
# if it is already in the cache. The caller's +options+ hash is never
# modified, so it can safely be reused for several calls.
#
# Returns the cached result of #eval for the file. When evaluation raises a
# SyntaxError, the error's message is prefixed with the expanded path of the
# file before the error is re-raised.
def self.load(file, options={})
  file += '.citrus' unless /\.citrus$/ === file

  # Work on a copy: deleting :force from the caller's own hash would make the
  # flag silently disappear for any later call that reuses that hash.
  options = options.dup
  force = options.delete(:force)

  if force || !cache[file]
    begin
      cache[file] = eval(::File.read(file), options)
    rescue SyntaxError => e
      # Mutate the message in place so the same error object can be re-raised.
      e.message.replace("#{::File.expand_path(file)}: #{e.message}")
      raise e
    end
  end

  cache[file]
end

.require(file, options = {}) ⇒ Object

Searches the $LOAD_PATH for a file with the .citrus suffix and attempts to load it via #load. Returns the path to the file that was loaded on success; raises LoadError if no matching file can be found. Accepts the same options as #load.

path = Citrus.require('mygrammar')
# => "/path/to/mygrammar.citrus"
Citrus.cache[path]
# => [MyGrammar]


96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/citrus.rb', line 96

# Locates +file+ (with the ".citrus" suffix appended when it is missing) and
# loads it through Citrus.load, passing +options+ along unchanged.
#
# The file is first looked up against the empty path and then, unless +file+
# is an absolute path, in each entry of $LOAD_PATH. The first match wins.
#
# Returns the path of the file that was loaded. Raises LoadError when no
# matching file can be found.
def self.require(file, options={})
  file += '.citrus' unless /\.citrus$/ === file

  search_dirs = ['']
  search_dirs += $LOAD_PATH unless Pathname.new(file).absolute?

  # The search stops at the first directory whose glob yields a hit; +match+
  # is left as nil when none of them does.
  match = nil
  search_dirs.find { |dir| match = Dir[::File.join(dir, file)].first }

  raise LoadError, "Cannot find file #{file}" unless match

  Citrus.load(match, options)
  match
end

.rule(expr, options = {}) ⇒ Object

Evaluates the given expression and creates a new Rule object from it. Accepts the same options as #eval.

Citrus.rule('"a" | "b"')
# => #<Citrus::Rule: ... >


56
57
58
# File 'lib/citrus.rb', line 56

# Evaluates +expr+ via #eval using the +:expression+ rule as the root, and
# returns the result. Any +:root+ entry in +options+ is overridden; the
# caller's hash itself is left untouched because a merged copy is used.
def self.rule(expr, options={})
  expression_options = options.merge(:root => :expression)
  eval(expr, expression_options)
end

.version ⇒ Object

Returns the current version of Citrus as a string.



6
7
8
# File 'lib/citrus/version.rb', line 6

# Returns the elements of VERSION joined with dots, e.g. "3.0.2".
def self.version
  separator = '.'
  # Array#* with a String argument is equivalent to Array#join.
  VERSION * separator
end