Class: Slaw::ActGenerator

Inherits:
Object
  • Object
show all
Defined in:
lib/slaw/generator.rb

Overview

Base class for generating Act documents

Constant Summary collapse

@@parsers =
{}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(grammar) ⇒ ActGenerator

Returns a new instance of ActGenerator.



15
16
17
18
19
20
21
22
# File 'lib/slaw/generator.rb', line 15

# Set up a generator for the given grammar.
#
# @param grammar [String] grammar name; build_parser uses it to locate the
#   grammar file and its parser class
def initialize(grammar)
  @grammar = grammar

  # build_parser both returns the new parser and stores it in @parser
  grammar_parser = build_parser
  @builder = Slaw::Parse::Builder.new(parser: grammar_parser)

  # keep a reference to the parser the builder actually holds
  @parser = @builder.parser
  @cleanser = Slaw::Parse::Cleanser.new
end

Instance Attribute Details

#builder ⇒ Object

Slaw::Parse::Builder

builder used by the generator



11
12
13
# File 'lib/slaw/generator.rb', line 11

# Reader for the builder created in the constructor.
#
# @return [Object] the value held in @builder
def builder; @builder; end

#parser ⇒ Object

Treetop::Runtime::CompiledParser

compiled parser



8
9
10
# File 'lib/slaw/generator.rb', line 8

# Reader for the parser currently held by this generator.
#
# @return [Object] the value held in @parser
def parser; @parser; end

Instance Method Details

#build_parser ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/slaw/generator.rb', line 24

# Build a new parser instance for this generator's grammar.
#
# The parser class for each grammar is looked up once and cached in
# @@parsers, keyed by grammar name. Every call creates a fresh instance of
# that class, sets its root rule to :act, stores it in @parser and returns it.
#
# @return [Object] the newly created parser instance
# @raise [NameError] if the expected ActParser constant is not defined after
#   the grammar has been required
def build_parser
  @@parsers[@grammar] ||= begin
    # load the grammar with polyglot and treetop
    # this will ensure the class below is available
    # see: http://cjheath.github.io/treetop/using_in_ruby.html
    require "slaw/grammars/#{@grammar}/act"

    # Resolve the class by constant lookup instead of eval-ing an
    # interpolated string, so the grammar name is never executed as code.
    Object.const_get("Slaw::Grammars::#{@grammar.upcase}::ActParser")
  end

  @parser = @@parsers[@grammar].new
  @parser.root = :act

  @parser
end

#cleanup(text) ⇒ Object

Run basic cleanup on text, such as ensuring clean newlines and removing tabs. This is always automatically done before processing.



52
53
54
# File 'lib/slaw/generator.rb', line 52

# Run the cleanser's basic cleanup over the text.
#
# @param text [String] text to clean
# @return [Object] whatever the cleanser's +cleanup+ returns for +text+
def cleanup(text)
  cleanser = @cleanser
  cleanser.cleanup(text)
end

#generate_from_text(text) ⇒ Nokogiri::Document

Generate an Act XML document from plain text.

Parameters:

  • text (String)

    plain text

Returns:

  • (Nokogiri::Document)

    the resulting xml



45
46
47
# File 'lib/slaw/generator.rb', line 45

# Clean the plain text, then hand it to the builder for parsing and
# processing.
#
# @param text [String] plain text
# @return [Object] the result of the builder's +parse_and_process_text+
def generate_from_text(text)
  cleaned = cleanup(text)
  @builder.parse_and_process_text(cleaned)
end

#guess_section_number_after_title(text) ⇒ Object

Try to determine if section numbers come after titles, rather than before.

eg:

Section title
1. Section content

versus

1. Section title
Section content


75
76
77
78
79
80
# File 'lib/slaw/generator.rb', line 75

# Guess whether section numbers follow their titles instead of preceding
# them, by counting occurrences of each layout in the text.
#
# @param text [String] plain text to inspect
# @return [Boolean] true when "title line, then numbered line" occurs more
#   than 1.25 times as often as "blank line, then numbered title"
def guess_section_number_after_title(text)
  # a line starting with a word of 4+ characters, followed by a line starting "N. "
  title_then_number = /^\w{4,}[^\n]+\n\d+\. /
  # a blank line, followed by "N. " and a word of 4+ characters
  number_then_title = /^\s*\n\d+\. \w{4,}/

  title_first_count  = text.scan(title_then_number).size
  number_first_count = text.scan(number_then_title).size

  number_first_count * 1.25 < title_first_count
end

#reformat(text) ⇒ Object

Reformat some common errors in text to help make parsing more successful. Optional, and only recommended when processing a document for the first time.



59
60
61
# File 'lib/slaw/generator.rb', line 59

# Pass the text through the cleanser's reformatting step.
#
# @param text [String] text to reformat
# @return [Object] whatever the cleanser's +reformat+ returns for +text+
def reformat(text)
  cleanser = @cleanser
  cleanser.reformat(text)
end

#text_from_act(doc) ⇒ Object

Transform an Akoma Ntoso XML document back into a plain-text version suitable for re-parsing back into XML with no loss of structure.



84
85
86
87
88
89
90
91
92
93
94
# File 'lib/slaw/generator.rb', line 84

# Transform an XML document into plain text by applying the grammar's
# act_text.xsl stylesheet, then collapsing runs of blank lines.
#
# @param doc [Object] the XML document handed to the stylesheet's +apply_to+
# @return [String] the transformed text, with every run of two or more
#   blank (or space-only) lines replaced by a single newline
# @raise [RuntimeError] if no act_text.xsl for this grammar is found on the
#   load path
def text_from_act(doc)
  # look on the load path for an XSL file for this grammar
  filename = "/slaw/grammars/#{@grammar}/act_text.xsl"
  dir = $LOAD_PATH.find { |p| File.exist?(p + filename) }

  # Report the path that was searched for. The previous message interpolated
  # an undefined local, which raised NameError instead of this error.
  raise "Unable to find text XSL for grammar #{@grammar}: #{filename}" unless dir

  xslt = Nokogiri::XSLT(File.read(dir + filename))
  xslt.apply_to(doc).gsub(/^( *\n){2,}/, "\n")
end