Class: J2119::Matcher

Inherits:

Object

Object
J2119::Matcher

show all

Defined in: lib/j2119/matcher.rb

Overview

Does the heavy lifting of parsing j2119 files to extract all

the assertions, making egregious use of regular expressions.

This is the kind of thing I actively discourage when other

programmers suggest it.  If I were a real grown-up I'd
implement a proper lexer and bullet-proof parser.

Constant Summary collapse

# Regex fragment with a named group `modal` that accepts the keywords
# MUST, MAY or MUST NOT. It is spliced into the constraint and
# "only one of" line patterns built by #reconstruct.
MUST =

'(?<modal>MUST|MAY|MUST NOT)'

# Base type names. #make_type_regex copies this list and extends it with
# modified numeric types plus "-array", "nonempty-" and "nullable-" variants.
TYPES =

[
  'array',
  'object',
  'string',
  'boolean',
  'numeric',
  'integer',
  'float',
  'timestamp',
  'JSONPath',
  'referencePath',
  'URI'
]

# Alternation ('|'-joined) of the comparison phrases accepted in a value
# constraint. The first alternative is the empty string.
RELATIONS =

[
  '', 'equal to', 'greater than', 'less than',
  'greater than or equal to', 'less than or equal to'
].join('|')

# Regex fragment: one of RELATIONS captured as `relation`, followed by
# at least one whitespace character.
RELATION =

"((?<relation>#{RELATIONS})\\s+)"

S = string

'"[^"]*"'

V = non-string value: number, true, false, null

'\S+'

# Regex fragment: a RELATION followed by its comparison operand, captured
# as `target`. The operand is either a quoted string (S) or a run of
# non-whitespace characters (V).
RELATIONAL =

"#{RELATION}(?<target>#{S}|#{V})"

CHILD_ROLE =

';\s+((its\s+(?<child_type>value))|' +
'each\s+(?<child_type>field|element))' +
'\s+is\s+an?\s+' +
'"(?<child_role>[^"]+)"'

@@initialized =

false

Instance Attribute Summary collapse

#constraint_match ⇒ Object readonly

crutch for RE debugging.
#eachof_match ⇒ Object readonly

actual exports.
#only_one_match ⇒ Object readonly

crutch for RE debugging.
#role_matcher ⇒ Object readonly

actual exports.
#roledef_match ⇒ Object readonly

crutch for RE debugging.

Class Method Summary collapse

.tokenize_strings(s) ⇒ Object

Instance Method Summary collapse

#add_role(role) ⇒ Object
#build(re, line) ⇒ Object
#build_constraint(line) ⇒ Object
#build_only_one(line) ⇒ Object
#build_role_def(line) ⇒ Object
#constants ⇒ Object

constants that need help from oxford.
#initialize(root) ⇒ Matcher constructor

A new instance of Matcher.
#is_constraint_line(line) ⇒ Object
#is_only_one_match_line(line) ⇒ Object
#is_role_def_line(line) ⇒ Object
#make_type_regex ⇒ Object
#reconstruct ⇒ Object
#tokenize_values(vals) ⇒ Object

Constructor Details

#initialize(root) ⇒ `Matcher`

Returns a new instance of Matcher.

# File 'lib/j2119/matcher.rb', line 147

# Creates a matcher that initially knows a single role.
#
# Ensures the shared class-level patterns exist (via #constants), starts an
# empty role list and registers +root+ as the first role.
#
# @param root [Object] name of the initial role; it is joined into the role
#   alternation by #add_role (presumably a String -- confirm with callers)
def initialize(root)
  constants
  @roles = []
  # add_role finishes by calling reconstruct, so all patterns are already
  # built when it returns; a second reconstruct here would only redo the
  # same work.
  add_role root
end

Instance Attribute Details

#constraint_match ⇒ `Object` (readonly)

crutch for RE debugging



26
27
28

# File 'lib/j2119/matcher.rb', line 26

# Reader for @constraint_match, the Regexp that #reconstruct compiles for
# complete constraint lines.
def constraint_match
  @constraint_match
end

#eachof_match ⇒ `Object` (readonly)

actual exports



29
30
31

# File 'lib/j2119/matcher.rb', line 29

# Reader for @eachof_match, the Regexp that #reconstruct compiles for
# lines beginning with "Each of".
def eachof_match
  @eachof_match
end

#only_one_match ⇒ `Object` (readonly)

crutch for RE debugging



26
27
28

# File 'lib/j2119/matcher.rb', line 26

# Reader for @only_one_match, the Regexp that #reconstruct compiles for
# "have only one of ..." lines.
def only_one_match
  @only_one_match
end

#role_matcher ⇒ `Object` (readonly)

actual exports



29
30
31

# File 'lib/j2119/matcher.rb', line 29

# Reader for @role_matcher, the '|'-joined string of all role names that
# #add_role maintains.
def role_matcher
  @role_matcher
end

#roledef_match ⇒ `Object` (readonly)

crutch for RE debugging



26
27
28

# File 'lib/j2119/matcher.rb', line 26

# Reader for @roledef_match, the Regexp that #reconstruct compiles for
# role-definition lines.
def roledef_match
  @roledef_match
end

Class Method Details

.tokenize_strings(s) ⇒ `Object`

# File 'lib/j2119/matcher.rb', line 160

# Extracts the contents of every double-quoted substring of +s+, in order
# of appearance and without the surrounding quotes.
#
# An unmatched trailing quote is ignored; an empty pair ("") yields an
# empty string.
#
# @param s [String, nil] text to scan; nil is treated as empty text
# @return [Array<String>] the quoted contents
def self.tokenize_strings(s)
  # The single capture group makes scan return one-element arrays,
  # hence the flatten.
  s.to_s.scan(/"([^"]*)"/).flatten
end

Instance Method Details

#add_role(role) ⇒ `Object`

# File 'lib/j2119/matcher.rb', line 154

# Registers an additional role name, refreshes the '|'-joined role
# alternation and rebuilds every pattern that embeds it.
#
# @param role [Object] role name to append to the known roles
# @return whatever #reconstruct returns
def add_role(role)
  # push returns the (now extended) role list, so it can be joined directly
  @role_matcher = @roles.push(role).join('|')
  reconstruct
end

#build(re, line) ⇒ `Object`

# File 'lib/j2119/matcher.rb', line 205

# Matches +line+ against +re+ and returns its named captures.
#
# @param re [Regexp] pattern with named groups
# @param line [String] line to match
# @return [Hash{String => String, nil}] group name => captured text
#   (nil for groups that did not participate); an empty hash when the
#   line does not match at all
def build(re, line)
  data = {}
  match = re.match(line)
  unless match
    puts "No names for: #{line}"
    # Nothing was captured; bail out instead of calling #names on nil.
    return data
  end
  match.names.each { |name| data[name] = match[name] }
  data
end

#build_constraint(line) ⇒ `Object`



229
230
231

# File 'lib/j2119/matcher.rb', line 229

# Extracts the named captures of a constraint line by running #build with
# the compiled @constraint_match pattern.
def build_constraint(line)
  build(@constraint_match, line)
end

#build_only_one(line) ⇒ `Object`



217
218
219

# File 'lib/j2119/matcher.rb', line 217

# Extracts the named captures of an "only one of" line by running #build
# with the compiled @only_one_match pattern.
def build_only_one(line)
  build(@only_one_match, line)
end

#build_role_def(line) ⇒ `Object`



201
202
203

# File 'lib/j2119/matcher.rb', line 201

# Extracts the named captures of a role-definition line by running #build
# with the compiled @roledef_match pattern.
def build_role_def(line)
  build(@roledef_match, line)
end

#constants ⇒ `Object`

constants that need help from oxford

# File 'lib/j2119/matcher.rb', line 64

# Builds, once per process, the class-level pattern fragments that cannot
# be plain constants because they need Oxford.re:
#   @@strings   - Oxford.re pattern over quoted strings, captured as `strings`
#   @@predicate - either a RELATIONAL comparison or a "one of <strings>" enum
#
# Subsequent calls return immediately.
def constants
  return if @@initialized

  @@initialized = true

  @@strings = Oxford.re(S, :capture_name => 'strings')
  # The backslash must be doubled: inside a double-quoted Ruby string "\s"
  # is a literal space, which would make the pattern accept only spaces
  # instead of any whitespace like every other pattern in this class.
  enum = "one\\s+of\\s+#{@@strings}"

  @@predicate = "(#{RELATIONAL}|#{enum})"
end

#is_constraint_line(line) ⇒ `Object`



221
222
223

# File 'lib/j2119/matcher.rb', line 221

# Tests whether +line+ matches @constraint_start, the compiled pattern for
# the opening of a constraint line. Returns the match offset, or nil when
# there is no match.
def is_constraint_line(line)
  line =~ @constraint_start
end

#is_only_one_match_line(line) ⇒ `Object`



225
226
227

# File 'lib/j2119/matcher.rb', line 225

# Tests whether +line+ matches @only_one_start, the compiled pattern for
# the opening of an "only one of" line. Returns the match offset, or nil
# when there is no match.
def is_only_one_match_line(line)
  line =~ @only_one_start
end

#is_role_def_line(line) ⇒ `Object`



197
198
199

# File 'lib/j2119/matcher.rb', line 197

# Tests whether +line+ ends like a role definition, i.e. with
#   is a "Role".   /   is an "Role".
# optionally followed by trailing whitespace.
#
# @param line [String] line to test
# @return [Integer, nil] offset where the "is a ..." tail starts, or nil
def is_role_def_line(line)
  role_def_tail = %r{is\s+an?\s+"[^"]*"\.\s*$}
  line =~ role_def_tail
end

#make_type_regex ⇒ `Object`

# File 'lib/j2119/matcher.rb', line 175

# Computes @type_regex: a '|'-joined alternation of every recognised type
# name, in this order:
#   1. the base TYPES followed by the sign-modified numeric types
#   2. the "-array" form of each name from (1)
#   3. the "nonempty-" form of each name from (2)
#   4. the "nullable-" form of each name from (1)-(3)
#
# @return [String] the alternation (also stored in @type_regex)
def make_type_regex
  # sign-modified numeric types, grouped by base type
  modified_numbers = %w[float integer numeric].flat_map do |base|
    %w[positive negative nonnegative].map { |sign| "#{sign}-#{base}" }
  end
  scalars = TYPES + modified_numbers

  arrays = scalars.map { |name| "#{name}-array" }
  nonempty_arrays = arrays.map { |name| "nonempty-#{name}" }
  non_nullable = scalars | arrays | nonempty_arrays

  nullable = non_nullable.map { |name| "nullable-#{name}" }
  @type_regex = (non_nullable | nullable).join('|')
end

#reconstruct ⇒ `Object`

# File 'lib/j2119/matcher.rb', line 75

# Rebuilds every pattern that depends on the current role list and stores
# the compiled Regexps in instance variables:
#   @roledef_match                       - role-definition lines
#   @constraint_start, @constraint_match - constraint lines (prefix / full)
#   @only_one_start, @only_one_match     - "only one of" lines (prefix / full)
#   @eachof_match                        - "Each of ..." lines
#
# Reads @role_matcher (set by #add_role) and @@predicate (set by
# #constants), and refreshes @type_regex through #make_type_regex.
# NOTE(review): Oxford.re is defined outside this file; it is presumably a
# builder for comma/"or"/"and"-separated list patterns -- confirm there.
def reconstruct
  make_type_regex

  # conditional clause: 'which is not <list of roles>', roles captured as
  # `excluded`
  excluded_roles = "not\\s+" +
                   Oxford.re(@role_matcher,
                             :capture_name => 'excluded',
                             :use_article => true) +
                   "\\s+"
  conditional = "which\\s+is\\s+" +
                excluded_roles

  # regex for matching constraint lines; c_start alone is also compiled
  # (as @constraint_start) to recognise such lines cheaply
  c_start = '^An?\s+' +
            "(?<role>#{@role_matcher})" + '\s+' +
            "(#{conditional})?" +
            MUST + '\s+have\s+an?\s+'
  # alternative to a single field name: 'one of <list of quoted names>'
  field_list = "one\\s+of\\s+" +
               Oxford.re('"[^"]+"', :capture_name => 'field_list')
  # full constraint: optional type, field name or list, optional value
  # predicate, optional child-role clause, terminating period
  c_match = c_start + 
            "((?<type>#{@type_regex})\\s+)?" +
            "field\\s+named\\s+" +
            "((\"(?<field_name>[^\"]+)\")|(#{field_list}))" +
            '(\s+whose\s+value\s+MUST\s+be\s+' + @@predicate + ')?' +
            '(' + CHILD_ROLE + ')?' +
            '\.'

  # regexp for matching lines of the form
  #  An X MUST have only one of "Y", "Z", and "W".
  #  There's a pattern here: building a separate regex rather than
  #  adding more complexity to @constraint_match.  Any further
  #  additions should be done this way, and
  #  TODO: Break @constraint_match into a bunch of smaller patterns
  #  like this.
  oo_start = '^An?\s+' +
            "(?<role>#{@role_matcher})" + '\s+' +
             MUST + '\s+have\s+only\s+'
  oo_field_list = "one\\s+of\\s+" +
                  Oxford.re('"[^"]+"',
                            :capture_name => 'field_list',
                            :connector => 'and')
  oo_match = oo_start + oo_field_list

  # regex for matching role-def lines, which may qualify the role either
  # by a field's value ('whose "f" field's value is v') or by the mere
  # presence of a field ('with a "f" field')
  val_match = "whose\\s+\"(?<fieldtomatch>[^\"]+)\"" +
              "\\s+field's\\s+value\\s+is\\s+" +
              "(?<valtomatch>(\"[^\"]*\")|([^\"\\s]\\S+))\\s+"
  with_a_match = "with\\s+an?\\s+\"(?<with_a_field>[^\"]+)\"\\s+field\\s"

  rd_match = '^An?\s+' +
             "(?<role>#{@role_matcher})" + '\s+' +
             "((?<val_match_present>#{val_match})|(#{with_a_match}))?" +
             "is\\s+an?\\s+" +
             "\"(?<newrole>[^\"]*)\"\\.\\s*$"
  @roledef_match = Regexp.new(rd_match)

  @constraint_start = Regexp.new(c_start)
  @constraint_match = Regexp.new(c_match)

  @only_one_start = Regexp.new(oo_start)
  @only_one_match = Regexp.new(oo_match)
                   
  # 'Each of <list of roles> <trailer>': roles captured as `each_of`, the
  # rest of the line as `trailer`.
  # NOTE(review): "of" is followed by exactly one whitespace character
  # here (\s, not \s+), unlike most separators above -- confirm intent.
  eo_match = "^Each\\s+of\\s" +
             Oxford.re(@role_matcher,
                       :capture_name => 'each_of',
                       :use_article => true,
                       :connector => 'and') +
             "\\s+(?<trailer>.*)$"

  @eachof_match = Regexp.new(eo_match)
end

#tokenize_values(vals) ⇒ `Object`



171
172
173

# File 'lib/j2119/matcher.rb', line 171

# Splits a human-written value list such as '1, 2, or 3' into its tokens.
#
# Commas and the standalone word "or" act as separators; "or" occurring
# inside a longer token (e.g. "north") is left alone. Leading separators
# do not produce an empty first token.
#
# @param vals [String] the list text
# @return [Array<String>] the individual value tokens
def tokenize_values(vals)
  # split with no argument splits on runs of whitespace and ignores
  # leading whitespace, so no blank tokens are produced.
  vals.gsub(',', ' ').gsub(/\bor\b/, ' ').split
end

Class: J2119::Matcher

Overview

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(root) ⇒ Matcher

Instance Attribute Details

#constraint_match ⇒ Object (readonly)

#eachof_match ⇒ Object (readonly)

#only_one_match ⇒ Object (readonly)

#role_matcher ⇒ Object (readonly)

#roledef_match ⇒ Object (readonly)

Class Method Details

.tokenize_strings(s) ⇒ Object

Instance Method Details

#add_role(role) ⇒ Object

#build(re, line) ⇒ Object

#build_constraint(line) ⇒ Object

#build_only_one(line) ⇒ Object

#build_role_def(line) ⇒ Object

#constants ⇒ Object

#is_constraint_line(line) ⇒ Object

#is_only_one_match_line(line) ⇒ Object

#is_role_def_line(line) ⇒ Object

#make_type_regex ⇒ Object

#reconstruct ⇒ Object

#tokenize_values(vals) ⇒ Object

#initialize(root) ⇒ `Matcher`

#constraint_match ⇒ `Object` (readonly)

#eachof_match ⇒ `Object` (readonly)

#only_one_match ⇒ `Object` (readonly)

#role_matcher ⇒ `Object` (readonly)

#roledef_match ⇒ `Object` (readonly)

.tokenize_strings(s) ⇒ `Object`

#add_role(role) ⇒ `Object`

#build(re, line) ⇒ `Object`

#build_constraint(line) ⇒ `Object`

#build_only_one(line) ⇒ `Object`

#build_role_def(line) ⇒ `Object`

#constants ⇒ `Object`

#is_constraint_line(line) ⇒ `Object`

#is_only_one_match_line(line) ⇒ `Object`

#is_role_def_line(line) ⇒ `Object`

#make_type_regex ⇒ `Object`

#reconstruct ⇒ `Object`

#tokenize_values(vals) ⇒ `Object`