Class: Regextest::Front::Letter::TLetter

Inherits:
Object
  • Object
show all
Includes:
Common, CharClass, Range
Defined in:
lib/regextest/front/letter.rb

Constant Summary collapse

@@id =

A class variable used to generate a unique name for each element.

0
@@unicode_ranges =
{}

Constants included from Common

Common::TstConstDebug, Common::TstConstRecursionMax, Common::TstConstRepeatMax, Common::TstConstRetryMax, Common::TstConstRetryMaxSecond, Common::TstConstTimeout, Common::TstConstUnicodeCharSet, Common::TstFixnumMax

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Common

#TstLog, #TstMdPrint, #TstRand, #TstShuffle, #is_random?, #reset_random_called

Constructor Details

#initialize(type, val) ⇒ TLetter

Constructor



20
21
22
23
24
25
26
27
28
# File 'lib/regextest/front/letter.rb', line 20

# Build a letter from lexer output.
#
# @param type [Symbol] lexer token type; kept in @data_type
# @param val [#[]] indexable triple: val[0] is the text, val[1] the offset
#   and val[2] the length
def initialize(type, val)
  TstLog("TLetter: type:#{type}, value:#{val}")
  # Options arrive later through set_options.
  @options = nil
  @data_type = type
  # Missing (nil/false) slots fall back to "", -1 and 0 respectively.
  @value, @offset, @length = (val[0] || ""), (val[1] || -1), (val[2] || 0)
  # The character-class object is built by set_attr.
  @obj = nil
end

Instance Attribute Details

#length ⇒ Object (readonly)

Returns the value of attribute length.



30
31
32
# File 'lib/regextest/front/letter.rb', line 30

# Read-only accessor for the letter's length.
#
# @return [Object] the value held in @length (initialize takes it from
#   val[2], defaulting to 0)
def length
  @length
end

#offset ⇒ Object (readonly)

Returns the value of attribute offset.



30
31
32
# File 'lib/regextest/front/letter.rb', line 30

# Read-only accessor for the letter's offset.
#
# @return [Object] the value held in @offset (initialize takes it from
#   val[1], defaulting to -1)
def offset
  @offset
end

#value ⇒ Object (readonly)

Returns the value of attribute value.



30
31
32
# File 'lib/regextest/front/letter.rb', line 30

# Read-only accessor for the letter's source text.
#
# @return [Object] the value held in @value (initialize takes it from
#   val[0], defaulting to "")
def value
  @value
end

Instance Method Details

#enumerate ⇒ Object

enumerate codepoints



315
316
317
# File 'lib/regextest/front/letter.rb', line 315

# Enumerate codepoints by delegating to the wrapped object.
#
# @obj is nil after initialize and is only assigned by set_attr, so calling
# this earlier raises NoMethodError.
#
# @return [Object] whatever @obj.enumerate returns
def enumerate
  @obj.enumerate
end

#generate_any_char(val) ⇒ Object

generate whole set of letters (depends on option)



94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/regextest/front/letter.rb', line 94

# Build the character class for an any-letter token (set_attr calls this
# for :LEX_ANY_LETTER).
#
# @param val [Object] the token text; not used
# @return [CharClass] class built from TstConstUnicodeCharSet, with "\n"
#   added when the multi-line option is set
def generate_any_char(val)
  # Unicode and non-Unicode modes draw from the same character set, so there
  # is nothing to branch on. (An ASCII-only alternative for non-Unicode mode,
  # TRange.new("\x20", "\x7e"), was left disabled by the original author.)
  obj = CharClass.new(TstConstUnicodeCharSet)

  # add new-line if multi-line option specified
  obj.add_ranges([TRange.new("\n")]) if @options[:reg_options].is_multiline?
  obj
end

#generate_char_class(val) ⇒ Object

Generate a POSIX character class (e.g. [[:alpha:]]).



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/regextest/front/letter.rb', line 230

# Build the character class for a POSIX bracket token such as "[:alpha:]"
# or its negated form "[:^alpha:]".
#
# In Unicode mode the class name is handed to CharClass as-is; otherwise the
# name is looked up in a fixed ASCII table.
#
# @param val [String] the token text, including the "[:" and ":]" delimiters
# @return [CharClass] the class, reversed when the token carries "^"
# @raise [RuntimeError] if val is not of the form [:name:] / [:^name:], or
#   (non-Unicode mode) the name is not in the table
def generate_char_class(val)
  md = val.match(/^\[\:(\^)?(\w+)\:\]$/)
  raise "internal error, invalid POSIX class name(#{val})" unless md

  reverse = md[1] == "^"
  class_name = md[2]

  obj =
    if @options[:reg_options].is_unicode?
      CharClass.new(class_name)
    else
      # Each entry lists the argument lists handed to TRange.new, in order.
      ascii_table = {
        'alnum'  => [['a', 'z'], ['A', 'Z'], ['0', '9']],
        'alpha'  => [['a', 'z'], ['A', 'Z']],
        'cntrl'  => [["\x00", "\x1f"], ["\x7f"]],
        'lower'  => [['a', 'z']],
        'print'  => [["\x20", "\x7e"]],
        'space'  => [[' '], ["\n"], ["\r"], ["\t"], ["\f"], ["\v"]],
        'digit'  => [['0', '9']],
        'upper'  => [['A', 'Z']],
        'blank'  => [[' '], ["\t"]],
        'graph'  => [["\x21", "\x7e"]],
        'punct'  => [["\x21", "\x23"], ["\x25", "\x2a"], ["\x2c", "\x2f"],
                     ["\x3a", "\x3b"], ["\x3f", "\x40"], ["\x5b", "\x5d"],
                     ["\x5f"], ["\x7b"], ["\x7d"]],
        'xdigit' => [['a', 'f'], ['A', 'F'], ['0', '9']],
        'word'   => [['a', 'z'], ['A', 'Z'], ['0', '9'], ['_']]
      }
      bounds = ascii_table[class_name]
      raise "Error: Invalid character class #{val}" unless bounds

      CharClass.new(bounds.map { |args| TRange.new(*args) })
    end

  obj.set_reverse(@options) if reverse
  obj
end

#generate_control_letter(val, type) ⇒ Object

generate control letter c-x, m-x



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/regextest/front/letter.rb', line 70

# Build a single-character class for a control or meta letter token.
#
# Only the last character of val takes part in the computation:
# * :LEX_CONTROL_LETTER subtracts 0x20, 0x40 or 0x60 from its codepoint
#   depending on which of '0'..'?', '@'..'_' or '`'..'~' it falls in;
# * :LEX_META_LETTER adds 0x80 to its codepoint.
#
# @param val [String] the token text
# @param type [Symbol] :LEX_CONTROL_LETTER or :LEX_META_LETTER
# @return [CharClass] the resulting class, which is also stored in @obj
# @raise [RuntimeError] for a letter outside the three ranges or any other type
def generate_control_letter(val, type)
  suffix = val[-1..-1]
  codepoint = suffix.unpack("U*")[0]
  case type
  when :LEX_CONTROL_LETTER
    if    ('0'..'?').include?(suffix)
      result = codepoint - 0x20
    elsif ('@'..'_').include?(suffix)
      result = codepoint - 0x40
    elsif ('`'..'~').include?(suffix)
      result = codepoint - 0x60
    else
      raise "Internal error: invalid control letter (#{val})"
    end
  when :LEX_META_LETTER
    # The leftover debugging `pp` that printed the result on every call has
    # been removed; nothing should be written to stdout here.
    result = codepoint + 0x80
  else
    raise "Internal error: invalid type #{type}"
  end
  @obj = CharClass.new([ TRange.new([result].pack("U*")) ])
end

#generate_simplified_class(val) ⇒ Object

generate simplified character class



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/regextest/front/letter.rb', line 144

# Build the character class for a simplified (backslash) class or a simple
# escape: \w \W \d \D \h \H \s \S and \n \r \t \f \a \e \v.
#
# \w, \d and \s consult the Unicode option; the negated classes always use
# fixed ASCII ranges.
#
# @param val [String] the two-character escape, e.g. "\\w"
# @return [CharClass, nil] the class, or nil for the anchors
#   \b \z \A \B \G \Z, which are only warned about
# @raise [RuntimeError] for \c \x \C \M (unsupported) and for any other text
def generate_simplified_class(val)
  obj = nil
  case val
  when "\\w"
    if @options[:reg_options].is_unicode?
      obj = CharClass.new("Letter|Mark|Number|Connector_Punctuation")
    else
      obj = CharClass.new(
              [ TRange.new('a', 'z'), TRange.new('A', 'Z'),
                TRange.new('0', '9'), TRange.new('_') ]
            )
    end
  when "\\W"
    obj = CharClass.new(
            [ TRange.new("\x20", "\x2f"), TRange.new("\x3a", "\x40"),
              TRange.new("\x5b", "\x5e"), TRange.new("\x60"),
              TRange.new("\x7b", "\x7e") ]
          )
  when "\\d"
    if @options[:reg_options].is_unicode?
      obj = CharClass.new("Decimal_Number")
    else
      obj = CharClass.new(
               [ TRange.new('0', '9') ]
             )
    end
  when "\\D"
    obj = CharClass.new(
            [ TRange.new("\x20", "\x2f"), TRange.new("\x3a", "\x7e") ]
          )
  when "\\h"
    obj = CharClass.new(
            [ TRange.new('0', '9') , TRange.new('a', 'f'), TRange.new('A', 'F')]
          )
  when "\\H"
    obj = CharClass.new(
            [ TRange.new("\x20", "\x2f"), TRange.new("\x3a", "\x40"),
              TRange.new("\x47", "\x60"), TRange.new("\x67", "\x7e")]
          )
  when "\\s"
    ascii_ranges = [ TRange.new(' '), TRange.new("\x9", "\xd") ]
    if @options[:reg_options].is_unicode?
      obj = CharClass.new("Line_Separator|Paragraph_Separator|Space_Separator")
      obj.add_ranges(ascii_ranges + [ TRange.new("\u{85}") ])
    else
      obj = CharClass.new(ascii_ranges)
    end
  when "\\S"
    obj = CharClass.new(
            [ TRange.new("\x21", "\x7e") ]
          )
  when "\\n", "\\r", "\\t", "\\f", "\\a", "\\e", "\\v"
    # The original evaluated an undefined local (`string`) here, so every
    # one of these escapes raised NameError. A fixed table gives the same
    # characters the intended eval would have produced, without eval.
    escaped = {
      "\\n" => "\n", "\\r" => "\r", "\\t" => "\t", "\\f" => "\f",
      "\\a" => "\a", "\\e" => "\e", "\\v" => "\v"
    }
    obj = CharClass.new(
            [ TRange.new(escaped.fetch(val)) ]
          )
  when "\\b", "\\z", "\\A", "\\B", "\\G", "\\Z"
    # Anchors carry no character; obj stays nil.
    warn "Ignored unsupported escape char #{val}."
  when "\\c", "\\x", "\\C", "\\M"
    # Previously interpolated the undefined `string`, masking this error
    # behind a NameError.
    raise "Error: Unsupported escape char #{val}"
  else
    raise "Error: Invalid simplifiled class #{val}"
  end
  obj
end

#generate_special_char(val) ⇒ Object

generate special character class



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/regextest/front/letter.rb', line 110

# Build the character class for the special escapes \R and \X.
#
# Always switches @data_type to :LEX_CHAR first, even when val turns out to
# be invalid.
#
# @param val [String] "\\R" or "\\X"
# @return [CharClass] the class for the escape
# @raise [RuntimeError] for any other val
def generate_special_char(val)
  @data_type = :LEX_CHAR
  case val
  when "\\R"
    unicode = @options[:reg_options].is_unicode?
    # BUG (carried over): the two-character sequence noted by the original
    # author as "\x0a\x0d" is not generated, only single characters.
    line_breaks = [ TRange.new("\x0a", "\x0d") ]
    if unicode
      line_breaks += [ TRange.new("\u{85}"), TRange.new("\u{2028}", "\u{2029}") ]
    end
    CharClass.new(line_breaks)
  when "\\X"
    if @options[:reg_options].is_unicode?
      # BUG (carried over): should model (?>\P{M}\p{M}*); for now this is
      # just the reverse of the "M" class.
      CharClass.new("M").tap { |marks| marks.set_reverse(@options) }
    else
      CharClass.new([ TRange.new("\x20", "\x7e"), TRange.new("\n") ])
    end
  else
    raise "Error: internal error, invalid special char: #{val}"
  end
end

#generate_unicode_char(val, type) ⇒ Object

Generate a Unicode property class (i.e. \p{...} | \P{...}).



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/regextest/front/letter.rb', line 211

# Build the character class for a Unicode property token of the form
# \p{Name}, \P{Name}, \p{^Name} or \P{^Name}.
#
# @param val [String] the token text
# @param type [Symbol] lexer token type, forwarded to CharClass.new
# @return [Object] the CharClass for the down-cased property name, or the
#   result of its set_reverse when the token is negated
# @raise [RuntimeError] if val contains no p{...} / P{...} construct
def generate_unicode_char(val, type)
  md = val.match(/(p|P)\{(\^?)(\w+)\}/)
  raise "Internal error, inconsistent Unicode class #{val}" unless md

  property = md[3].downcase
  caret = md[2] == "^"
  obj = CharClass.new(property, type)

  # An upper-case P and a leading ^ each negate once, so \P{^...} is
  # equivalent to \p{...}; exactly one of the two means "reverse".
  negated = (md[1] == "P") ^ caret
  negated ? obj.set_reverse(@options) : obj
end

#json ⇒ Object

transform to json format



329
330
331
332
333
334
335
336
337
# File 'lib/regextest/front/letter.rb', line 329

# Render this letter as a JSON object string.
#
# Every call first increments the class-wide @@id counter, so the "id"
# field ("L" followed by the counter) differs between calls.
#
# @return [String] an object with the keys type, id, value, offset, length
#   and charset; value is whatever @obj.json returns, inserted verbatim
def json
  @@id += 1
  charset = @options[:reg_options].charset
  fragments = [
    "{",
    "\"type\": \"#{@data_type}\", \"id\": \"L#{@@id}\", \"value\": #{@obj.json}, ",
    "\"offset\": #{@offset}, \"length\": #{@length}, ",
    "\"charset\": \"#{charset}\"",
    "}"
  ]
  # Explicit empty separator so the result never depends on $,.
  fragments.join("")
end

#set_attr(type, val) ⇒ Object

Generate the character(s) corresponding to the given token type.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/regextest/front/letter.rb', line 33

# Build @obj (and, for plain characters, normalise @data_type to :LEX_CHAR)
# from a lexer token type and its text.
#
# @param type [Symbol] lexer token type (:LEX_CHAR, :LEX_BRACKET, ...)
# @param val [String] the token text
# @return [Object] the object assigned to @obj
# @raise [RuntimeError] for :LEX_AND_AND and for any unknown type
def set_attr(type, val)
  case type
  when :LEX_CHAR, :LEX_SPACE
    @data_type = :LEX_CHAR
    @obj = CharClass.new([ TRange.new(val) ])
  when :LEX_SIMPLE_ESCAPE
    # Drop the backslash and keep the single escaped character.
    @data_type = :LEX_CHAR
    @obj = CharClass.new([ TRange.new(val[1..1]) ])
  when :LEX_CODE_LITERAL, :LEX_ESCAPED_LETTER, :LEX_UNICODE, :LEX_OCTET
    @data_type = :LEX_CHAR
    # NOTE(review): the token text is evaluated as a Ruby string literal. A
    # val containing a double quote or #{...} would run as code; confirm that
    # the lexer can only emit plain escape sequences for these token types.
    @obj = CharClass.new([ TRange.new(eval('"' + val + '"')) ])
  when :LEX_CONTROL_LETTER, :LEX_META_LETTER
    @data_type = :LEX_CHAR
    @obj = generate_control_letter(val, type)
  when :LEX_BRACKET
    @obj = Regextest::Front::Bracket.new(val)
  when :LEX_SIMPLIFIED_CLASS
    @obj = generate_simplified_class(val)
  when :LEX_POSIX_CHAR_CLASS
    @obj = generate_char_class(val)
  when :LEX_UNICODE_CLASS, :LEX_UNICODE_CLASS_BRACKET
    @obj = generate_unicode_char(val, type)
  when :LEX_ANY_LETTER
    @obj = generate_any_char(val)
  when :LEX_SPECIAL_LETTER
    @obj = generate_special_char(val)
  when :LEX_AND_AND
    # The assignment that used to follow this raise could never run and has
    # been removed.
    raise "Internal error: enexpected LEX_AND_AND"
  else
    raise "Error: internal error, type:#{type} not implemented"
  end
end

#set_options(options) ⇒ Object

set options



320
321
322
323
324
325
326
# File 'lib/regextest/front/letter.rb', line 320

# Store the options, (re)build @obj for the current token and pass the
# options on to it.
#
# @param options [Hash] options; :reg_options is logged here, and the hash
#   is forwarded unchanged to @obj.set_options
# @return [self] to allow chaining
def set_options(options)
  reg_options = options[:reg_options]
  TstLog("Letter set_options: #{reg_options.inspect}")
  tap do
    @options = options
    # set_attr needs @options in place, because the generate_* helpers read it.
    set_attr(@data_type, @value)
    @obj.set_options(options)
  end
end