Class: String

Inherits:
Object show all
Defined in:
lib/sup/util.rb

Defined Under Namespace

Classes: CheckError

Instance Method Summary collapse

Instance Method Details

#asciiObject



377
378
379
380
381
382
383
384
385
386
387
388
# File 'lib/sup/util.rb', line 377

# Builds a printable, 7-bit-clean rendering of the string: every byte with
# the high bit set is replaced by a literal backslash-x hex escape, all other
# bytes are copied through unchanged.
#
# The result is tagged as UTF-8 when the runtime supports string encodings.
def ascii
  escaped = ""
  each_byte do |byte|
    high_bit_set = byte >= 128
    escaped << (high_bit_set ? "\\x#{byte.to_s 16}" : byte.chr)
  end
  escaped.force_encoding Encoding::UTF_8 if escaped.respond_to? :force_encoding
  escaped
end

#ascii_only?Boolean

Returns:

  • (Boolean)


395
396
397
398
# File 'lib/sup/util.rb', line 395

# Reports whether every byte of the string is 7-bit ASCII.
#
# Bytes are visited with each_byte instead of self[i]: indexing a String with
# an integer yields a one-character String on Ruby 1.9+ (it was a byte value
# on 1.8), and a String has no & operator, so the indexed form raises there.
#
# Returns false as soon as a byte with the high bit set is seen, true
# otherwise (including for the empty string).
def ascii_only?
  each_byte { |byte| return false if (byte & 128) != 0 }
  true
end

#camel_to_hyphyObject



248
249
250
# File 'lib/sup/util.rb', line 248

# Converts camelCase text to lower-case hyphenated text: a hyphen is inserted
# wherever a lower-case letter is directly followed by an upper-case letter
# or a digit, then the whole result is downcased.
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/, '\1-\2')
  hyphenated.downcase
end

#checkObject



368
369
370
371
372
373
374
375
# File 'lib/sup/util.rb', line 368

# Sanity-checks the string's encoding.
#
# Raises CheckError when the string is tagged with an encoding other than
# UTF-8 or ASCII, or when its bytes are not valid for its encoding. Each test
# is skipped on runtimes whose strings lack the corresponding method.
# Any StandardError raised while checking is re-raised as a CheckError
# carrying the same message.
def check
  if respond_to?(:encoding)
    acceptable = encoding == Encoding::UTF_8 || encoding == Encoding::ASCII
    fail "unexpected encoding #{encoding}" unless acceptable
  end
  fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
rescue
  raise CheckError.new($!.message)
end

#display_lengthObject

nasty multibyte hack for ruby 1.8. if it’s utf-8, split into chars using the utf8 regex and count those. otherwise, use the byte length.



239
240
241
242
243
244
245
246
# File 'lib/sup/util.rb', line 239

# Length of the string for display purposes.
#
# On interpreters older than 1.9.1 with $encoding set to UTF-8, the string is
# split into characters with the /u regex and those are counted; the count is
# cached in an instance variable because the scan is slow. In every other
# case the plain size is returned.
def display_length
  legacy_utf8 = RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
  return size unless legacy_utf8

  # scan hack is somewhat slow, worth trying to cache
  @display_length ||= scan(/./u).size
end

#each(&b) ⇒ Object



355
356
357
# File 'lib/sup/util.rb', line 355

# Line-wise iteration: hands the given block straight to each_line and
# returns whatever each_line returns.
def each(&b)
  each_line(&b)
end

#find_all_positions(x) ⇒ Object



252
253
254
255
256
257
258
259
260
261
262
# File 'lib/sup/util.rb', line 252

# Collects the start index of every occurrence of x in the string.
#
# x is handed to String#index, and the search resumes one position after each
# hit, so overlapping occurrences are all reported. Returns an Array of
# indices in ascending order (empty when there is no match).
def find_all_positions x
  positions = []
  cursor = 0
  while cursor < length && (hit = index(x, cursor))
    positions << hit
    cursor = hit + 1
  end
  positions
end

#normalize_whitespaceObject



344
345
346
# File 'lib/sup/util.rb', line 344

# Returns a copy of the string with each tab expanded to four spaces and
# every carriage return removed.
def normalize_whitespace
  tabs_expanded = gsub(/\t/, "    ")
  tabs_expanded.gsub(/\r/, "")
end

#ordObject



349
350
351
# File 'lib/sup/util.rb', line 349

# Returns the integer value of the string's first byte, or nil when the
# string is empty.
#
# The byte is read with unpack('C') rather than self[0], because indexing a
# String with an integer returns a one-character String (not a number) on
# Ruby 1.9 and later.
#
# NOTE(review): for a multibyte first character this is the leading byte, not
# the code point that the built-in String#ord of newer Rubies reports.
def ord
  unpack('C').first
end

#split_on_commasObject

A very complicated regex found on the internet to split on commas, unless they occur within double quotes.



266
267
268
# File 'lib/sup/util.rb', line 266

# Splits the whitespace-normalized string on commas (plus any whitespace that
# follows them), except for commas that sit inside a pair of double quotes.
def split_on_commas
  # a comma qualifies only when the rest of the string holds balanced quotes
  unquoted_comma = /,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/
  normalize_whitespace.split(unquoted_comma)
end

#split_on_commas_with_remainderObject

ok, here we do it the hard way. got to have a remainder for purposes of tab-completing full email addresses



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/sup/util.rb', line 272

# Splits the string on commas that lie outside double-quoted sections and
# also reports an unterminated quoted tail.
#
# The string is scanned with a four-state machine:
#   :outstring / :instring       -- outside / inside a double-quoted section
#   :escaped_outstring / :escaped_instring
#                                -- the previous character was a backslash
# A backslash escapes exactly one following character: an escaped double
# quote does not open or close a quoted section, and an escaped backslash
# does not start another escape. A comma outside quotes always splits, even
# directly after a backslash.
#
# Returns a two-element Array [fields, remainder]:
#   fields    -- the completed fields, with whitespace at line starts and
#                line ends stripped
#   remainder -- when the string ends inside an open double quote, the text
#                of that unfinished field (leading whitespace removed);
#                otherwise nil
def split_on_commas_with_remainder
  ret = []
  state = :outstring
  pos = 0
  region_start = 0
  # pos == length is visited on purpose: the end of the string is handled
  # like a final separator (char is nil there).
  while pos <= length
    newpos = case state
      # right after a backslash, look at the very next character
      when :escaped_instring, :escaped_outstring then pos
      # otherwise jump ahead to the next character that can change state
      else index(/[,"\\]/, pos)
    end

    if newpos
      char = self[newpos]
    else
      char = nil
      newpos = length
    end

    case char
    when ?"
      state = case state
        when :outstring then :instring
        when :instring then :outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    when ?,, nil
      state = case state
        when :outstring, :escaped_outstring then
          ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
          region_start = newpos + 1
          :outstring
        when :instring then :instring
        when :escaped_instring then :instring
      end
    when ?\\
      state = case state
        when :instring then :escaped_instring
        when :outstring then :escaped_outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    else
      # An ordinary character can only be examined right after a backslash.
      # The escape covers just this one character, so drop back to the
      # unescaped state; otherwise a later quote would be taken as escaped.
      state = case state
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
        else state
      end
    end
    pos = newpos + 1
  end

  remainder = case state
    when :instring
      self[region_start .. -1].gsub(/^\s+/, "")
    else
      nil
    end

  [ret, remainder]
end

#to_set_of_symbols(split_on = nil) ⇒ Object

Takes a list of words and returns a Set of symbols. Typically used in Sup for translating Xapian’s representation of a list of labels (a string) to a set of label symbols.

split_on will be passed to String#split, so you can leave this nil for space.



365
# File 'lib/sup/util.rb', line 365

# Splits the string with String#split (split_on is passed through unchanged,
# so nil means whitespace), strips each piece, interns it, and returns the
# resulting symbols as a Set.
def to_set_of_symbols split_on=nil
  symbols = split(split_on).map { |word| word.strip.intern }
  Set.new symbols
end

#transcode(src_encoding = $encoding) ⇒ Object



390
391
392
# File 'lib/sup/util.rb', line 390

# Passes this string to Iconv.easy_decode together with the global $encoding
# and src_encoding, in that argument order, and returns its result.
# src_encoding defaults to $encoding.
# NOTE(review): presumably $encoding is the target and src_encoding the source
# charset -- confirm against Iconv.easy_decode, which is defined elsewhere.
def transcode(src_encoding = $encoding)
  target_encoding = $encoding
  Iconv.easy_decode(target_encoding, src_encoding, self)
end

#wrap(len) ⇒ Object



328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/sup/util.rb', line 328

# Breaks the string into pieces of at most len characters.
#
# While more than len characters remain, the first len characters are
# examined: if they contain whitespace, the piece ends just before the last
# whitespace character (which is dropped); otherwise the piece is a hard cut
# of exactly len characters. Whatever is left at the end becomes the final
# piece.
#
# len must be positive. A zero or negative length can never make progress
# (the loop would spin forever or fail obscurely), so it is rejected up front
# with ArgumentError.
#
# Returns an Array of Strings.
def wrap len
  raise ArgumentError, "wrap length must be positive (got #{len.inspect})" unless len > 0

  ret = []
  s = self
  while s.length > len
    head = s[0 ... len]
    cut = head.rindex(/\s/)
    if cut
      ret << head[0 ... cut]
      s = s[(cut + 1) .. -1]
    else
      ret << head
      s = s[len .. -1]
    end
  end
  ret << s
end