Class: String
Defined Under Namespace
Classes: CheckError
Instance Method Summary collapse
- #ascii ⇒ Object
- #ascii_only? ⇒ Boolean
- #camel_to_hyphy ⇒ Object
- #check ⇒ Object
-
#display_length ⇒ Object
nasty multibyte hack for ruby 1.8.
- #each(&b) ⇒ Object
- #find_all_positions(x) ⇒ Object
- #normalize_whitespace ⇒ Object
- #ord ⇒ Object
-
#split_on_commas ⇒ Object
a very complicated regex found on the internet to split on commas, unless they occur within double quotes.
-
#split_on_commas_with_remainder ⇒ Object
ok, here we do it the hard way.
-
#to_set_of_symbols(split_on = nil) ⇒ Object
takes a list of words, and returns a set of symbols.
- #transcode(src_encoding = $encoding) ⇒ Object
- #wrap(len) ⇒ Object
Instance Method Details
#ascii ⇒ Object
377 378 379 380 381 382 383 384 385 386 387 388 |
# File 'lib/sup/util.rb', line 377
# Renders the receiver using 7-bit characters only.
#
# Every byte with the high bit set is replaced by a literal backslash
# escape ("\x" followed by the byte value in lowercase hex, as produced by
# Integer#to_s(16)); all other bytes are copied through unchanged.
#
# @return [String] the escaped copy, tagged as UTF-8 when the interpreter
#   supports String#force_encoding
def ascii
  pieces = each_byte.map { |byte| (byte & 128).zero? ? byte.chr : "\\x#{byte.to_s(16)}" }
  escaped = pieces.join
  # force_encoding is probed for because it does not exist on every
  # interpreter this code runs on.
  escaped.force_encoding(Encoding::UTF_8) if escaped.respond_to?(:force_encoding)
  escaped
end
#ascii_only? ⇒ Boolean
395 396 397 398 |
# File 'lib/sup/util.rb', line 395
# Reports whether every byte of the receiver is 7-bit.
#
# Bytes are visited with each_byte rather than by indexing: self[i] yields
# an Integer only on Ruby 1.8, while on 1.9+ it yields a one-character
# String, which has no #& method, so the indexed form raised NoMethodError.
#
# @return [Boolean] false as soon as a byte with the high bit set is seen,
#   true otherwise (including for the empty string)
def ascii_only?
  each_byte { |byte| return false if byte & 128 != 0 }
  true
end
#camel_to_hyphy ⇒ Object
248 249 250 |
# File 'lib/sup/util.rb', line 248
# Converts camelCase text to lowercase hyphen-separated text.
#
# A hyphen is inserted wherever a lowercase ASCII letter is directly
# followed by an uppercase ASCII letter or a digit; the whole result is
# then downcased.
#
# @return [String] a new string; the receiver is not modified
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/) { "#{$1}-#{$2}" }
  hyphenated.downcase
end
#check ⇒ Object
368 369 370 371 372 373 374 375 |
# File 'lib/sup/util.rb', line 368
# Verifies that the receiver is validly encoded UTF-8 or ASCII text.
#
# Both tests are skipped on interpreters whose strings do not respond to
# #encoding / #valid_encoding?. Any StandardError raised while checking is
# re-raised as a CheckError carrying the original message. (The previous
# text read `CheckError.new($!.)`, a dangling method call that does not
# parse.)
#
# @return [nil] when the string passes both checks
# @raise [CheckError] if the encoding is neither UTF-8 nor ASCII, or the
#   bytes are not valid in the declared encoding
def check
  fail "unexpected encoding #{encoding}" if respond_to?(:encoding) && !(encoding == Encoding::UTF_8 || encoding == Encoding::ASCII)
  fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
rescue => e
  raise CheckError, e.message
end
#display_length ⇒ Object
nasty multibyte hack for ruby 1.8. if it’s utf-8, split into chars using the utf8 regex and count those. otherwise, use the byte length.
239 240 241 242 243 244 245 246 |
# File 'lib/sup/util.rb', line 239
# Number of characters in the receiver.
#
# When RUBY_VERSION sorts before '1.9.1' and $encoding is "UTF-8" or
# "utf8", characters are counted by scanning with a /u regex and the count
# is memoized in @display_length. In every other case this is just #size.
#
# NOTE(review): the memoized value is never reset here, so it looks like it
# could go stale if the string is mutated afterwards - confirm.
#
# @return [Integer]
def display_length
  legacy_utf8 = RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
  return size unless legacy_utf8

  # scan hack is somewhat slow, worth trying to cache
  @display_length ||= scan(/./u).size
end
#each(&b) ⇒ Object
355 356 357 |
# File 'lib/sup/util.rb', line 355
# Forwards to #each_line, passing the caller's block (if any) straight
# through.
#
# @return whatever #each_line returns for the given block
def each(&block)
  each_line(&block)
end
#find_all_positions(x) ⇒ Object
252 253 254 255 256 257 258 259 260 261 262 |
# File 'lib/sup/util.rb', line 252
# Collects every index at which x occurs in the receiver.
#
# After each match the search resumes one position past the match start,
# so overlapping occurrences are all reported.
#
# @param x the pattern handed to String#index (string or regex)
# @return [Array<Integer>] match positions in ascending order; empty when
#   there is no match
def find_all_positions(x)
  positions = []
  offset = 0
  while offset < length && (hit = index(x, offset))
    positions << hit
    offset = hit + 1
  end
  positions
end
#normalize_whitespace ⇒ Object
344 345 346 |
# File 'lib/sup/util.rb', line 344
# Replaces every tab with the replacement string below and strips every
# carriage return.
#
# NOTE(review): this listing has had its whitespace flattened, so the tab
# replacement appears as a single space; confirm the intended width
# against lib/sup/util.rb.
#
# @return [String] a new string; the receiver is not modified
def normalize_whitespace
  without_tabs = gsub(/\t/, " ")
  without_tabs.gsub(/\r/, "")
end
#ord ⇒ Object
349 350 351 |
# File 'lib/sup/util.rb', line 349
# Integer value of the receiver's first byte.
#
# The earlier body, self[0], is an Integer only on Ruby 1.8; on 1.9+
# String#[] returns a one-character String, which is not a usable ordinal.
# Unpacking a single unsigned byte gives the same Integer on every version.
#
# @return [Integer, nil] the first byte, or nil for the empty string
def ord
  unpack("C").first
end
#split_on_commas ⇒ Object
a very complicated regex found on the internet to split on commas, unless they occur within double quotes.
266 267 268 |
# File 'lib/sup/util.rb', line 266
# Splits the whitespace-normalized receiver on commas that are not inside
# double quotes.
#
# @return [Array<String>] the pieces; whitespace directly after a
#   splitting comma is consumed by the separator
def split_on_commas
  # A comma (plus any following whitespace) counts as a separator only when
  # the lookahead can pair up every double quote that remains after it,
  # i.e. an even number of quotes follows.
  unquoted_comma = /,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/
  normalize_whitespace.split(unquoted_comma)
end
#split_on_commas_with_remainder ⇒ Object
ok, here we do it the hard way. got to have a remainder for purposes of tab-completing full email addresses
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 |
# File 'lib/sup/util.rb', line 272
# Splits the receiver on commas that are outside double-quoted sections and
# also reports what is left over when the text ends inside an open quote.
#
# The scan is a small state machine with four states: :outstring,
# :instring, and an :escaped_* variant of each that is entered when a
# backslash is seen. A comma (or the end of the text) closes the current
# piece only while outside a quote; inside a quote it is ignored.
#
# @return [Array] a two-element array: first the pieces, each with
#   whitespace removed at line starts and ends (the regex uses ^ and $);
#   second the remainder - the text from the start of the unfinished piece
#   with whitespace at line starts removed - when the scan ends inside a
#   quote, otherwise nil
def split_on_commas_with_remainder
  # State reached when a double quote is read.
  on_quote = {
    :outstring => :instring,
    :instring => :outstring,
    :escaped_instring => :instring,
    :escaped_outstring => :outstring
  }
  # State reached when a backslash is read.
  on_backslash = {
    :instring => :escaped_instring,
    :outstring => :escaped_outstring,
    :escaped_instring => :instring,
    :escaped_outstring => :outstring
  }

  pieces = []
  state = :outstring
  cursor = 0
  piece_start = 0

  # <= so that one final pass sees the end of the text (char == nil) and
  # flushes the last piece.
  while cursor <= length
    escaped = (state == :escaped_instring || state == :escaped_outstring)
    # While escaped, examine the very next position; otherwise jump ahead
    # to the next comma, quote or backslash.
    hit = escaped ? cursor : index(/[,"\\]/, cursor)
    char = hit ? self[hit] : nil
    hit ||= length

    case char
    when ?"
      state = on_quote[state]
    when ?,, nil
      if state == :outstring || state == :escaped_outstring
        pieces << self[piece_start...hit].gsub(/^\s+|\s+$/, "")
        piece_start = hit + 1
        state = :outstring
      else
        state = :instring
      end
    when ?\\
      state = on_backslash[state]
    end

    cursor = hit + 1
  end

  remainder = state == :instring ? self[piece_start..-1].gsub(/^\s+/, "") : nil
  [pieces, remainder]
end
#to_set_of_symbols(split_on = nil) ⇒ Object
takes a list of words, and returns a set of symbols. typically used in Sup for translating Xapian’s representation of a list of labels (a string) to a set of label symbols.
split_on will be passed to String#split, so you can leave this nil for space.
365 |
# File 'lib/sup/util.rb', line 365
# Splits the receiver into words and returns them as a Set of symbols.
#
# @param split_on separator handed to String#split; nil (the default)
#   splits on whitespace
# @return [Set<Symbol>] each word stripped of surrounding whitespace and
#   converted to a symbol
def to_set_of_symbols(split_on = nil)
  symbols = split(split_on).map { |word| word.strip.to_sym }
  Set.new(symbols)
end
#transcode(src_encoding = $encoding) ⇒ Object
390 391 392 |
# File 'lib/sup/util.rb', line 390
# Hands the receiver to Iconv.easy_decode together with the global
# $encoding and the given source encoding.
#
# NOTE(review): Iconv.easy_decode is not defined in this listing; the
# argument order below presumably means (target, source, text) - confirm
# against its definition.
#
# @param src_encoding encoding the receiver is currently in; defaults to
#   the value of $encoding at call time
# @return whatever Iconv.easy_decode returns
def transcode(src_encoding = $encoding)
  target_encoding = $encoding
  Iconv.easy_decode(target_encoding, src_encoding, self)
end
#wrap(len) ⇒ Object
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 |
# File 'lib/sup/util.rb', line 328
# Breaks the receiver into lines of at most len characters.
#
# Each line is cut at the last whitespace character that still leaves the
# line no longer than len; that whitespace character is dropped. When no
# such whitespace exists the text is cut hard at len characters.
#
# Two defects of the earlier version are fixed here:
# * a len below 1 never consumed any text and looped forever; it is now
#   rejected up front;
# * whitespace sitting exactly at index len was not considered, so
#   "abc def".wrap(3) came out as ["abc", "", "def"].
#
# @param len [Integer] maximum line length, at least 1
# @return [Array<String>] the wrapped lines, always at least one element
# @raise [ArgumentError] if len is less than 1
def wrap(len)
  raise ArgumentError, "wrap length must be positive, got #{len.inspect}" if len < 1

  lines = []
  rest = self
  while rest.length > len
    # Look one character past the limit: breaking on whitespace at index
    # len still yields a line of exactly len characters.
    cut = rest[0..len].rindex(/\s/)
    if cut
      lines << rest[0...cut]
      rest = rest[(cut + 1)..-1]
    else
      lines << rest[0...len]
      rest = rest[len..-1]
    end
  end
  lines << rest
end