Class: String

Inherits:
Object show all
Defined in:
lib/nano/string/brief.rb,
lib/nano/string/at.rb,
lib/nano/string/cmp.rb,
lib/nano/string/pop.rb,
lib/nano/string/tab.rb,
lib/nano/string/fold.rb,
lib/nano/string/last.rb,
lib/nano/string/push.rb,
lib/nano/string/slap.rb,
lib/nano/string/succ.rb,
lib/nano/string/to_a.rb,
lib/nano/string/to_b.rb,
lib/nano/string/bytes.rb,
lib/nano/string/chars.rb,
lib/nano/string/first.rb,
lib/nano/string/lines.rb,
lib/nano/string/mscan.rb,
lib/nano/string/nchar.rb,
lib/nano/string/quote.rb,
lib/nano/string/range.rb,
lib/nano/string/shift.rb,
lib/nano/string/tabto.rb,
lib/nano/string/to_re.rb,
lib/nano/string/words.rb,
lib/nano/string/indent.rb,
lib/nano/string/margin.rb,
lib/nano/string/natcmp.rb,
lib/nano/string/plural.rb,
lib/nano/string/to_arr.rb,
lib/nano/string/unpack.rb,
lib/nano/string/at_rand.rb,
lib/nano/string/bracket.rb,
lib/nano/string/dequote.rb,
lib/nano/string/dresner.rb,
lib/nano/string/last%21.rb,
lib/nano/string/last%3D.rb,
lib/nano/string/ordinal.rb,
lib/nano/string/shatter.rb,
lib/nano/string/shuffle.rb,
lib/nano/string/soundex.rb,
lib/nano/string/to_date.rb,
lib/nano/string/to_proc.rb,
lib/nano/string/to_time.rb,
lib/nano/string/unshift.rb,
lib/nano/string/basename.rb,
lib/nano/string/blank%3F.rb,
lib/nano/string/camelize.rb,
lib/nano/string/first%21.rb,
lib/nano/string/first%3D.rb,
lib/nano/string/humanize.rb,
lib/nano/string/singular.rb,
lib/nano/string/to_const.rb,
lib/nano/string/camelcase.rb,
lib/nano/string/each_char.rb,
lib/nano/string/each_word.rb,
lib/nano/string/frequency.rb,
lib/nano/string/index_all.rb,
lib/nano/string/last_char.rb,
lib/nano/string/line_wrap.rb,
lib/nano/string/rand_byte.rb,
lib/nano/string/range_all.rb,
lib/nano/string/upcase%3F.rb,
lib/nano/string/word_wrap.rb,
lib/nano/string/align_left.rb,
lib/nano/string/at_rand%21.rb,
lib/nano/string/bracket%21.rb,
lib/nano/string/demodulize.rb,
lib/nano/string/dresner%21.rb,
lib/nano/string/first_char.rb,
lib/nano/string/rand_index.rb,
lib/nano/string/similarity.rb,
lib/nano/string/underscore.rb,
lib/nano/string/unix_crypt.rb,
lib/nano/enumerable/entropy.rb,
lib/nano/string/align_right.rb,
lib/nano/string/downcase%3F.rb,
lib/nano/string/probability.rb,
lib/nano/string/word_filter.rb,
lib/nano/string/align_center.rb,
lib/nano/string/lowercase%3F.rb,
lib/nano/string/rand_byte%21.rb,
lib/nano/string/uppercase%3F.rb,
lib/nano/string/word_wrap%21.rb,
lib/nano/string/range_of_line.rb,
lib/nano/string/unix_crypt%21.rb,
lib/nano/string/whitespace%3F.rb,
lib/nano/string/%3A%3Apatterns.rb,
lib/nano/string/capitalized%3F.rb,
lib/nano/string/word_filter%21.rb,
lib/nano/string/%3A%3Arand_letter.rb

Overview

– Credit goes to Phil Tomson. ++

Constant Summary collapse

# Ordered [pattern, replacement] pairs used to pluralize an English word.
# String#plural applies only the first rule whose pattern matches, so the
# specific cases must stay ahead of the generic catch-alls at the bottom.
PLURAL_RULES =
[
  [/^(ox)$/i, '\1en'],                   # ox
  [/([ml])ouse$/i, '\1ice'],             # mouse, louse
  [/(matr|vert|ind)(?:ix|ex)$/i, '\1ices'], # matrix, vertex, index
  [/(x|ch|ss|sh)$/i, '\1es'],            # search, switch, fix, box, process, address
  # NOTE(review): this maps an already-plural "-ies" word back to "-y"
  # (e.g. "queries" -> "query"); it looks like a singular rule - confirm intent.
  [/([^aeiouy]|qu)ies$/i, '\1y'],
  [/([^aeiouy]|qu)y$/i, '\1ies'],        # query, ability, agency
  [/(hive)$/i, '\1s'],                   # archive, hive
  [/(?:([^f])fe|([lr])f)$/i, '\1\2ves'], # half, safe, wife
  [/sis$/i, 'ses'],                      # basis, diagnosis
  [/([ti])um$/i, '\1a'],                 # datum, medium
  [/(p)erson$/i, '\1eople'],             # person, salesperson
  [/(m)an$/i, '\1en'],                   # man, woman, spokesman
  [/(c)hild$/i, '\1hildren'],            # child
  [/(buffal|tomat|potat)o$/i, '\1oes'],  # buffalo, tomato
  [/(bu)s$/i, '\1ses'],                  # bus
  [/(alias)/i, '\1es'],                  # alias
  [/(octop|vir)us$/i, '\1i'],            # octopus, virus - virus has no defined plural, but viri is better than viruses/viruss
  [/(ax|cri|test)is$/i, '\1es'],         # axis, crisis
  [/s$/i, 's'],                          # no change (compatibility)
  [/$/, 's']                             # default: append "s"
]
# Maps each opening bracket character to its closing counterpart.
BRA_KET =
{
  '[' => ']',
  '(' => ')',
  '{' => '}',
  '<' => '>'
}
# Ordered [pattern, replacement] pairs used to singularize an English word.
# String#singular applies only the first rule whose pattern matches, so the
# irregular forms are listed before the generic "strip a trailing s" rule.
SINGULAR_RULES =
[
  [/(matr)ices$/i, '\1ix'],
  [/(vert)ices$/i, '\1ex'],
  [/^(ox)en/i, '\1'],
  [/(alias)es$/i, '\1'],
  [/(octop|vir)i$/i, '\1us'],            # a group, not a character class
  [/(cris|ax|test)es$/i, '\1is'],
  [/(shoe)s$/i, '\1'],
  [/(o)es$/i, '\1'],
  [/(bus)es$/i, '\1'],
  [/([ml])ice$/i, '\1ouse'],
  [/(x|ch|ss|sh)es$/i, '\1'],
  [/(m)ovies$/i, '\1ovie'],
  [/(s)eries$/i, '\1eries'],
  [/([^aeiouy]|qu)ies$/i, '\1y'],
  [/([lr])ves$/i, '\1f'],
  [/(tive)s$/i, '\1'],
  [/(hive)s$/i, '\1'],
  [/([^f])ves$/i, '\1fe'],
  [/(^analy)ses$/i, '\1sis'],
  # Only the whole stem (\1) is re-emitted; the inner single-letter groups
  # exist purely for grouping.
  [/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i, '\1sis'],
  [/([ti])a$/i, '\1um'],
  [/(p)eople$/i, '\1erson'],
  [/(m)en$/i, '\1an'],
  [/(s)tatus$/i, '\1tatus'],
  [/(c)hildren$/i, '\1hild'],
  [/(n)ews$/i, '\1ews'],
  [/s$/i, '']
]

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.patterns(pattern) ⇒ Object

Returns a Regexp pattern based on the given pattern string or symbolic name.

Recognized names are:

  • :char

  • :word

  • :line

They are also recognized in plural form.



14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/nano/string/%3A%3Apatterns.rb', line 14

# Resolves a separator specification to the value used for splitting.
# Symbolic names map to fixed regular expressions, a String becomes a
# Regexp matching it literally, and anything else is returned untouched.
def self.patterns( pattern )
  case pattern
  when :char, :chars, :character, :characters then //
  when :word, :words                          then /\s+|\Z/
  when :line, :lines                          then /\Z/
  when String                                 then Regexp.new(Regexp.escape(pattern))
  else pattern
  end
end

.rand_letterObject

Module method to generate a random letter.

String.rand_letter  #=> "q"
String.rand_letter  #=> "r"
String.rand_letter  #=> "a"


9
10
11
# File 'lib/nano/string/%3A%3Arand_letter.rb', line 9

# Returns a one-character string holding a random ASCII letter; the case is
# chosen at random (65 is "A", 97 is "a").
def self.rand_letter
  offset = rand(26)
  base = rand(2) == 0 ? 65 : 97
  (offset + base).chr
end

.random(max_length = 8, char_re = /[\w\d]/) ⇒ Object

Returns a randomly generated string. One possible use is password initialization. Takes a max length in characters (default 8) and an optional valid char Regexp (default /[\w\d]/).

Credit goes to George Moschovitis.

:NOTE: This is not very efficient. Better way?

++

Raises:

  • (ArgumentError)


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/nano/string/%3A%3Arandom.rb', line 13

# Builds a random string of max_length characters, each drawn from the
# single-byte characters (codes 0..254) that match char_re.
#
# max_length - number of characters to generate (default 8).
# char_re    - Regexp every generated character must match.
#
# Raises ArgumentError when char_re is not a Regexp, or when it matches none
# of the candidate characters (the previous rejection loop never terminated
# in that case).
def String.random(max_length = 8, char_re = /[\w\d]/)
  # gmosx: this is a nice example of input parameter checking.
  # this is NOT a real time called method so we can add this
  # check. Congrats to the author.
  raise ArgumentError.new('char_re must be a regular expression!') unless char_re.is_a?(Regexp)

  string = ""
  return string unless string.length < max_length

  # Filter the candidate characters once instead of rejecting random draws.
  pool = (0...255).map { |code| code.chr }.select { |ch| ch =~ char_re }
  raise ArgumentError.new('char_re does not match any single-byte character!') if pool.empty?

  while string.length < max_length
    string << pool[rand(pool.size)]
  end

  return string
end

.soundex_code(character) ⇒ Object

Support function for String#soundex. Returns code for a single character.



40
41
42
# File 'lib/nano/string/soundex.rb', line 40

# Translates upper-case letters in +character+ to their soundex digits.
# Returns the translated string, or nil when nothing was translated (for
# example a digit or punctuation mark), which String#soundex treats as an
# invalid character. The argument itself is left unmodified.
def String.soundex_code(character)
  code = character.tr("AEIOUYWHBPFVCSKGJQXZDTLMNR", "00000000111122222222334556")
  # No letter maps to itself, so an unchanged result means "no translation".
  code == character ? nil : code
end

Instance Method Details

#align_center(n, sep = "\n", c = ' ') ⇒ Object

Centers each line of a string.

s = "This is a test\nand\nso on\n"
puts s.align_center(14)

produces

This is a test
     and
    so on

Align a string to the center. The default alignment separator is a newline (“\n”). This can be changed, as can the padding string, which defaults to a single space (‘ ’).



25
26
27
28
29
30
31
# File 'lib/nano/string/align_center.rb', line 25

# Centers every +sep+-delimited piece of the string in a field of n
# characters padded with c. With a nil separator the whole string is
# centered as one piece.
def align_center( n, sep="\n", c=' ' )
  return center(n.to_i, c.to_s) if sep.nil?
  delimiter = sep.to_s
  centered = split(delimiter).map do |row|
    row.center(n.to_i, c.to_s)
  end
  centered.join(delimiter)
end

#align_left(n, sep = "\n", c = ' ') ⇒ Object

Align a string to the left. The default alignment separator is a newline (“\n”). This can be changed, as can the padding string, which defaults to a single space (‘ ’).



10
11
12
13
14
15
16
# File 'lib/nano/string/align_left.rb', line 10

# Left-justifies every +sep+-delimited piece of the string in a field of n
# characters padded with c. With a nil separator the whole string is
# justified as one piece.
def align_left( n, sep="\n", c=' ' )
  return ljust(n.to_i, c.to_s) if sep.nil?
  delimiter = sep.to_s
  justified = split(delimiter).map do |row|
    row.ljust(n.to_i, c.to_s)
  end
  justified.join(delimiter)
end

#align_right(n, sep = "\n", c = ' ') ⇒ Object

Align a string to the right. The default alignment separator is a newline (“\n”). This can be changed, as can the padding string, which defaults to a single space (‘ ’).



10
11
12
13
14
15
16
# File 'lib/nano/string/align_right.rb', line 10

# Right-justifies every +sep+-delimited piece of the string in a field of n
# characters padded with c. With a nil separator the whole string is
# justified as one piece.
def align_right( n, sep="\n", c=' ' )
  return rjust(n.to_i, c.to_s) if sep.nil?
  delimiter = sep.to_s
  justified = split(delimiter).map do |row|
    row.rjust(n.to_i, c.to_s)
  end
  justified.join(delimiter)
end

#at(index) ⇒ Object



2
3
4
# File 'lib/nano/string/at.rb', line 2

# Element reference by index; same result as String#slice(index).
def at(index)
  self[index]
end

#at_rand(separator = //) ⇒ Object

Return a random separation of the string. Default separation is by character.

"Ruby rules".at_rand(' ')  #=> ["Ruby"]


12
13
14
15
# File 'lib/nano/string/at_rand.rb', line 12

# Splits the string on the resolved separator (keeping trailing empty
# pieces) and delegates the random pick to Array#at_rand.
def at_rand( separator=// )
  pieces = split(self.class.patterns(separator), -1)
  pieces.at_rand
end

#at_rand!(separator = //) ⇒ Object

Return a random separation while removing it from the string. Default separation is by character.

s = "Ruby rules"
s.at_rand!(' ')    #=> "Ruby"
s                  #=> "rules"


14
15
16
17
18
19
20
21
22
# File 'lib/nano/string/at_rand%21.rb', line 14

# Removes one randomly chosen segment from the string and returns it.
# The string is shattered into alternating text/separator pieces; each text
# piece is glued to the separator that follows it before the pick is made.
def at_rand!( separator=// )
  pieces = shatter(self.class.patterns(separator))
  segments = pieces.each_slice(2).map { |pair| pair.join('') }
  picked = segments.delete_at(rand(segments.size))
  replace(segments.join(''))
  picked
end

#basenameObject Also known as: demodulize

Removes the prepended module namespace.

"Test::Unit".basename  #=> "Unit"


8
9
10
# File 'lib/nano/string/basename.rb', line 8

# Strips everything up to and including the last "::" on each line.
def basename
  qualified = to_s
  qualified.gsub(/^.*::/, '')
end

#blank?Boolean Also known as: whitespace?

Is this string just whitespace?

"abc".blank?  #=> false
"   ".blank?  #=> true


9
10
11
# File 'lib/nano/string/blank%3F.rb', line 9

# True when the string holds no non-whitespace character (including "").
def blank?
  (self =~ /\S/).nil?
end

#bracket(bra, ket = nil) ⇒ Object

Return a new string embraced by the given brackets. If only one bracket char is given it will be placed on either side.

"wrap me".bracket('{')        #=> "{wrap me}"
"wrap me".bracket('--','!')   #=> "--wrap me!"


14
15
16
17
# File 'lib/nano/string/bracket.rb', line 14

# Wraps the string between bra and ket. When ket is omitted and bra is one
# of the single opening brackets [ ( { <, the closing bracket is looked up
# in BRA_KET; otherwise bra is used on both sides.
def bracket(bra, ket=nil)
  if !ket && /^[\[({<]$/ =~ bra
    ket = BRA_KET[$&]
  end
  closing = ket ? ket : bra
  "#{bra}#{self}#{closing}"
end

#bracket!(bra, ket = nil) ⇒ Object

In-place version of #bracket.



6
7
8
# File 'lib/nano/string/bracket%21.rb', line 6

# Replaces the receiver's content with its bracketed form and returns self.
def bracket!(bra, ket=nil)
  wrapped = self.bracket(bra, ket)
  replace(wrapped)
end

#brief(string, count = 128, force_cutoff = false, ellipsis = "...") ⇒ Object

Returns short abstract of long strings (first ‘count’ characters, chopped at the nearest word, appended by ‘…’) force_cutoff: break forcibly at ‘count’ chars. Does not accept count < 2.



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/nano/string/brief.rb', line 10

# Abbreviates +string+ (not the receiver) when it is longer than +count+.
# The cut is made at the first space found at or after index count-1, or at
# count when there is none or when force_cutoff is set; one trailing space
# is dropped and +ellipsis+ appended. Returns nil for a nil string or
# count < 2, and the original string when it is short enough.
def brief(string, count = 128, force_cutoff = false, ellipsis="...")
  return nil if !string || count < 2
  return string unless string.size > count

  cut_at = if force_cutoff
    count
  else
    string.index(' ', count-1) || count
  end
  string.slice(0, cut_at).chomp(" ") + ellipsis
end

#bytesObject

Unpacks the string into bytes.



6
7
8
# File 'lib/nano/string/bytes.rb', line 6

# Returns the string's bytes as an array of unsigned 8-bit integers.
def bytes
  unpack('C*')
end

#camelcase(first = false, on = '_\s') ⇒ Object

Converts a string to camelcase. By default capitalization occurs on whitespace and underscores. By setting the first parameter to true the first character can also be capitalized. The second parameter can be assigned a valid Regular Expression character set to determine which characters to match for capitalizing subsequent parts of the string.

"this_is a test".camelcase             #=> "thisIsATest"
"this_is a test".camelcase(true)       #=> "ThisIsATest"
"this_is a test".camelcase(true, ' ')  #=> "This_isATest"


16
17
18
19
20
21
22
# File 'lib/nano/string/camelcase.rb', line 16

# Removes each run of +on+ characters that precedes a letter and upcases
# that letter. When +first+ is true a letter at the start of a line is
# upcased as well.
def camelcase( first=false, on='_\s' )
  boundary = first ? "(^|[#{on}]+)" : "([#{on}]+)"
  gsub(/#{boundary}([A-Za-z])/) { $2.upcase }
end

#camelizeObject

Variation of converting a string to camelcase. This is unlike #camelcase in that it is geared toward code reflection use.

"this/is_a_test".camelize  #=> This::IsATest


9
10
11
12
# File 'lib/nano/string/camelize.rb', line 9

# Turns a path-like name into a constant-like one: "/x" becomes "::X", then
# every character at a line start or after an underscore is upcased and the
# underscore dropped.
def camelize
  namespaced = to_s.gsub(/\/(.?)/) { "::" + $1.upcase }
  namespaced.gsub(/(^|_)(.)/) { $2.upcase }
end

#capitalized?Boolean

Return true if the string is capitalized, otherwise false.

"THIS".capitalized?  #=> true
"This".capitalized?  #=> true
"this".capitalized?  #=> false

– Credit goes to Phil Tomson. ++



13
14
15
# File 'lib/nano/string/capitalized%3F.rb', line 13

# Returns true when the first character of the string is an ASCII capital
# letter, otherwise false. Anchored with \A so only the start of the string
# counts, not the start of a later line.
def capitalized?
  !!(self =~ /\A[A-Z]/)
end

#charsObject

Returns an array of characters.

"abc".chars  #=> ["a","b","c"]


8
9
10
# File 'lib/nano/string/chars.rb', line 8

# Splits the string on the empty pattern, giving one element per character.
def chars
  split(//)
end

#cmp(other) ⇒ Object

Compare method that takes length into account. Unlike #<=>, this is compatible with #succ.

"abc".cmp("abc")   #=>  0
"abcd".cmp("abc")  #=>  1
"abc".cmp("abcd")  #=> -1
"xyz".cmp("abc")   #=>  1


12
13
14
15
16
# File 'lib/nano/string/cmp.rb', line 12

# Orders strings by length first; equal-length strings fall back to the
# ordinary <=> comparison.
def cmp(other)
  if length < other.length
    -1
  elsif length > other.length
    1
  else
    self <=> other
  end
end

#dequoteObject



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/nano/string/dequote.rb', line 3

# Returns a copy of the string with one leading and one trailing quote
# character (', " or `) removed when present. The two ends are checked
# independently, so the quotes need not match.
def dequote
  quotes = ["'", '"', '`']
  s = self.dup

  s[0] = '' if quotes.include?(s[0,1])
  # Re-check on the working copy: a lone quote character is already empty
  # here, and assigning to s[-1] on an empty string raises IndexError.
  s[-1] = '' if quotes.include?(s[-1,1])

  s
end

#downcase?Boolean Also known as: lowercase?

Return true if the string is lowercase (downcase), otherwise false.

"THIS".downcase?  #=> false
"This".downcase?  #=> false
"this".downcase?  #=> true


12
13
14
# File 'lib/nano/string/downcase%3F.rb', line 12

# True when downcasing the string leaves it unchanged.
def downcase?
  self == downcase
end

#dresnerObject

Scramble the inner characters of words leaving the text still readable (research at Cambridge University, code by KurtDresner).

For example, the above text may result in:

Srblamce the iennr cchrteaars of wodrs lvenaig the txet stlil rbeaadle
(rreceash at Cbamigdre Uverintisy, cdoe by KrneruestDr?)


12
13
14
# File 'lib/nano/string/dresner.rb', line 12

# Returns a copy in which the inner characters of every word (everything
# but its first and last character) are randomly reordered.
def dresner
  # The block must return a String: gsub calls to_s on the block result, and
  # Array#to_s is `inspect` on Ruby 1.9+, so the pieces are joined explicitly.
  self.gsub(/\B\w+\B/){ $&.split(//).sort_by{rand}.join }
end

#dresner!Object

Inplace version of #dresner method.



7
8
9
# File 'lib/nano/string/dresner%21.rb', line 7

# Replaces the receiver's content with the result of #dresner; returns self.
def dresner!
  scrambled = dresner
  replace(scrambled)
end

#each_charObject

Iterates through each character.



5
6
7
8
9
# File 'lib/nano/string/each_char.rb', line 5

# Yields each character of the string in order; returns the character array.
def each_char  # :yield:
  characters = split(//)
  characters.each do |character|
    yield character
  end
end

#each_word(&yld) ⇒ Object

Iterate through each word of a string.

"a string".each_word { |word, range| ... }


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/nano/string/each_word.rb', line 8

# Calls the block once per word (a run of word characters, hyphens or
# apostrophes). A block of arity 1 receives the word only; any other block
# also receives the word's position in the receiver as an exclusive range.
def each_word( &yld )
  word_re = /([-'\w]+)/
  single_arg = (yld.arity == 1)
  remaining = self
  consumed = 0   # characters of self already scanned
  until (found = word_re.match(remaining)).nil?
    span = (consumed + found.begin(0))...(consumed + found.end(0))
    remaining = found.post_match
    if single_arg
      yld.call(found[0])
    else
      yld.call(found[0], span)
    end
    consumed = length - remaining.length
  end
end

#entropyObject



27
28
29
# File 'lib/nano/enumerable/entropy.rb', line 27

# Delegates to the #entropy of the string's character array.
def entropy
  characters = split(//)
  characters.entropy
end

#first(separator_pattern = //) ⇒ Object

Returns the first separation of a string. Default separation is by character.

"Hello World".first       #=> "H"
"Hello World".first(' ')  #=> "Hello"


13
14
15
16
# File 'lib/nano/string/first.rb', line 13

# Returns the first piece of the string when split on the separator.
# The separator may be a Regexp, a String (matched literally) or one of the
# symbolic names understood by String.patterns.
def first(separator_pattern=//)
  pattern = self.class.patterns(separator_pattern)
  # Split on the resolved pattern; splitting on the raw argument ignored the
  # lookup above and failed for symbolic names such as :word.
  split(pattern).at(0)
end

#first!(separator_pattern = //) ⇒ Object

Removes the first separation from a string. Default separation is by character. – If a zero-length record separator is supplied, the string is split on /n+/. If the record separator is set to nil, then the string is split on characters. ++

a = "Hello World"
a.first!       #=> "H"
a              #=> "ello World"

a = "Hello World"
a.first!(' ')  #=> "Hello"
a              #=> "World"


24
25
26
27
28
29
30
31
32
# File 'lib/nano/string/first%21.rb', line 24

# Removes the first piece of the string, together with the separator that
# follows it, and returns that piece.
def first!(separator_pattern=//)
  pieces = shatter(self.class.patterns(separator_pattern))
  head = pieces.shift   # leading text piece
  pieces.shift          # separator that followed it
  replace(pieces.join(''))
  head
end

#first=(x) ⇒ Object

Prepends to a string.

"Hello World".first = "Hello,"  #=> "Hello, Hello World"


9
10
11
# File 'lib/nano/string/first%3D.rb', line 9

# Inserts the string form of x at the front of the receiver (in place).
def first=( x )
  prefix = x.to_s
  insert(0, prefix)
end

#first_char(n = 1) ⇒ Object

Returns first n characters.

"Hello World".first_char(3)  #=> "Hel"


8
9
10
# File 'lib/nano/string/first_char.rb', line 8

# Returns the leading n characters (n is coerced with to_i).
def first_char(n=1)
  self[0, n.to_i]
end

#foldObject

Returns a new string with all new lines removed from adjacent lines of text.

s = "This is\na test.\n\nIt clumps\nlines of text."
s.fold

produces

"This is a test.\n\nIt clumps lines of text. "

– One possible flaw with this that might could use a fix: if the given string ends in a newline, it is replaced with a single space. ++



19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/nano/string/fold.rb', line 19

# Joins the lines of each paragraph with single spaces. Paragraph breaks
# (a newline, optional whitespace, another newline) are copied through
# unchanged; every other run of newlines, plus any spaces before it,
# becomes one space.
def fold
  folded = ''
  cursor = 0
  scan(/(\n\s*\n|\Z)/m) do
    # Capture the match before the inner gsub overwrites the last-match data.
    boundary = Regexp.last_match
    folded << self[cursor...boundary.begin(1)].gsub(/[ ]*\n+/, ' ')
    folded << boundary[0]
    cursor = boundary.end(1)
  end
  folded
end

#frequency(*args) ⇒ Object



7
8
9
# File 'lib/nano/string/frequency.rb', line 7

# Converts the string with #to_arr (forwarding all arguments) and returns
# that array's #frequency.
def frequency(*args)
  elements = to_arr(*args)
  elements.frequency
end

#humanizeObject

Replaces underscores with spaces and capitalizes word.



6
7
8
# File 'lib/nano/string/humanize.rb', line 6

# Turns underscores into spaces, then capitalizes the result (first
# character upcased, the rest downcased).
def humanize
  tr('_', ' ').capitalize
end

#indent(n) ⇒ Object

Indent left or right by n spaces. (This used to be called #tab and aliased as #indent.)



9
10
11
12
13
14
15
# File 'lib/nano/string/indent.rb', line 9

# For n >= 0 prefixes every line with n spaces; for negative n removes up to
# -n leading spaces from every line.
def indent(n)
  return gsub(/^/, ' ' * n) if n >= 0
  gsub(/^ {0,#{-n}}/, "")
end

#index_all(s, reuse = false) ⇒ Object

Like index but returns an array of all index locations. The reuse flag allows the trailing portion of a match to be reused for subsequent matches.

"abcabcabc".index_all('a')  #=> [0,3,6]


10
11
12
13
14
15
16
17
# File 'lib/nano/string/index_all.rb', line 10

# Returns the start index of every occurrence of s (a String or Regexp).
#
# reuse - when false the search resumes after the end of each match; when
#         true it resumes one character after the match start, so
#         overlapping matches are reported as well.
def index_all(s, reuse=false)
  ia = []; i = 0
  while (i = self.index(s,i))
    ia << i
    if reuse
      i += 1
    else
      # index() only records match data for Regexp arguments; for a String
      # the width of the match is the width of the argument itself.
      width = s.is_a?(Regexp) ? $~[0].length : s.to_str.length
      # Always advance, so a zero-width match cannot loop forever.
      i += (width > 0 ? width : 1)
    end
  end
  ia
end

#last(separator_pattern = //) ⇒ Object

Returns the last separation of a string. Default separation is by character.

"Hello World".last(' ')  #=> "World"


12
13
14
15
# File 'lib/nano/string/last.rb', line 12

# Returns the last piece of the string when split on the separator.
# The separator may be a Regexp, a String (matched literally) or one of the
# symbolic names understood by String.patterns.
def last(separator_pattern=//)
  pattern = self.class.patterns(separator_pattern)
  # Split on the resolved pattern; splitting on the raw argument ignored the
  # lookup above and failed for symbolic names such as :word.
  self.split(pattern).at(-1)
end

#last!(separator_pattern = //) ⇒ Object

Removes the last separation from a string. Default separation is by character. – If a zero-length record separator is supplied, the string is split on /n+/. If the record separator is set to nil, then the string is split on characters. ++

a = "Hello World"
a.last!       #=> "d"
a             #=> "Hello Worl"

a = "Hello World"
a.last!(' ')  #=> "World"
a             #=> "Hello"


24
25
26
27
28
29
30
31
32
# File 'lib/nano/string/last%21.rb', line 24

# Removes the last piece of the string, together with the separator that
# precedes it, and returns that piece.
def last!(separator_pattern=//)
  pieces = shatter(self.class.patterns(separator_pattern))
  tail = pieces.pop   # trailing text piece
  pieces.pop          # separator that preceded it
  replace(pieces.join(''))
  tail
end

#last=(str) ⇒ Object

Appends to a string.

"Hello World".last = ", Bye."  #=>  "Hello World, Bye."


9
10
11
# File 'lib/nano/string/last%3D.rb', line 9

# Appends str to the receiver in place.
def last=(str)
  concat(str)
end

#last_char(n = 1) ⇒ Object

Returns last n characters.

"Hello World".last_char(3)  #=> "rld"


8
9
10
11
12
# File 'lib/nano/string/last_char.rb', line 8

# Returns the trailing n characters (n is coerced with to_i). When n exceeds
# the string size the receiver itself is returned.
def last_char(n=1)
  count = n.to_i
  count > size ? self : self[-count, count]
end

#line_wrap(width) ⇒ Object

Line wrap at width.

puts "1234567890".line_wrap(5)

produces

12345
67890


13
14
15
16
17
18
# File 'lib/nano/string/line_wrap.rb', line 13

# Hard-wraps the text at +width+ characters. Tabs become four spaces and
# existing newlines become spaces before the text is cut into rows; the
# result always ends with a newline.
def line_wrap(width)
  flattened = gsub(/\t/, ' ' * 4).gsub(/\n/, ' ')
  rows = flattened.scan(/.{1,#{width}}/)
  rows.join("\n") << "\n"
end

#linesObject

Returns an array of lines.

"abc\n123".lines  #=> ["abc","123"]


8
9
10
# File 'lib/nano/string/lines.rb', line 8

# Splits the string at newline characters; the newlines are not kept.
def lines
  split("\n")
end

#margin(n = 0) ⇒ Object

Provides a margin controlled string.

x = %Q{
      | This
      |   is
      |     margin controlled!
      }.margin

– This may still need a bit of tweaking. ++



15
16
17
18
19
20
21
22
23
24
# File 'lib/nano/string/margin.rb', line 15

# Strips a margin marker from the start of every line.
#
# The marker is the first non-whitespace character on the second line, or,
# when the string has no second line, the first non-whitespace character of
# the string. Trailing whitespace after the last newline is dropped, then
# each line's leading whitespace plus marker is replaced by n spaces.
# Returns '' when no marker character can be found.
def margin(n=0)
  found = /\A.*\n\s*(.)/.match( self ) || /\A\s*(.)/.match( self )
  return '' unless found
  # Escape the marker so characters such as "^" or "]" are matched literally.
  marker = Regexp.escape(found[1])
  gsub(/\n\s*\Z/,'').gsub(/^\s*#{marker}/, ' ' * n)
end

#mscan(re) ⇒ Object

Like #scan but returns MatchData ($~) rather than matched string ($&).



7
8
9
10
11
12
13
14
15
# File 'lib/nano/string/mscan.rb', line 7

# Scans the string for re. With a block each MatchData is yielded and the
# result of #scan is returned; without one an array of MatchData is returned.
def mscan(re) #:yield:
  return scan(re) { yield(Regexp.last_match) } if block_given?

  found = []
  scan(re) { found << Regexp.last_match }
  found
end

#natcmp(str2, caseInsensitive = false) ⇒ Object

‘Natural order’ comparison of two strings, e.g.

"my_prog_v1.1.0" < "my_prog_v1.2.0" < "my_prog_v1.10.0"

which does not follow alphabetically. A secondary parameter, if set to true, makes the comparison case insensitive.

"Hello.10".natcmp("Hello.1")  #=> 1

– Adapted from:

http://sourcefrog.net/projects/natsort/natcmp.rb

Based on Martin Pool’s “Natural Order String Comparison” originally written in C. (see sourcefrog.net/projects/natsort/)

This implementation is Copyright © 2003 by Alan Davies <cs96and_AT_yahoo_DOT_co_DOT_uk>

This software is provided ‘as-is’, without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.

Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.

  2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.

  3. This notice may not be removed or altered from any source distribution.

++



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/nano/string/natcmp.rb', line 42

# Natural-order comparison: runs of digits are compared by numeric value,
# everything else character by character. Whitespace is ignored.
#
# str2            - the string to compare against.
# caseInsensitive - when true both strings are downcased first.
#
# Returns -1, 0 or 1.
def natcmp(str2, caseInsensitive=false)
  str1 = self.dup
  str2 = str2.dup
  compareExpression = /^(\D*)(\d*)(.*)$/

  if caseInsensitive
    str1.downcase!
    str2.downcase!
  end

  # remove all whitespace
  str1.gsub!(/\s*/, '')
  str2.gsub!(/\s*/, '')

  while (str1.length > 0) or (str2.length > 0) do
    # Extract non-digits, digits and rest of string
    str1 =~ compareExpression
    chars1, num1, str1 = $1.dup, $2.dup, $3.dup
    str2 =~ compareExpression
    chars2, num2, str2 = $1.dup, $2.dup, $3.dup
    # Compare the non-digits
    case (chars1 <=> chars2)
      when 0 # Non-digits are the same, compare the digits...
        # If either number begins with a zero, then compare alphabetically,
        # otherwise compare numerically.
        # The first character is compared as a one-character string:
        # num[0] is a byte value only on Ruby 1.8, so comparing it with 48
        # is always "different" on later versions.
        if (num1[0,1] != '0') and (num2[0,1] != '0')
          num1, num2 = num1.to_i, num2.to_i
        end
        case (num1 <=> num2)
          when -1 then return -1
          when 1 then return 1
        end
      when -1 then return -1
      when 1 then return 1
    end # case
  end # while

  # strings are naturally equal.
  return 0
end

#nchar(n, replacement = nil) ⇒ Object

Returns n characters of the string. If n is positive the characters are from the beginning of the string. If n is negative from the end of the string.

Alternatively a replacement string can be given, which will replace the n characters.



11
12
13
14
15
16
17
18
19
# File 'lib/nano/string/nchar.rb', line 11

# Selects the first n characters (n > 0) or the characters from index n to
# the end (n <= 0). Without a replacement that part is returned; with one, a
# copy of the string with that part replaced is returned.
def nchar( n, replacement=nil )
  span = n > 0 ? (0...n) : (n..-1)
  return self[span] unless replacement

  copy = dup
  copy[span] = replacement
  copy
end

#ordinalObject



4
5
6
# File 'lib/nano/string/ordinal.rb', line 4

# Converts the string with to_i and returns that integer's #ordinal.
def ordinal
  number = to_i
  number.ordinal
end

#pluralObject Also known as: pluralize

Convert an English word from singular to plural.

"boy".plural     #=> boys
"tomato".plural  #=> tomatoes


32
33
34
35
36
37
38
# File 'lib/nano/string/plural.rb', line 32

# Returns the plural form of the word. The rules in String::PLURAL_RULES are
# tried in order and only the first one that matches is applied.
def plural
  word = to_s.dup
  String::PLURAL_RULES.find do |rule, replacement|
    word.gsub!(rule, replacement)
  end
  word
end

#popObject



2
3
4
5
6
# File 'lib/nano/string/pop.rb', line 2

# Removes the last character in place and returns the receiver (not the
# removed character). Returns '' for an empty string.
def pop
  return '' if empty?
  slice!(-1)
  self
end

#probability(*args) ⇒ Object



7
8
9
# File 'lib/nano/string/probability.rb', line 7

# Converts the string with #to_arr (forwarding all arguments) and returns
# that array's #probability.
def probability(*args)
  elements = to_arr(*args)
  elements.probability
end

#push(str = ' ') ⇒ Object



2
3
4
# File 'lib/nano/string/push.rb', line 2

# Appends str (a single space by default) to the receiver in place.
def push(str=' ')
  self << str
end

#quote(type = :s) ⇒ Object

Return a new string embraced by given quotes. If no quotes are specified, then assumes single quotes.

"quote me".quote     #=> "'quote me'"
"quote me".quote(:d)  #=> "\"quote me\""


11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/nano/string/quote.rb', line 11

# Wraps the string in quote characters via #bracket. The type is compared
# as a downcased string: 'd'/'double' selects double quotes, 'b'/'back'
# backticks, and anything else single quotes.
def quote(type=:s)
  mark = case type.to_s.downcase
         when 'd', 'double' then '"'
         when 'b', 'back'   then '`'
         else "'"
         end
  bracket(mark)
end

#rand_byteObject

Return a random byte of self.

"Ruby rules".rand_byte  #=> 121


8
9
10
# File 'lib/nano/string/rand_byte.rb', line 8

# Returns one randomly chosen byte of the string as an Integer, or nil for
# an empty string.
def rand_byte
  return nil if empty?
  # getbyte always yields an Integer; self[i] does so only on Ruby 1.8 and
  # indexes by character, not byte, on later versions.
  getbyte(rand(bytesize))
end

#rand_byte!Object

Destructive rand_byte. Delete a random byte of self and return it.

s = "Ruby rules"
s.rand_byte!      #=> "y"
s                 #=> "Rub rules"


10
11
12
13
14
15
# File 'lib/nano/string/rand_byte%21.rb', line 10

# Removes one randomly chosen character from the string in place and
# returns it as a one-character string.
def rand_byte!
  slice!(rand(size), 1)
end

#rand_indexObject

Return a random string index.

"Ruby rules".rand_index  #=> 3


6
7
8
# File 'lib/nano/string/rand_index.rb', line 6

# Returns rand(size): a random index into a non-empty string.
def rand_index
  rand(length)
end

#range(s, offset = 0) ⇒ Object

Like #index but returns a Range.

"This is a test!".range('test')  #=> 10..13


8
9
10
11
12
13
# File 'lib/nano/string/range.rb', line 8

# Like #index, but returns the inclusive Range of character positions
# covered by the first match of s (a String or Regexp) at or after offset.
# Returns nil when there is no match.
def range(s, offset=0)
  start = self.index(s, offset)
  return nil unless start
  # index() only records match data for Regexp arguments; for a String the
  # width of the match is the width of the argument itself.
  width = s.is_a?(Regexp) ? $~[0].length : s.to_str.length
  start..(start + width - 1)
end

#range_all(s, reuse = false) ⇒ Object

Like #index_all but returns an array of Ranges.

"abc123abc123".range_all('abc')  #=> [0..2, 6..8]

– Note: should add offset ? ++



12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/nano/string/range_all.rb', line 12

# Like #index_all, but returns the Range of every match of s.
#
# reuse - when false the search resumes after the end of each match; when
#         true it resumes one character after the match start, so
#         overlapping matches are reported as well.
def range_all(s, reuse=false)
  r = []; i = 0
  while i < self.length
    rng = range(s, i)
    break unless rng
    r << rng
    # Positions in rng are absolute, so the offset is assigned rather than
    # incremented; incrementing skipped matches on longer strings.
    resume_at = reuse ? rng.begin + 1 : rng.end + 1
    # Always move forward, so a zero-width match cannot loop forever.
    i = resume_at > i ? resume_at : i + 1
  end
  r.uniq
end

#range_of_lineObject

Returns an array of ranges mapping the characters per line.

"this\nis\na\ntest".range_of_line
#=> [0..4, 5..7, 8..9, 10..13]


10
11
12
13
14
15
16
17
# File 'lib/nano/string/range_of_line.rb', line 10

# Returns one inclusive Range per line giving the character positions that
# line occupies in the string; each line's terminating newline is included
# in its range.
def range_of_line
  offset=0; charmap = []
  # each_line replaces String#each, which was removed in Ruby 1.9.
  self.each_line do |line|
    charmap << (offset..(offset + line.length - 1))
    offset += line.length
  end
  charmap
end

#shatter(re) ⇒ Object

Breaks a string up into an array based on a regular expression. Similar to scan, but includes the matches.

s = "<p>This<b>is</b>a test.</p>"
s.shatter( /\<.*?\>/ )

produces

["<p>", "This", "<b>", "is", "</b>", "a test.", "</p>"]


14
15
16
17
18
19
# File 'lib/nano/string/shatter.rb', line 14

# Breaks the string into an array of pieces, keeping the text matched by re
# as separate elements. Each match is wrapped in "\1" marker characters,
# markers at either end are stripped, and the result is split on the marker.
def shatter( re )
  marker = "\1"
  marked = gsub(re) { |hit| marker + hit + marker }
  marked[0] = '' while marked[0,1] == marker
  marked[-1] = '' while marked[-1,1] == marker
  marked.split(marker)
end

#shiftObject



2
3
4
5
6
# File 'lib/nano/string/shift.rb', line 2

# Removes the first character in place and returns the receiver (not the
# removed character). Returns '' for an empty string.
def shift
  return '' if empty?
  slice!(0)
  self
end

#shuffle(separator = //) ⇒ Object

Return the string with separated sections arranged in a random order. The default separation is by character.

"Ruby rules".shuffle  #=> "e lybRsuur"


10
11
12
# File 'lib/nano/string/shuffle.rb', line 10

# Splits the string on separator, shuffles the pieces and joins them back
# together without any separator.
def shuffle(separator=//)
  pieces = split(separator)
  pieces.shuffle.join('')
end

#shuffle!(separator = //) ⇒ Object

In place version of shuffle.



16
17
18
# File 'lib/nano/string/shuffle.rb', line 16

# Replaces the receiver's content with the result of #shuffle; returns self.
def shuffle!(separator=//)
  mixed = shuffle(separator)
  replace(mixed)
end

#similarity(str_in) ⇒ Object Also known as: fuzzy_match

A fuzzy matching mechanism. Returns a score from 0-1, based on the number of shared edges. To be effective, the strings must be of length 2 or greater.

"Alexsander".fuzzy_match( "Aleksander" )  #=> 0.9

The way it works:

  • Converts each string into a “graph like” object, with edges

    "alexsander" -> [ alexsander, alexsand, alexsan ... lexsand ... san ... an, etc ]
    "aleksander" -> [ aleksander, aleksand ... etc. ]
    
  • Perform match, then remove any subsets from this matched set (i.e. a hit on “san” is a subset of a hit on “sander”)

    Above example, once reduced -> [ ale, sander ]
    
  • Sees how many of the matches remain, and calculates a score based on how many matches, their length, and compare to the length of the larger of the two words.

– Credit goes to Derek Lewis. Thanks Derek! Still a bit rough. Any suggestions for improvement are welcome. ++



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/nano/string/similarity.rb', line 25

# Fuzzy similarity score between the receiver and str_in.
#
# Every substring of length >= 2 of each string is collected; the common
# substrings that are not contained in a longer common substring are kept,
# and their total length is divided by the length of the longer string.
#
# Returns 0 when str_in is nil, 1 when the strings are equal, otherwise a
# Float.
def similarity( str_in )
  return 0 if str_in == nil
  return 1 if self == str_in

  # Make a graph of each word (okay, so its not a true graph, but is similar)
  graph_A = Array.new
  graph_B = Array.new

  # "graph" self
  last = self.length
  (0..last).each do |ff|
    break if ff == last - 1
    wordB = (1..(last-1)).to_a.reverse!
    wordB.each do |ss|
      break if ss == ff
      graph_A.push( "#{self[ff..ss]}" )
    end
  end

  # "graph" input string
  last = str_in.length
  (0..last).each{ |ff|
    break if ff == last - 1
    wordB = (1..(last-1)).to_a.reverse!
    wordB.each do |ss|
      break if ss == ff
      graph_B.push( "#{str_in[ff..ss]}" )
    end
  }

  # count how many of these "graph edges" we have that are the same
  matches = Array.new
  graph_A.each do |aa|
    matches.push( aa ) if( graph_B.include?( aa ) )
  end
  # For eliminating subsets, we want to start with the smallest hits.
  matches.sort!{|x,y| x.length <=> y.length}

  # eliminate any subsets
  mclone = matches.dup
  mclone.each_index do |ii|
    count = 0.0
    # Plain substring test: compiling the fragment as a Regexp broke on
    # text containing regex metacharacters such as "(" or "*".
    matches.each{|xx| count += 1 if xx.include?( mclone[ii] )}
    matches.delete(mclone[ii]) if count > 1
  end

  score = 0.0
  matches.each{ |mm| score += mm.length }
  self.length > str_in.length ? largest = self.length : largest = str_in.length
  return score/largest
end

#singularObject Also known as: singularize

Convert an English word from plural to singular.

"boys".singular      #=> boy
"tomatoes".singular  #=> tomato


40
41
42
43
44
45
46
# File 'lib/nano/string/singular.rb', line 40

# Returns the singular form of the word. The rules in String::SINGULAR_RULES
# are tried in order and only the first one that matches is applied.
def singular
  word = to_s.dup
  String::SINGULAR_RULES.find do |rule, replacement|
    word.gsub!(rule, replacement)
  end
  word
end

#slap(str = ' ') ⇒ Object

Like push but works from the other end of the string.



5
6
7
# File 'lib/nano/string/slap.rb', line 5

# Prepends +str+ (a single space by default) to this string in place and
# returns the receiver.
def slap( str=' ' )
  self[0, 0] = str
  self
end

#soundexObject

Implementation of the soundex algorithm as described by Knuth in volume 3 of The Art of Computer Programming. Returns nil if the value couldn’t be calculated b/c of empty-string or invalid character.

"Ruby".soundex  #=> "R100"

– Credit goes to Michael Neumann ([email protected]). ++



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/nano/string/soundex.rb', line 14

# Computes the four-character soundex key for this string.
#
# The key starts with the upcased first character, followed by the codes
# String.soundex_code yields for the remaining characters. A "0" code is
# skipped but resets the "previous code" memory; a code equal to the
# previous one is skipped; the key is zero-padded to four characters.
#
# Returns nil for an empty string or when String.soundex_code returns nil
# for any character after the first.
def soundex
  return nil if empty?

  letters  = upcase
  previous = String.soundex_code(letters[0, 1])
  key      = letters[0, 1]

  (1...letters.size).each do |pos|
    # Stop as soon as the key is complete.
    return key if key.size == 4

    digit = String.soundex_code(letters[pos, 1])
    return nil if digit.nil?

    if digit == "0"
      previous = nil
    elsif digit != previous
      key += digit
      previous = digit
    end
  end

  key + "000"[0, 4 - key.size]
end

#succ(n = 1) ⇒ Object

Allows #succ to take n step increments.

"abc".succ      #=> "abd"
"abc".succ(4)   #=> "abg"
"abc".succ(24)  #=> "aca"


9
10
11
12
13
# File 'lib/nano/string/succ.rb', line 9

# Returns the +n+-th successor of this string (the next one by default),
# obtained by applying #succ_once +n+ times. With n = 0 the receiver
# itself is returned.
def succ(n=1)
  n.times.inject(self) { |current, _step| current.succ_once }
end

#succ_onceObject



2
# File 'lib/nano/string/succ.rb', line 2

# Keep the existing single-step #succ reachable as #succ_once; the #succ
# defined later in this file takes a step count and delegates to it.
alias_method( :succ_once, :succ )

#tab(n) ⇒ Object

Aligns each line n spaces. (This used to be #taballto.)



9
10
11
# File 'lib/nano/string/tab.rb', line 9

# Returns a copy in which the leading spaces of every line are replaced by
# exactly +n+ spaces.
def tab(n)
  margin = ' ' * n
  gsub(/^ */, margin)
end

#tabto(n) ⇒ Object

Preserves relative tabbing. The first non-empty line ends up with n spaces before nonspace.



9
10
11
12
13
14
15
# File 'lib/nano/string/tabto.rb', line 9

# Shifts the whole string via #indent so that the first line containing a
# non-whitespace character ends up with +n+ leading spaces. Returns the
# receiver unchanged when no such line exists.
def tabto(n)
  first_text_line = /^( *)\S/.match(self)
  return self unless first_text_line

  # Indent by the difference between the wanted and the current margin.
  indent(n - first_text_line[1].length)
end

#to_aObject

Alias for #to_arr. See #to_arr for documentation.

WARNING! Use this method with caution, as its default behavior (i.e. splitting into characters) is unconventional (unfortunately).



11
# File 'lib/nano/string/to_a.rb', line 11

# Expose #to_arr under the name #to_a as well.
alias_method :to_a, :to_arr

#to_arr(mode = nil) ⇒ Object

Split a string into an array according to the given mode. The mode is a recognized symbol or a regexp. If no mode is given, the default mode :char (same as the regular expression //) is used.

Symbolized modes include :byte, :char, :word, :line and :enum.

The :enum mode passes the call up to the Enumerable#to_a, thus providing a means to Ruby’s current built-in behavior.

'abc 123'.to_arr        #=> ['a','b','c',' ','1','2','3']
'abc 123'.to_arr(:word) #=> ['abc','123']


17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/nano/string/to_arr.rb', line 17

# Splits this string into an array according to +mode+.
#
# * nil, :char, :chars - one element per character
# * :byte, :bytes      - byte values, via unpack('C*')
# * :word, :words      - pieces separated by runs of spaces
# * :line, :lines      - pieces separated by newlines
# * :enum              - delegates to the superclass implementation
# * anything else      - passed straight to #split as the separator
def to_arr(mode=nil)
  case mode
  when :enum         then super
  when :byte, :bytes then unpack('C*')
  else
    separator =
      case mode
      when nil, :char, :chars then //
      when :word, :words      then /[ ]+/
      when :line, :lines      then /\n/
      else mode
      end
    split(separator)
  end
end

#to_bObject

Interpret common affirmative string meanings as true, otherwise false. Blank space and case are ignored. The strings nil and null return nil. The following strings return true:

<tt>true</tt>,<tt>yes</tt>,<tt>on</tt>,<tt>t</tt>,<tt>1</tt>,<tt>y</tt>,<tt>==</tt>

Examples:

"true".to_b   #=> true
"yes".to_b    #=> true
"no".to_b     #=> false
"123".to_b    #=> false


17
18
19
20
21
22
23
24
25
26
# File 'lib/nano/string/to_b.rb', line 17

# Interprets this string as a boolean, ignoring case and surrounding
# whitespace.
#
# Returns true for the affirmative words listed below, nil for "nil" and
# "null", and false for everything else.
def to_b
  token = downcase.strip
  return true if ['true', 'yes', 'on', 't', '1', 'y', '=='].include?(token)
  return nil  if ['nil', 'null'].include?(token)
  false
end

#to_constObject

Get a constant by a given string name.

"Class".to_const   #=> Class

Note that this method is not as versatile as it should be, since it cannot access constants relative to the current execution context. But without a binding_of_caller that does not seem possible.



10
11
12
# File 'lib/nano/string/to_const.rb', line 10

# Resolves this string to the constant it names. The name is split on "::"
# and each segment is looked up with const_get, starting from Object.
def to_const
  scope = Object
  split('::').each do |segment|
    scope = scope.const_get(segment)
  end
  scope
end

#to_dateObject



4
5
6
7
8
# File 'lib/nano/string/to_date.rb', line 4

# Parses this string into a Date.
#
# Uses Date.parse from the standard 'date' library. The 'parsedate'
# library this method relied on previously is no longer part of the
# standard library, which made every call fail with a LoadError.
#
# Raises ArgumentError when the string cannot be parsed as a date.
def to_date
  require 'date' # loaded lazily, on first use
  ::Date.parse(self)
end

#to_proc(context = nil) ⇒ Object

Evaluates a String as a Proc.

xyp = "|x,y| x + y".to_proc
xyp.class      #=> Proc
xyp.call(1,2)  #=> 3

– Note: Sure would be nice if this could grab the caller’s context! ++



14
15
16
17
18
19
20
21
22
23
24
# File 'lib/nano/string/to_proc.rb', line 14

# Evaluates this string as the parameter list and body of a Proc.
#
# +context+ selects where the proc is created:
# * nil/false      - evaluated with Kernel.eval and no explicit binding
# * Binding        - evaluated in that binding
# * Proc           - evaluated in the binding the proc was created in
# * any other object - evaluated with instance_eval on that object
#
# SECURITY: the string is executed as Ruby code. Never call this on text
# that comes from an untrusted source.
def to_proc(context=nil)
  source = "proc { #{self} }"
  return Kernel.eval(source) unless context

  # Kernel.eval only accepts a Binding, so unwrap a Proc first.
  context = context.binding if context.kind_of?(Proc)

  if context.kind_of?(Binding)
    Kernel.eval(source, context)
  else
    context.instance_eval(source)
  end
end

#to_re(esc = true) ⇒ Object

Turns a string into a regular expression. By default it will escape all characters. Use false argument to turn off escaping.

"[".to_re  #=> /\[/


10
11
12
# File 'lib/nano/string/to_re.rb', line 10

# Builds a Regexp from this string. Every character is matched literally
# unless +esc+ is false, in which case the string is used as the pattern
# source verbatim.
def to_re(esc=true)
  pattern = esc ? Regexp.escape(self) : self
  Regexp.new(pattern)
end

#to_timeObject



4
5
6
7
# File 'lib/nano/string/to_time.rb', line 4

# Parses this string into a Time using Time.parse.
def to_time
  # Time.parse is provided by the standard 'time' library, loaded on first use.
  require 'time'
  parsed = Time.parse(self)
  parsed
end

#underscoreObject

Underscore string based on camelcase characteristics.



5
6
7
# File 'lib/nano/string/underscore.rb', line 5

# Returns a lower-case, underscore-separated copy of a camel-cased string.
#
#   "CamelCase".underscore   #=> "camel_case"
#   "HTTPServer".underscore  #=> "http_server"
#   "URL".underscore         #=> "url"
#
# An underscore is inserted between a run of capitals and a following
# capitalized word, and between a lower-case letter and a capital. The
# first rule requires the trailing capital to be followed by a lower-case
# letter, so that a pure acronym such as "URL" is left in one piece.
def underscore #(camel_cased_word)
  gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
    gsub(/([a-z])([A-Z])/, '\1_\2').
    downcase
end

#unix_cryptObject

Common Unix cryptography method.



5
6
7
# File 'lib/nano/string/unix_crypt.rb', line 5

# Returns this string hashed with String#crypt, using a two-character salt
# built from two calls to String.rand_letter.
def unix_crypt
  salt = String.rand_letter + String.rand_letter
  crypt(salt)
end

#unix_crypt!Object

Common Unix cryptography in-place method.



5
6
7
# File 'lib/nano/string/unix_crypt%21.rb', line 5

# Replaces the contents of this string with the result of #unix_crypt and
# returns the receiver.
def unix_crypt!
  hashed = unix_crypt
  replace(hashed)
end

#unpack(format, offset = nil) ⇒ Object

Unpack with offset. Extends #unpack to allow a string to be unpacked starting at an offset position within it.



8
9
10
11
12
13
14
# File 'lib/nano/string/unpack.rb', line 8

# Unpacks this string according to +format+. When +offset+ is given, only
# the part of the string from that index to the end is unpacked.
#
# The actual decoding is done by #unpack_from_orgin.
def unpack(format, offset=nil)
  return unpack_from_orgin(format) if offset.nil?

  tail = self[offset..-1]
  tail.unpack_from_orgin(format)
end

#unpack_from_orginObject



4
# File 'lib/nano/string/unpack.rb', line 4

# Keep the existing #unpack reachable as #unpack_from_orgin; the #unpack
# defined later in this file adds an offset argument and delegates to it.
alias_method( :unpack_from_orgin, :unpack )

#unshift(str = ' ') ⇒ Object



3
4
5
# File 'lib/nano/string/unshift.rb', line 3

# Inserts +str+ (a single space by default) at the front of this string,
# modifying it in place, and returns the receiver.
def unshift( str=' ' )
  front = 0
  insert(front, str)
end

#upcase?Boolean Also known as: uppercase?

Is the string upcase/uppercase?

"THIS".upcase?  #=> true
"This".upcase?  #=> false
"this".upcase?  #=> false


13
14
15
# File 'lib/nano/string/upcase%3F.rb', line 13

# True when upcasing this string leaves it unchanged, i.e. it contains no
# character that #upcase would alter.
def upcase?
  shouted = upcase
  shouted == self
end

#word_filter(&blk) ⇒ Object

Filters out words from a string based on block test.

"a string".word_filter { |word| word =~ /^a/ }  #=> "string"


9
10
11
12
# File 'lib/nano/string/word_filter.rb', line 9

# Non-destructive form of #word_filter!: runs #word_filter! with the given
# block on a copy of this string and returns its result.
def word_filter( &blk )
  dup.word_filter!( &blk )
end

#word_filter!Object

In place version of #word_filter.

"a string".word_filter! { |word| ... }


8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/nano/string/word_filter%21.rb', line 8

# Replaces every word (run of \w characters) in this string, in place, with
# the string form of the block's result for that word. Returns the receiver.
def word_filter! #:yield:
  remainder = self
  cursor = 0 # index in self where +remainder+ begins
  while (found = /(\w+)/.match(remainder))
    # Translate the match position within +remainder+ into a range in self.
    span = (cursor + found.begin(0))...(cursor + found.end(0))
    remainder = found.post_match
    self[span] = yield( found[0] ).to_s
    # The replacement may differ in length, so recompute where the
    # unprocessed tail now starts.
    cursor = length - remainder.length
  end
  self
end

#word_wrap(max = 80) ⇒ Object

Word wrap a string not exceeding max width.

puts "this is a test".word_wrap(4)

produces

this
is a
test


15
16
17
18
19
# File 'lib/nano/string/word_wrap.rb', line 15

# Returns a word-wrapped copy of this string; the wrapping itself is done
# by calling #word_wrap! with +max+ on the copy.
def word_wrap(max=80)
  wrapped = dup
  wrapped.word_wrap!(max)
  return wrapped
end

#word_wrap!(max = 80) ⇒ Object

As with #word_wrap, but modifies the string in place.

Raises:

  • (ArgumentError)


7
8
9
10
11
# File 'lib/nano/string/word_wrap%21.rb', line 7

# Word wraps this string in place so that lines do not exceed +max+
# characters, breaking at a space or newline.
#
# Returns the receiver, or nil when no break was inserted (the return value
# of gsub!).
#
# Raises ArgumentError when +max+ is 2 or less.
def word_wrap!(max=80)
  raise ArgumentError, "Wrap margin too low: #{max}" if max <= 2
  # Up to max-1 arbitrary characters plus one non-space character, followed
  # by the space or newline that gets turned into the line break.
  gsub!( /(.{1,#{max-1}}\S)([ ]|\n)/, "\\1\n")
end

#wordsObject

Returns an array of words.

"abc 123".words  #=> ["abc","123"]


8
9
10
# File 'lib/nano/string/words.rb', line 8

# Splits this string on runs of one or more spaces and returns the pieces.
def words
  space_run = /[ ]+/
  split(space_run)
end