Class: String

Inherits:

Object

Object
String

show all

Includes:: Indexable, Random::StringExtensions

Defined in:: lib/core/facets/roman.rb,
lib/core/facets/boolean.rb,
lib/core/facets/string/xor.rb,
lib/core/facets/string/file.rb,
lib/core/facets/string/fold.rb,
lib/core/facets/string/trim.rb,
lib/core/facets/kernel/blank.rb,
lib/core/facets/string/align.rb,
lib/core/facets/string/crypt.rb,
lib/core/facets/string/mscan.rb,
lib/core/facets/string/nchar.rb,
lib/core/facets/string/quote.rb,
lib/core/facets/string/range.rb,
lib/core/facets/string/store.rb,
lib/core/facets/string/to_re.rb,
lib/core/facets/string/words.rb,
lib/core/facets/string/cleave.rb,
lib/core/facets/string/divide.rb,
lib/core/facets/string/indent.rb,
lib/core/facets/string/lchomp.rb,
lib/core/facets/string/linear.rb,
lib/core/facets/string/margin.rb,
lib/core/facets/string/natcmp.rb,
lib/core/facets/string/number.rb,
lib/core/facets/string/op_div.rb,
lib/core/facets/string/random.rb,
lib/core/facets/string/remove.rb,
lib/core/facets/string/rotate.rb,
lib/core/facets/string/splice.rb,
lib/core/facets/string/squish.rb,
lib/core/facets/string/unfold.rb,
lib/core/facets/comparable/cmp.rb,
lib/core/facets/string/acronym.rb,
lib/core/facets/string/bracket.rb,
lib/core/facets/string/exclude.rb,
lib/core/facets/string/pathize.rb,
lib/core/facets/string/rewrite.rb,
lib/core/facets/string/shatter.rb,
lib/core/facets/string/modulize.rb,
lib/core/facets/string/newlines.rb,
lib/core/facets/string/camelcase.rb,
lib/core/facets/string/each_word.rb,
lib/core/facets/string/index_all.rb,
lib/core/facets/string/indexable.rb,
lib/core/facets/string/line_wrap.rb,
lib/core/facets/string/methodize.rb,
lib/core/facets/string/snakecase.rb,
lib/core/facets/string/titlecase.rb,
lib/core/facets/string/uppercase.rb,
lib/core/facets/string/word_wrap.rb,
lib/core/facets/string/ascii_only.rb,
lib/core/facets/string/cleanlines.rb,
lib/core/facets/string/expand_tab.rb,
lib/core/facets/string/similarity.rb,
lib/core/facets/string/variablize.rb,
lib/core/facets/string/capitalized.rb,
lib/core/facets/string/interpolate.rb,
lib/core/facets/string/starts_with.rb,
lib/core/facets/object/object_state.rb,
lib/core/facets/string/edit_distance.rb,
lib/core/facets/string/compress_lines.rb,
lib/standard/facets/date.rb,
lib/standard/facets/tuple.rb,
lib/standard/facets/random.rb

Constant Summary collapse

ROMAN = Taken from O’Reilly’s Perl Cookbook 6.23. Regular Expression Grabbag.

/^M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])$/i

ROMAN_VALUES =

Integer::ROMAN_VALUES.inject({}) do |h,(r,a)|
  h[r] = a; h
end

BRA2KET =

{ '['=>']', '('=>')', '{'=>'}', '<'=>'>' }

Class Method Summary collapse

.interpolate(&str) ⇒ Object

Interpolate provides a means of externally using Ruby's string interpolation mechanism.
.random(len = 32, character_set = ["A".."Z", "a".."z", "0".."9"]) ⇒ Object

Create a random String of given length, using given character set.

Instance Method Summary collapse

#-(pattern) ⇒ Object

Removes occurrences of a string or regexp.
#/(path) ⇒ Object

Treats self and path as representations of pathnames, joining them together as a single path.
#^(aString) ⇒ Object

Binary XOR of two strings.
#_crypt ⇒ Object
#acronym ⇒ Object

Transform a string into an acronym.
#align(direction, n, sep = "\n", c = ' ') ⇒ Object

Alignment method dispatches to #align_right, #align_left or #align_center, according to the first direction parameter.
#align_center(n, sep = "\n", c = ' ') ⇒ Object

Centers each line of a string.
#align_left(n, sep = "\n", c = ' ') ⇒ Object

Align a string to the left.
#align_right(n, sep = "\n", c = ' ') ⇒ Object

Align a string to the right.
#ascii_only(alt = '') ⇒ Object

Get a new string with non-ASCII characters removed.
#ascii_only!(alt = '') ⇒ Object

Modify string keeping only ASCII characters.
#blank? ⇒ Boolean

Is this string just whitespace?.
#bracket(bra, ket = nil) ⇒ Object

Return a new string embraced by given brackets.
#bracket!(bra, ket = nil) ⇒ Object

Inplace version of #bracket.
#briefcase ⇒ Object

Transform a string into a sentence like form.
#camelcase(*separators) ⇒ Object

Converts a string to camelcase.
#capitalized? ⇒ Boolean

Return true if the string is capitalized, otherwise false.
#cleanlines(&block) ⇒ Object

Returns an Enumerator for iterating over each line of the string, stripped of whitespace on either side.
#cleave(threshold = nil, len = nil) ⇒ Object

Cleave a string.
#cmp(other) ⇒ Object

Compare method that takes length into account.
#compress_lines(spaced = true) ⇒ Object

Matches any whitespace (including newline) and replaces with a single space.
#crypt(salt = nil) ⇒ Object

Common Unix cryptography method.
#divide(re) ⇒ Object

Breaks a string up into an array based on a regular expression.
#downcase? ⇒ Boolean

Return true if the string is lowercase (downcase), otherwise false.
#each_word(&block) ⇒ Object

Iterate through each word of a string.
#edit_distance(str2) ⇒ Object

Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
#exclude?(str) ⇒ Boolean

The inverse of include?.
#expand_tabs(n = 8) ⇒ Object (also: #expand_tab)

Expands tabs to n spaces.
#file ⇒ Object

Use fluent notation for making file directives.
#fold(ignore_indented = false) ⇒ Object

Returns a new string with all new lines removed from adjacent lines of text.
#indent(n, c = ' ') ⇒ Object

Indent left or right by ‘n` spaces, or `n` number of `c` string.
#indent!(n, c = ' ') ⇒ Object

Equivalent to String#indent, but modifies the receiver in place.
#index_all(s, reuse = false) ⇒ Object

Like index but returns an array of all index locations.
#lchomp(match) ⇒ Object

Left chomp.
#lchomp!(match) ⇒ Object

In-place left chomp.
#line_wrap(width, tabs = 4) ⇒ Object

Line wrap at width.
#linear ⇒ Object

Like ‘#newlines` but returns a Functor instead.
#lower_camelcase(*separators) ⇒ Object deprecated Deprecated.

Use ‘#camelcase(:lower)` instead.
#lowercase ⇒ Object

Downcase first letter.
#margin(num = nil, opts = {}) ⇒ Object

Preserve relative tabbing such that the line with the least amount of white space ends up with the given number of spaces before non-space and all other lines move relative to it.
#methodize ⇒ Object

Translate a class or module name to a suitable method name.
#modulize ⇒ Object

Converts a string to module name representation.
#mscan(re) ⇒ Object (also: #each_match)

Like #scan but returns MatchData ($~) rather than the matched string ($&).
#natcmp(str2, caseInsensitive = false) ⇒ Object

‘Natural order’ comparison of strings, e.g.
#nchar(n, replacement = nil) ⇒ Object

Returns n characters of the string.
#newlines(&block) ⇒ Object

Returns an Enumerator for iterating over each line of the string, void of the terminating newline character, in contrast to #lines which retains it.
#number? ⇒ Boolean

Returns true if it’s a decimal digits.
#object_state(data = nil) ⇒ Object
#pathize ⇒ Object

Transforms a namespace, i.e.
#quote(type = :double, count = nil) ⇒ Object

Return a new string embraced by given type and count of quotes.
#range(pattern, offset = 0) ⇒ Object

Like #index but returns a Range.
#range_all(pattern, reuse = false) ⇒ Object

Like #index_all but returns an array of Ranges.
#range_of_line ⇒ Object

Returns an array of ranges mapping the characters per line.
#remove(pattern) ⇒ Object

Removes all occurrences of a pattern in a string.
#remove!(pattern) ⇒ Object

Removes all occurrences of a pattern in a string.
#rewrite(rules) ⇒ Object

Apply a set of rules in the form of regular expression matches to the string.
#roman ⇒ Object

Considers the string a Roman numeral and converts it to the corresponding integer.
#roman? ⇒ Boolean

Returns true if and only if the subject is a valid Roman numeral.
#rotate(count = 1) ⇒ Object

Rotate string to the left with count.
#rotate!(count = 1) ⇒ Object

Destructive version of String#rotate.
#shatter(re) ⇒ Object

Breaks a string up into an array based on a regular expression.
#similarity(str_in) ⇒ Object

A fuzzy matching mechanism.
#snakecase ⇒ Object (also: #underscore)

Underscore a string such that camelcase, dashes and spaces are replaced by underscores.
#splice(idx, sub = nil) ⇒ Object

String#slice is essentially the same as #store.
#squish ⇒ Object

Returns the string, first removing all whitespace on both ends of the string, and then changing remaining consecutive whitespace groups into one space each.
#squish! ⇒ Object

Performs a destructive squish.
#titlecase ⇒ Object

Transform a string into a form that makes for an acceptable title.
#to_b ⇒ Object

Interpret common affirmative string meanings as true, otherwise nil or false.
#to_date ⇒ Object

Parse data from string.
#to_datetime ⇒ Object

Convert string to DateTime.
#to_re(esc = false) ⇒ Object

Turns a string into a regular expression.
#to_rx(esc = true) ⇒ Object

Turns a string into a regular expression.
#to_t(&yld) ⇒ Object

Translates a string in the form of a set of numerical and/or alphanumerical characters separated by non-word characters (e.g. `\W+`) into a Tuple.
#to_time(form = :utc) ⇒ Object
#trim(num = 0) ⇒ Object

Control the margin of a string using a trim character.
#unbracket(bra = nil, ket = nil) ⇒ Object

Return a new string with the given brackets removed.
#unbracket!(bra = nil, ket = nil) ⇒ Object

Inplace version of #unbracket.
#unfold ⇒ Object

Unfold paragraphs such that new lines are removed from between sentences of the same paragraph.
#unindent(size = nil) ⇒ Object

Remove excessive indentation.
#unindent! ⇒ Object

Equivalent to String#unindent, but modifies the receiver in place.
#unquote ⇒ Object

Remove quotes from string.
#upcase? ⇒ Boolean

Is the string upcase/uppercase?.
#upper_camelcase(*separators) ⇒ Object deprecated Deprecated.

Use ‘#camelcase(:upper)` instead.
#uppercase ⇒ Object

Upcase first letter.
#variablize ⇒ Object

Prepend an “@” to the beginning of a string to make an instance variable name.
#word_wrap(col_width = 80) ⇒ Object

Word wrap a string not exceeding max width.
#word_wrap!(col_width = 80) ⇒ Object

As with #word_wrap, but modifies the string in place.
#words ⇒ Object

Returns an array of characters.
#words_without_punctuation ⇒ Object

Returns an array of words in the commonly-understood sense (not including punctuation).

Methods included from Random::StringExtensions

#at_rand, #at_rand!, included, #rand_byte, #rand_byte!, #rand_index, #shuffle, #shuffle!

Methods included from Indexable

#body, #ends, #first, #first!, #first=, #foot, #from, #head, #index, #last, #last!, #last=, #mid, #middle, #pos, #tail, #thru

Class Method Details

.interpolate(&str) ⇒ `Object`

Interpolate provides a means of externally using Ruby's string interpolation mechanism.

try = "hello"
str = "\#{try}!!!"
String.interpolate{ str }    #=> "hello!!!"

Note this uses #eval under the hood. We do not recommend that it serve in place of a good templating system. But, it can be useful for simple cases.

The block is necessary in order to get the binding of the caller.

CREDIT: Trans



18
19
20

# File 'lib/core/facets/string/interpolate.rb', line 18

# Calls the block to obtain a template string and evaluates it as a
# %{...} string literal inside the block's own binding, so any
# interpolation markers in it resolve against the caller's variables.
# NOTE: this goes through eval; never feed it untrusted text.
def self.interpolate(&str)
  template = str.call
  eval("%{#{template}}", str.binding)
end

.random(len = 32, character_set = ["A".."Z", "a".."z", "0".."9"]) ⇒ `Object`

Create a random String of given length, using given character set

Character set is an Array which can contain Ranges, Arrays, Characters

Examples

String.random
=> "D9DxFIaqR3dr8Ct1AfmFxHxqGsmA4Oz3"

String.random(10)
=> "t8BIna341S"

String.random(10, ['a'..'z'])
=> "nstpvixfri"

String.random(10, ['0'..'9'] )
=> "0982541042"

String.random(10, ['0'..'9','A'..'F'] )
=> "3EBF48AD3D"

BASE64_CHAR_SET =  ["A".."Z", "a".."z", "0".."9", '_', '-']
String.random(10, BASE64_CHAR_SET)
=> "xM_1t3qcNn"

SPECIAL_CHARS = ["!", "@", "#", "$", "%", "^", "&", "*", "(", ")", "-", "_", "=", "+", "|", "/", "?", ".", ",", ";", ":", "~", "`", "[", "]", "{", "}", "<", ">"]
BASE91_CHAR_SET =  ["A".."Z", "a".."z", "0".."9", SPECIAL_CHARS]
String.random(10, BASE91_CHAR_SET)
 => "S(Z]z,J{v;"

CREDIT: Tilo Sloboda

SEE: gist.github.com/tilo/3ee8d94871d30416feba

TODO: Move to random.rb in standard library?

# File 'lib/core/facets/string/random.rb', line 39

# Builds a random string of +len+ characters drawn from +character_set+.
# Ranges in the set are expanded to their members and nested arrays are
# flattened, so the set may mix Ranges, Arrays and single characters.
def self.random(len=32, character_set = ["A".."Z", "a".."z", "0".."9"])
  expanded = character_set.each_with_object([]) do |entry, pool|
    pool << (Range === entry ? entry.to_a : entry)
  end
  candidates = expanded.flatten
  picks = Array.new(len) { candidates.sample }
  picks.join
end

Instance Method Details

#-(pattern) ⇒ `Object`

Removes occurrences of a string or regexp. This is an operator form for the #remove method.

("HELLO HELLO" - "LL")    #=> "HEO HEO"
("HELLO PERL" - /L\S/)    #=> "HEO PERL"

Returns a new [String] with all pattern matches removed.

CREDIT: Benjamin David Oakes



26
27
28

# File 'lib/core/facets/string/remove.rb', line 26

# Operator form of removal: returns a copy of the string with every
# match of +pattern+ (String or Regexp) replaced by nothing.
def -(pattern)
  gsub(pattern) { '' }
end

#/(path) ⇒ `Object`

Treats self and path as representations of pathnames, joining them together as a single path.

('home' / 'trans')  #=> 'home/trans'



9
10
11

# File 'lib/core/facets/string/op_div.rb', line 9

# Joins the receiver and +path+ (converted with #to_s) into one path
# using File.join.
def /(path)
  segment = path.to_s
  File.join(self, segment)
end

#^(aString) ⇒ `Object`

Binary XOR of two strings.

a = "\000\000\001\001" ^ "\000\001\000\001"
b = "\003\003\003" ^ "\000\001\002"

a  #=> "\000\001\001\000"
b  #=> "\003\002\001"

# File 'lib/core/facets/string/xor.rb', line 11

# Byte-wise XOR of the receiver with +aString+.
#
# The result always has as many bytes as the receiver: when +aString+ is
# shorter it is treated as zero-padded, and any extra bytes in a longer
# +aString+ are ignored.
#
# Works on bytes rather than characters. The previous implementation
# unpacked `length` (character count) bytes, which silently truncated
# strings containing multibyte characters.
#
# Returns a new String holding the XOR-ed bytes.
def ^(aString)
  mine   = bytes
  theirs = aString.bytes
  xor = +""
  mine.each_with_index do |byte, pos|
    # Missing bytes on the right-hand side count as zero.
    xor << (byte ^ (theirs[pos] || 0)).chr
  end
  xor
end

#_crypt ⇒ `Object`

3	# File 'lib/core/facets/string/crypt.rb', line 3 alias_method :_crypt, :crypt

#acronym ⇒ `Object`

Transform a string into an acronym.

CREDIT: Robert Fey



7
8
9

# File 'lib/core/facets/string/acronym.rb', line 7

# Collapses each run of alphanumerics (plus the character that follows
# it) down to the run's first character.
def acronym
  gsub(/(([a-zA-Z0-9])([a-zA-Z0-9])*)./) { Regexp.last_match(2) }
end

#align(direction, n, sep = "\n", c = ' ') ⇒ `Object`

Alignment method dispatches to #align_right, #align_left or #align_center, according to the first direction parameter.

s = <<-EOS
This is a test
  and
  so on
EOS

s.align(:right, 14)

produces …

This is a test
           and
         so on

Returns a String aligned right, left or center.

# File 'lib/core/facets/string/align.rb', line 21

# Dispatches to #align_right, #align_left or #align_center.
#
# direction - One of :right, :left or :center. [Symbol]
# n         - Target width, passed through to the chosen method.
# sep       - Line separator, passed through (default "\n").
# c         - Padding string, passed through (default ' ').
#
# The caller's +sep+ and +c+ are now forwarded. Previously each call was
# written as `align_right(n, sep="\n", c=' ')`, which reassigned both
# arguments to their defaults and ignored what the caller passed.
#
# Raises ArgumentError for any other direction.
def align(direction, n, sep="\n", c=' ')
  case direction
  when :right
    align_right(n, sep, c)
  when :left
    align_left(n, sep, c)
  when :center
    align_center(n, sep, c)
  else
    raise ArgumentError, "direction must be :right, :left or :center"
  end
end

#align_center(n, sep = "\n", c = ' ') ⇒ `Object`

Centers each line of a string.

The default alignment separation is a new line ("\n"). This can be changed as can be the padding string which defaults to a single space (‘ ’).

s = <<-EOS
  This is a test
  and
  so on
EOS

s.align_center(14)

produces …

This is a test
     and
    so on

CREDIT: Trans

# File 'lib/core/facets/string/align.rb', line 116

# Centers the string within +n+ columns using padding +c+.
# With a nil +sep+ the whole string is centered as one unit; otherwise
# it is split on +sep+, each piece is centered, and the pieces are
# joined back together with +sep+.
def align_center(n, sep="\n", c=' ')
  return center(n.to_i, c.to_s) if sep.nil?
  delimiter = sep.to_s
  centered = split(delimiter).map { |line| line.center(n.to_i, c.to_s) }
  centered.join(delimiter)
end

#align_left(n, sep = "\n", c = ' ') ⇒ `Object`

Align a string to the left.

The default alignment separation is a new line ("\n"). This can be changed as can be the padding string which defaults to a single space (‘ ’).

s = <<-EOS
This is a test
  and
  so on
EOS

s.align_left(20, "\n", '.')

produces …

This is a test......
and.................
so on...............

CREDIT: Trans

# File 'lib/core/facets/string/align.rb', line 86

# Left-justifies the string to +n+ columns using padding +c+.
# With a nil +sep+ the whole string is justified as one unit; otherwise
# each +sep+-delimited piece is stripped, left-justified, and the pieces
# are joined back together with +sep+.
def align_left(n, sep="\n", c=' ')
  return ljust(n.to_i, c.to_s) if sep.nil?
  delimiter = sep.to_s
  justified = split(delimiter).map { |line| line.strip.ljust(n.to_i, c.to_s) }
  justified.join(delimiter)
end

#align_right(n, sep = "\n", c = ' ') ⇒ `Object`

Align a string to the right.

The default alignment separation is a new line ("\n"). This can be changed as can be the padding string which defaults to a single space (‘ ’).

s = <<-EOS
This is a test
  and
  so on
EOS

s.align_right(14)

produces …

This is a test
           and
         so on

CREDIT: Trans

# File 'lib/core/facets/string/align.rb', line 56

# Right-justifies the string to +n+ columns using padding +c+.
# With a nil +sep+ the whole string is justified as one unit; otherwise
# each +sep+-delimited piece is right-justified and the pieces are
# joined back together with +sep+.
def align_right(n, sep="\n", c=' ')
  return rjust(n.to_i, c.to_s) if sep.nil?
  delimiter = sep.to_s
  justified = split(delimiter).map { |line| line.rjust(n.to_i, c.to_s) }
  justified.join(delimiter)
end

#ascii_only(alt = '') ⇒ `Object`

Get a new string with non-ASCII characters removed.

alt - String to replace non-ASCII characters with.

Defaults to a blank string (`''`).

Examples

'abc'.ascii_only     #=> 'abc'
'中文123'.ascii_only  #=> '123'

Returns a copy of [String] with ASCII characters only.

CREDIT: Nathan Long

SEE: stackoverflow.com/questions/1268289

# File 'lib/core/facets/string/ascii_only.rb', line 17

# Returns a copy of the string transcoded to ASCII, with every invalid
# byte sequence and every character that has no ASCII equivalent
# replaced by +alt+ (default: removed).
#
# The options are passed as keyword arguments. Handing String#encode a
# positional Hash (as this method used to) is not treated as options on
# Ruby 3. The former :UNIVERSAL_NEWLINE_DECORATOR key was dropped because
# it is not one of the documented #encode options; newlines are left as
# they are.
def ascii_only(alt='')
  encode(
    Encoding.find('ASCII'),
    :invalid => :replace,  # Replace invalid byte sequences
    :undef   => :replace,  # Replace anything not defined in ASCII
    :replace => alt        # Substitute used for both of the above
  )
end

#ascii_only!(alt = '') ⇒ `Object`

Modify string keeping only ASCII characters.

alt - String to replace non-ASCII characters with.

Defaults to a blank string (`''`).

Examples

'abc'.ascii_only!     #=> 'abc'
'中文123'.ascii_only!  #=> '123'

Returns [String]

CREDIT: Nathan Long

SEE: stackoverflow.com/questions/1268289

# File 'lib/core/facets/string/ascii_only.rb', line 42

# In-place variant of #ascii_only: transcodes the receiver to ASCII,
# replacing invalid byte sequences and non-ASCII characters with +alt+
# (default: removed). Returns the receiver.
#
# The options are passed as keyword arguments. Handing String#encode! a
# positional Hash (as this method used to) is not treated as options on
# Ruby 3. The former :UNIVERSAL_NEWLINE_DECORATOR key was dropped because
# it is not one of the documented #encode options; newlines are left as
# they are.
def ascii_only!(alt='')
  encode!(
    Encoding.find('ASCII'),
    :invalid => :replace,  # Replace invalid byte sequences
    :undef   => :replace,  # Replace anything not defined in ASCII
    :replace => alt        # Substitute used for both of the above
  )
end

#blank? ⇒ `Boolean`

Is this string just whitespace?

"abc".blank?  #=> false
"   ".blank?  #=> true

Returns:

(Boolean)



74
75
76

# File 'lib/core/facets/kernel/blank.rb', line 74

# True when the string contains no non-whitespace character
# (this includes the empty string).
def blank?
  (/\S/ =~ self).nil?
end

#bracket(bra, ket = nil) ⇒ `Object`

Return a new string embraced by given brackets. If only one bracket char is given it will be placed on either side.

"wrap me".bracket('{')        #=> "{wrap me}"
"wrap me".bracket('--','!')   #=> "--wrap me!"

CREDIT: Trans

# File 'lib/core/facets/string/bracket.rb', line 14

# Wraps the string between +bra+ and a closing bracket.
# The closing bracket is +ket+ when given; otherwise the BRA2KET
# counterpart of +bra+; otherwise +bra+ itself.
def bracket(bra, ket=nil)
  closing = ket || BRA2KET[bra] || bra
  "#{bra}#{self}#{closing}"
end

#bracket!(bra, ket = nil) ⇒ `Object`

Inplace version of #bracket.

CREDIT: Trans



24
25
26

# File 'lib/core/facets/string/bracket.rb', line 24

# In-place #bracket: replaces the receiver's content with the bracketed
# form and returns the receiver.
def bracket!(bra, ket=nil)
  wrapped = bracket(bra, ket)
  replace(wrapped)
end

#briefcase ⇒ `Object`

Transform a string into a sentence like form.

"This Is A String".briefcase
#=> "This is a string"



22
23
24

# File 'lib/core/facets/string/titlecase.rb', line 22

# Applies #titlecase and then #capitalize to the result.
def briefcase
  titled = titlecase
  titled.capitalize
end

#camelcase(*separators) ⇒ `Object`

Converts a string to camelcase. This method leaves the first character as given. This allows other methods to be used first, such as #uppercase and #lowercase.

"camel_case".camelcase          #=> "camelCase"
"Camel_case".camelcase          #=> "CamelCase"

Custom separators can be used to specify the patterns used to determine where capitalization should occur. By default these are underscores (`_`) and whitespace characters (`\s`).

"camel/case".camelcase('/')     #=> "camelCase"

If the first separator is a symbol, either ‘:lower` or `:upper`, then the first characters of the string will be downcased or upcased respectively.

"camel_case".camelcase(:upper)  #=> "CamelCase"

Note that this implementation is different from ActiveSupport’s. If that is what you are looking for you may want #modulize.

# File 'lib/core/facets/string/camelcase.rb', line 24

# Converts the string to camelcase.
#
# separators - Regexp source fragments marking word boundaries
#              (default: '_' and '\s'). A leading Symbol, true, false or
#              nil is taken off the list and used as the first-letter
#              mode: :upper/true upcases, :lower/false downcases the first
#              letter after the string start or any whitespace.
#
# Returns the converted String.
def camelcase(*separators)
  head = separators.first
  mode_given = [Symbol, TrueClass, FalseClass, NilClass].any? { |klass| klass === head }
  first_letter = separators.shift if mode_given

  separators = ['_', '\s'] if separators.empty?

  str = separators.inject(self.dup) do |acc, sep|
    acc.gsub(/(?:#{sep}+)([a-z])/) { Regexp.last_match(1).upcase }
  end

  case first_letter
  when :upper, true
    str.gsub(/(\A|\s)([a-z])/) { Regexp.last_match(1) + Regexp.last_match(2).upcase }
  when :lower, false
    str.gsub(/(\A|\s)([A-Z])/) { Regexp.last_match(1) + Regexp.last_match(2).downcase }
  else
    str
  end
end

#capitalized? ⇒ `Boolean`

Return true if the string is capitalized, otherwise false.

"This".capitalized?  #=> true
"THIS".capitalized?  #=> false
"this".capitalized?  #=> false

Note Ruby’s strange concept of capitalized. See capitalcase for the more common conception.

CREDIT: Phil Tomson

Returns:

(Boolean)



14
15
16

# File 'lib/core/facets/string/capitalized.rb', line 14

# True when the string is unchanged by String#capitalize.
def capitalized?
  self == capitalize
end

#cleanlines(&block) ⇒ `Object`

Returns an Enumerator for iterating over each line of the string, stripped of whitespace on either side.

"this\nthat\nother\n".cleanlines.to_a  #=> ['this', 'that', 'other']

# File 'lib/core/facets/string/cleanlines.rb', line 11

# Walks the string line by line (matches of /^.*?$/), stripping each one.
# With a block, each stripped line is passed to it; without one, an
# Enumerator producing the stripped lines is returned.
def cleanlines(&block)
  return scan(/^.*?$/) { |raw| block.call(raw.strip) } if block

  source = self
  Enumerator.new do |output|
    source.scan(/^.*?$/) { |raw| output.yield(raw.strip) }
  end
end

#cleave(threshold = nil, len = nil) ⇒ `Object`

Cleave a string. Break a string in two parts at the nearest whitespace.

CREDIT: Trans

# File 'lib/core/facets/string/cleave.rb', line 8

# Splits the string in two at the whitespace nearest to a pivot index.
#
# threshold - Largest accepted distance between the pivot and the chosen
#             whitespace (default: the string size). When both candidates
#             are farther away, the threshold itself is used as cut index.
# len       - Pivot index (default: half the string size).
#
# Returns a two-element Array with both halves stripped.
def cleave(threshold=nil, len=nil)
  pivot = (len || size / 2)
  limit = threshold || size

  head = self[0...pivot]
  tail = self[pivot..-1]

  # Last whitespace before the pivot, first whitespace at or after it.
  left_cut  = head.rindex(/\s/) || 0
  right_cut = (tail.index(/\s/) || pivot) + pivot

  left_gap  = (left_cut - pivot).abs
  right_gap = (right_cut - pivot).abs

  cut = if [left_gap, right_gap].min > limit
          limit
        elsif left_gap < right_gap
          left_cut
        else
          right_cut
        end

  return self[0..cut].to_s.strip, self[cut+1..-1].to_s.strip
end

#cmp(other) ⇒ `Object`

Compare method that takes length into account. Unlike #<=>, this is compatible with #succ.

"abc".cmp("abc")   #=>  0
"abcd".cmp("abc")  #=>  1
"abc".cmp("abcd")  #=> -1
"xyz".cmp("abc")   #=>  1

CREDIT: Peter Vanbroekhoven

TODO: Move String#cmp to string/ directory.

# File 'lib/core/facets/comparable/cmp.rb', line 34

# Compares by length first; strings of equal length fall back to #<=>.
# Returns -1, 0 or 1.
def cmp(other)
  if length < other.length
    -1
  elsif length > other.length
    1
  else
    self <=> other  # same length: ordinary string comparison
  end
end

#compress_lines(spaced = true) ⇒ `Object`

Matches any whitespace (including newline) and replaces with a single space

string = <<-QUERY.compress_lines
  SELECT name
  FROM users
QUERY

string  #=> "SELECT name FROM users"



12
13
14

# File 'lib/core/facets/string/compress_lines.rb', line 12

# Splits on the input record separator ($/), strips every line and joins
# the lines with a single space (or with nothing when +spaced+ is false).
def compress_lines(spaced = true)
  glue = spaced ? ' ' : ''
  split($/).map(&:strip).join(glue)
end

#crypt(salt = nil) ⇒ `Object`

Common Unix cryptography method. This adds a default salt to the built-in crypt method.

NOTE: This method is not a common core extension and is not loaded automatically when using require 'facets'.

Uncommon:

require ‘facets/string/crypt’

# File 'lib/core/facets/string/crypt.rb', line 14

# Calls the aliased built-in crypt (_crypt) with +salt+. When no salt is
# given, a two-letter salt is generated with Kernel#rand, each letter
# being a random upper- or lower-case ASCII letter.
def crypt(salt=nil)
  salt ||= Array.new(2) { (rand(26) + (rand(2) == 0 ? 65 : 97)).chr }.join
  _crypt(salt)
end

#divide(re) ⇒ `Object`

Breaks a string up into an array based on a regular expression. Similar to scan, but includes the matches.

s = "<p>This<b>is</b>a test.</p>"
s.divide( /\<.*?\>/ )
#=> ["<p>This", "<b>is", "</b>a test.", "</p>"]

CREDIT: Trans

# File 'lib/core/facets/string/divide.rb', line 12

# Splits the string into chunks that each start with a match of +re+ and
# run up to (not including) the next match or the end of the string.
def divide( re )
  scan(/#{re}.*?(?=#{re}|\Z)/)
end

#downcase? ⇒ `Boolean`

Return true if the string is lowercase (downcase), otherwise false.

"THIS".downcase?  #=> false
"This".downcase?  #=> false
"this".downcase?  #=> true

CREDIT: Phil Tomson

Returns:

(Boolean)



26
27
28

# File 'lib/core/facets/string/capitalized.rb', line 26

# True when the string is unchanged by String#downcase.
def downcase?
  self == downcase
end

#each_word(&block) ⇒ `Object`

Iterate through each word of a string.

a = []

"list of words".each_word { |word| a << word }

a  #=> ['list', 'of', 'words']



13
14
15

# File 'lib/core/facets/string/each_word.rb', line 13

# Passes each element of #words to the given block.
def each_word(&block)
  list = words
  list.each(&block)
end

#edit_distance(str2) ⇒ `Object`

Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.

The Levenshtein distance is a measure of how similar two strings s and t are, calculated as the number of deletions/insertions/substitutions needed to transform s into t. The greater the distance, the more the strings differ.

The Levenshtein distance is also sometimes referred to as the easier-to-pronounce-and-spell ‘edit distance’.

Calculate the Levenshtein distance between two strings self and str2. self and str2 should be ASCII, UTF-8, or a one-byte-per character encoding such as ISO-8859-*.

The strings will be treated as UTF-8 if $KCODE is set appropriately (i.e. ‘u’). Otherwise, the comparison will be performed byte-by-byte. There is no specific support for Shift-JIS or EUC strings.

When using Unicode text, be aware that this algorithm does not perform normalisation. If there is a possibility of different normalised forms being used, normalisation should be performed beforehand.

CREDIT: Paul Battley

# File 'lib/core/facets/string/edit_distance.rb', line 25

# Levenshtein distance between the receiver and +str2+: the number of
# single-element insertions, deletions and substitutions needed to turn
# one into the other.
#
# When both strings are valid UTF-8 the comparison is done per code
# point; otherwise it is done per byte. The old check relied on $KCODE,
# which is always unset on Ruby 1.9+, so multibyte characters were
# compared byte-by-byte and counted as several edits.
#
# No Unicode normalisation is performed.
#
# Returns the distance as an Integer.
def edit_distance(str2)
  str1 = self
  unicode = [str1, str2].all? do |s|
    s.encoding == Encoding::UTF_8 && s.valid_encoding?
  end
  unpack_rule = unicode ? 'U*' : 'C*'

  s = str1.unpack(unpack_rule)
  t = str2.unpack(unpack_rule)

  n = s.length
  m = t.length

  return m if n.zero?
  return n if m.zero?

  # d holds the previous row of the distance matrix; e is the value just
  # computed to the left in the current row.
  d = (0..m).to_a
  x = nil

  (0...n).each do |i|
    e = i + 1
    (0...m).each do |j|
      cost = (s[i] == t[j]) ? 0 : 1
      x = [
        d[j + 1] + 1, # insertion
        e + 1,        # deletion
        d[j] + cost   # substitution
      ].min
      d[j] = e
      e = x
    end
    d[m] = x
  end

  x
end

#exclude?(str) ⇒ `Boolean`

The inverse of include?.

Returns:

(Boolean)



5
6
7

# File 'lib/core/facets/string/exclude.rb', line 5

# Negation of #include?: true when +str+ does not occur in the string.
def exclude?(str)
  include?(str) ? false : true
end

#expand_tabs(n = 8) ⇒ `Object` Also known as: expand_tab

Expands tabs to n spaces. Non-destructive. If n is 0, then tabs are simply removed. Raises an exception if n is negative.

"\t\tHey".expand_tabs(2)  #=> "    Hey"

Thanks to GGaramuno for a more efficient algorithm. Very nice.

CREDIT: Gavin Sinclair, Noah Gibbs, GGaramuno

TODO: Don’t much care for the name String#expand_tabs. What about a more concise name like #detab?

Raises:

(ArgumentError)

# File 'lib/core/facets/string/expand_tab.rb', line 16

# Returns a copy with tabs expanded to tab stops of width +n+.
# n == 0 deletes tabs, n == 1 turns each tab into one space, and a
# negative n raises ArgumentError.
def expand_tabs(n=8)
  n = n.to_int
  raise ArgumentError, "n must be >= 0" if n < 0
  case n
  when 0 then return gsub(/\t/, "")
  when 1 then return gsub(/\t/, " ")
  end

  expanded = self.dup
  # Each pass expands the first run of tabs on every line; repeat until
  # gsub! reports that nothing changed.
  loop do
    changed = expanded.gsub!(/^([^\t\n]*)(\t+)/) do
      lead = Regexp.last_match(1)
      tabs = Regexp.last_match(2)
      lead + (' ' * (n * tabs.size - (lead.size % n)))
    end
    break unless changed
  end
  expanded
end

#file ⇒ `Object`

Use fluent notation for making file directives.

For instance, if we had a file ‘foo.txt’,

'foo.txt'.file.mtime

# File 'lib/core/facets/string/file.rb', line 13

# Returns a Functor whose calls are forwarded to File as
# File.<op>(self, *args, &block), with the string as the first argument.
def file
  path = self
  Functor.new { |op, *args, &blk| File.send(op, path, *args, &blk) }
end

#fold(ignore_indented = false) ⇒ `Object`

Returns a new string with all new lines removed from adjacent lines of text.

s = "This is\na test.\n\nIt clumps\nlines of text."
s.fold

produces

"This is a test.\n\nIt clumps lines of text. "

TODO: One arguable flaw with this that might need a fix: if the given string ends in a newline, it is replaced with a single space.

CREDIT: Trans

# File 'lib/core/facets/string/fold.rb', line 19

# Joins adjacent lines of text into single lines. Blank-line separators
# between paragraphs are kept as they are.
#
# ignore_indented - When true, a paragraph containing a line that starts
#                   with spaces is copied through unchanged.
#
# Returns the folded String.
def fold(ignore_indented=false)
  folded = ''
  cursor = 0
  # Each match is either a paragraph break or the end of the string.
  self.scan(/(\n\s*\n|\Z)/m) do
    found     = Regexp.last_match
    start     = found.begin(1)
    finish    = found.end(1)
    separator = found[0]
    chunk     = slice(cursor...start)
    keep      = ignore_indented && chunk =~ /^[ ]+/
    folded << (keep ? chunk : chunk.gsub(/[ ]*\n+/,' '))
    folded << separator
    cursor = finish
  end
  folded
end

#indent(n, c = ' ') ⇒ `Object`

Indent left or right by ‘n` spaces, or `n` number of `c` string.

n - The number of spaces to indent. [Integer] c - Character to use, if other than a space. [String]

Returns a new string with the indentation. [String]

Credit: Gavin Sinclair Credit: Trans Credit: Tyler Rick

# File 'lib/core/facets/string/indent.rb', line 14

# For n >= 0, prefixes every line with +c+ repeated +n+ times.
# For negative n, removes up to -n leading occurrences of +c+ from
# every line.
def indent(n, c=' ')
  return gsub(/^/, c * n) if n >= 0

  gsub(/^#{Regexp.escape(c)}{0,#{-n}}/, "")
end

#indent!(n, c = ' ') ⇒ `Object`

Equivalent to String#indent, but modifies the receiver in place.

n - The number of spaces to indent. [Integer] c - Character to use, if other than a space. [String]

Returns this string with the indentation. [String]



29
30
31

# File 'lib/core/facets/string/indent.rb', line 29

# In-place #indent: replaces the receiver's content with the indented
# text and returns the receiver.
def indent!(n, c=' ')
  indented = indent(n, c)
  replace(indented)
end

#index_all(s, reuse = false) ⇒ `Object`

Like index but returns an array of all index locations. The reuse flag allows the trailing portion of a match to be reused for subsequent matches.

"abcabcabc".index_all('a')  #=> [0,3,6]

"bbb".index_all('bb', false)  #=> [0]
"bbb".index_all('bb', true)   #=> [0,1]

TODO: Could probably be defined for Indexable in general too.

# File 'lib/core/facets/string/index_all.rb', line 14

# Like #index, but returns every position at which +s+ matches.
#
# s     - String (matched literally) or Regexp to search for.
# reuse - When true the search resumes one character after each match
#         start, so matches may overlap; when false it resumes after the
#         end of the match.
#
# The search always advances by at least one character. Previously a
# zero-width match (an empty string, or a pattern such as /x*/) with
# reuse=false never moved forward and looped forever.
#
# Returns an Array of Integer indexes.
def index_all(s, reuse=false)
  s = Regexp.new(Regexp.escape(s)) unless Regexp === s
  found = []
  pos = 0
  while (pos = index(s, pos))
    found << pos
    width = Regexp.last_match(0).size
    pos += reuse ? 1 : [width, 1].max
  end
  found
end

#lchomp(match) ⇒ `Object`

Left chomp.

"help".lchomp("h")  #=> "elp"
"help".lchomp("k")  #=> "help"

CREDIT: Trans

# File 'lib/core/facets/string/lchomp.rb', line 10

# Returns a copy of the string without +match+ when the string starts
# with it; otherwise returns an unmodified copy.
def lchomp(match)
  return self.dup unless index(match) == 0

  self[match.size..-1]
end

#lchomp!(match) ⇒ `Object`

In-place left chomp.

"help".lchomp("h")  #=> "elp"
"help".lchomp("k")  #=> "help"

CREDIT: Trans

# File 'lib/core/facets/string/lchomp.rb', line 25

# Removes +match+ from the front of the receiver in place.
# Returns the receiver when something was removed, nil otherwise.
def lchomp!(match)
  return unless index(match) == 0

  self[0...match.size] = ''
  self
end

#line_wrap(width, tabs = 4) ⇒ `Object`

Line wrap at width.

s = "1234567890".line_wrap(5)

s  #=> "12345\n67890\n"

CREDIT: Trans

# File 'lib/core/facets/string/line_wrap.rb', line 11

# Hard-wraps the string every +width+ characters. Tabs are first
# replaced by +tabs+ spaces and newlines by a single space. Every
# emitted line, including the last, ends with "\n".
def line_wrap(width, tabs=4)
  flattened = gsub(/\t/,' ' * tabs).gsub(/\n/,' ')
  flattened.scan( /.{1,#{width}}/ ).join("\n") << "\n"
end

#linear ⇒ `Object`

Like ‘#newlines` but returns a Functor instead.

"a \n b \n c".linear.strip   #=> "a\nb\nc"

# File 'lib/core/facets/string/linear.rb', line 11

# Returns a Functor that applies the invoked method to every line of the
# string (each line chomped first) and joins the results with "\n".
def linear
  Functor.new do |op, *args, &blk|
    processed = lines.map { |line| line.chomp.public_send(op, *args, &blk) }
    processed.join("\n")
  end
end

#lower_camelcase(*separators) ⇒ `Object`

Deprecated.

Use ‘#camelcase(:lower)` instead.

Same as #camelcase but converts first letter to lowercase.

"camel_case".lower_camelcase   #=> "camelCase"
"Camel_case".lower_camelcase   #=> "camelCase"



68
69
70

# File 'lib/core/facets/string/camelcase.rb', line 68

# Deprecated shorthand for camelcase(:lower, *separators).
def lower_camelcase(*separators)
  camelcase(*[:lower, *separators])
end

#lowercase ⇒ `Object`

Downcase first letter.

# File 'lib/core/facets/string/uppercase.rb', line 17

# Returns a copy of the string with only its first character downcased.
#
# An empty string yields an empty string. Previously `str[1..-1]` was
# nil for an empty receiver and the concatenation raised TypeError.
def lowercase
  str = to_s
  str[0,1].downcase + str[1..-1].to_s
end

#margin(num = nil, opts = {}) ⇒ `Object`

Preserve relative tabbing such that the line with the least amount of white space ends up with the given number of spaces before non-space and all other lines move relative to it.

Because of the nature of this method, leading tab characters (`\t`) must be converted to spaces. The size of a tab can be set via the `:tab` option. The default size is 2.

If the ‘:lead` option is set, then the relative margin is determined by the first non-blank line, instead of the minimum white-space for all lines.

Arguments

num   - The size of the desired margin. [Integer]
opts  - Margin options. [Hash]

Options

:tab  - Size of tab character in spaces. [Integer]
:lead - Use first non-blank line as relative marker. [Boolean]

Returns a new String with adjusted margin. [String]

Author: Gavin Sinclair Author: Trans

Raises:

(ArgumentError)

# File 'lib/core/facets/string/margin.rb', line 32

# Shift the whole text so that its reference line starts at column +num+,
# keeping every other line's indentation relative to it.
#
# num  - Desired margin width in spaces. [Integer]
# opts - Options hash:
#        :tab  - width used when expanding tab characters (default 2)
#        :lead - when truthy, the first line that starts with optional
#                spaces followed by a non-space is the reference line;
#                otherwise the least-indented non-blank line is used.
#
# Returns a new String. Text without any non-blank line is returned
# tab-expanded but otherwise unchanged.
# Raises ArgumentError when +num+ is omitted (the old zero-argument
# behaviour now lives in #trim).
def margin(num=nil, opts={})
  # TODO: temporary to help people transition
  raise ArgumentError, "String#margin has been renamed to #trim." unless num

  tab = opts[:tab] || 2
  str = gsub("\t", " " * tab)  # TODO: only leading tabs ?

  if opts[:lead]
    # Measure on the tab-expanded copy so tabs are handled the same way
    # as in the non-lead branch.
    lead = str[/^( *)\S/, 1]
    return str unless lead
    str.indent(num - lead.length)
  else
    depths = str.each_line.reject { |line| line.strip.empty? }.
                           map    { |line| line.index(/\S/) }
    # Nothing but blank lines: there is no margin to adjust.
    return str if depths.empty?
    str.indent(num - depths.min)
  end
end

#methodize ⇒ `Object`

Translate a class or module name to a suitable method name.

"My::CoolClass".methodize  #=> "my__cool_class"

# File 'lib/core/facets/string/methodize.rb', line 7

# Turn a class/module name (or path) into a method-style name:
# camel-case humps become underscores, "/" and "::" become "__",
# and the result is downcased.
def methodize
  name = gsub(/([A-Z]+)([A-Z])/, '\1_\2')
  name = name.gsub(/([a-z])([A-Z])/, '\1_\2')
  name = name.gsub('/', '__')
  name = name.gsub('::', '__')
  name.downcase
end

#modulize ⇒ `Object`

Converts a string to module name representation.

This is essentially #camelcase, but it also converts ‘/’ to ‘::’ which is useful for converting paths to namespaces.

Examples

"method_name".modulize    #=> "MethodName"
"method/name".modulize    #=> "Method::Name"

# File 'lib/core/facets/string/modulize.rb', line 14

# Convert a snake_case name or path into a module-style name.
#
# "__" and "/" become "::" (upcasing the character that follows),
# runs of "_" or "-" before a lowercase letter are dropped with that
# letter upcased, and a lowercase letter at the start of the string or
# after whitespace is upcased.
def modulize
  step = gsub(/__(.?)/)  { "::#{Regexp.last_match(1).upcase}" }
  step = step.gsub(/\/(.?)/) { "::#{Regexp.last_match(1).upcase}" }
  step = step.gsub(/(?:_+|-+)([a-z])/) { Regexp.last_match(1).upcase }
  step.gsub(/(\A|\s)([a-z])/) do
    Regexp.last_match(1) + Regexp.last_match(2).upcase
  end
end

#mscan(re) ⇒ `Object` Also known as: each_match

Like #scan but returns MatchData ($~) rather than the matched string ($&).

CREDIT: Trans

# File 'lib/core/facets/string/mscan.rb', line 8

# Like String#scan, but works with MatchData objects instead of the
# matched text.
#
# With a block, each MatchData is yielded and the result of the
# underlying #scan call is returned. Without a block, an Array of
# MatchData is returned.
def mscan(re) #:yield:
  return scan(re) { yield(Regexp.last_match) } if block_given?

  found = []
  scan(re) { found << Regexp.last_match }
  found
end

#natcmp(str2, caseInsensitive = false) ⇒ `Object`

‘Natural order’ comparison of strings, e.g. …

"my_prog_v1.1.0" < "my_prog_v1.2.0" < "my_prog_v1.10.0"

which does not follow alphabetically. A secondary parameter, if set to true, makes the comparison case insensitive.

"Hello.1".natcmp("Hello.10")  #=> -1

TODO: Invert case flag?

Author:

Alan Davies
Martin Pool

# File 'lib/core/facets/string/natcmp.rb', line 47

# 'Natural order' comparison of two strings.
#
# Both strings are stripped of all whitespace and then compared chunk by
# chunk, each chunk being a run of non-digits followed by a run of
# digits. Non-digit runs compare as strings. Digit runs compare as
# integers unless either of them starts with "0", in which case they
# compare as strings (so "1.02" sorts before "1.1").
#
# str2            - The String to compare against.
# caseInsensitive - When true, both strings are downcased first.
#
# Returns -1, 0 or 1.
def natcmp(str2, caseInsensitive=false)
  str1 = self.dup
  str2 = str2.dup
  compareExpression = /^(\D*)(\d*)(.*)$/

  if caseInsensitive
    str1.downcase!
    str2.downcase!
  end

  # -- remove all whitespace
  str1.gsub!(/\s+/, '')
  str2.gsub!(/\s+/, '')

  while (str1.length > 0) || (str2.length > 0)
    # -- extract non-digits, digits and rest of string
    chars1, num1, str1 = compareExpression.match(str1).captures
    chars2, num2, str2 = compareExpression.match(str2).captures

    # -- compare the non-digits
    order = chars1 <=> chars2
    return order unless order == 0

    # Non-digits are the same, compare the digits. The previous check
    # compared a one-character String with the Integer 48, which is never
    # equal on Ruby >= 1.9, so the leading-zero rule never applied.
    unless num1.start_with?('0') || num2.start_with?('0')
      num1, num2 = num1.to_i, num2.to_i
    end
    order = num1 <=> num2
    return order unless order == 0
  end

  # -- strings are naturally equal
  0
end

#nchar(n, replacement = nil) ⇒ `Object`

Returns n characters of the string. If n is positive the characters are from the beginning of the string. If n is negative from the end of the string.

str = "this is text"

str.nchar(4)            #=> "this"
str.nchar(-4)           #=> "text"

Alternatively a replacement string can be given, which will replace the n characters.

str.nchar(4, 'that')    #=> "that is text"

The original string remains unaffected.

str  #=> "this is text"

# File 'lib/core/facets/string/nchar.rb', line 21

# Return (or replace) +n+ characters at one end of the string.
#
# A positive +n+ addresses the first n characters; any other +n+ is
# used as a start index counted from the end (n..-1).
#
# When +replacement+ is given, a copy of the string with that span
# replaced is returned; the receiver itself is never modified.
def nchar(n, replacement=nil)
  span = n > 0 ? (0...n) : (n..-1)
  return self[span] unless replacement

  copy = dup
  copy[span] = replacement
  copy
end

#newlines(&block) ⇒ `Object`

Returns an Enumerator for iterating over each line of the string, void of the terminating newline character, in contrast to #lines which retains it.

"a\nb\nc".newlines.class.assert == Enumerator
"a\nb\nc".newlines.to_a.assert == %w{a b c}

a = []
"a\nb\nc".newlines{|nl| a << nl}
a.assert == %w{a b c}

# File 'lib/core/facets/string/newlines.rb', line 16

# Iterate over the lines of the string without their line terminators.
#
# With a block, each line is passed to the block and the result of the
# underlying #scan call is returned. Without a block, an Enumerator
# producing the same lines is returned.
def newlines(&block)
  return scan(/^.*?$/) { |line| block.call(line.chomp) } if block

  source = self
  Enumerator.new do |yielder|
    source.scan(/^.*?$/) { |line| yielder.yield(line.chomp) }
  end
end

#number? ⇒ `Boolean`

Returns true if the string is a decimal number (digits, optionally grouped with underscores, with an optional sign and fractional part).

"123_456_789_123_456_789.123_456_000_111".number?  # => true
"1.23".number?  # => true
"1.23a".number? # => false

CREDIT: u2

Returns:

(Boolean)



11
12
13

# File 'lib/core/facets/string/number.rb', line 11

# Returns true if the string is a decimal number: an optional sign,
# digit groups separated by single underscores, and an optional
# fractional part of the same form.
#
# The pattern accepts exactly the same strings as before (including a
# single trailing newline, because of \Z) but no longer nests lazy
# quantifiers, so a long non-matching input cannot trigger exponential
# backtracking.
def number?
  !!(self =~ /\A[+-]?\d+(?:_\d+)*(?:\.\d+(?:_\d+)*)?\Z/)
end

#object_state(data = nil) ⇒ `Object`



54
55
56

# File 'lib/core/facets/object/object_state.rb', line 54

# Get or set the state of the string.
#
# With +data+, the receiver's content is replaced by it (via #replace).
# Without +data+, a duplicate of the receiver is returned.
def object_state(data=nil)
  return dup unless data
  replace(data)
end

#pathize ⇒ `Object`

Transforms a namespace, i.e. a class or module name, into a viable file path.

"ExamplePathize".pathize           #=> "example_pathize"
"ExamplePathize::Example".pathize  #=> "example_pathize/example"

Compare this method to String#modulize and String#methodize.

# File 'lib/core/facets/string/pathize.rb', line 11

# Turn a class/module name into a file-path style string:
# camel-case humps become underscores, "__" and "::" become "/",
# whitespace and reserved characters (?%*:|"<>.) are dropped, and the
# result is downcased.
def pathize
  path = gsub(/([A-Z]+)([A-Z])/, '\1_\2')
  path = path.gsub(/([a-z])([A-Z])/, '\1_\2')
  path = path.gsub('__', '/')
  path = path.gsub('::', '/')
  path = path.gsub(/\s+/, '')              # spaces are bad form
  path = path.gsub(/[?%*:|"<>.]+/, '')     # reserved characters
  path.downcase
end

#quote(type = :double, count = nil) ⇒ `Object`

Return a new string embraced by given type and count of quotes. The arguments can be given in any order.

If no type is given, double quotes are assumed.

"quote me".quote     #=> '"quote me"'

If no type but a count is given then :mixed is assumed.

"quote me".quote(1)  #=> %q{'quote me'}
"quote me".quote(2)  #=> %q{"quote me"}
"quote me".quote(3)  #=> %q{'"quote me"'}

Symbols can be used to describe the type.

"quote me".quote(:single)    #=> %q{'quote me'}
"quote me".quote(:double)    #=> %q{"quote me"}
"quote me".quote(:back)      #=> %q{`quote me`}
"quote me".quote(:bracket)   #=> %q{`quote me'}

Or the character itself.

"quote me".quote("'")     #=> %q{'quote me'}
"quote me".quote('"')     #=> %q{"quote me"}
"quote me".quote("`")     #=> %q{`quote me`}
"quote me".quote("`'")    #=> %q{`quote me'}

CREDIT: Trans

# File 'lib/core/facets/string/quote.rb', line 32

# Wrap the string in quotes of the given type, repeated +count+ times.
#
# The two arguments may be given in either order: when the first one is
# an Integer it is taken as the count, and the second (or :mixed when
# absent) as the type. Otherwise the count defaults to 1.
#
# The type may be given as a Symbol/String name ('single', 'double',
# 'back', 'bracket', 'mixed' or their short forms) or as the quote
# character(s) themselves.
#
# Raises ArgumentError for an unrecognized type.
def quote(type=:double, count=nil)
  if Integer === type
    type, count = (count || :mixed), type
  else
    count ||= 1
  end

  type = type.to_s unless Integer === type

  front, back =
    case type
    when "'", 'single', 's', 1
      ["'" * count, "'" * count]
    when '"', 'double', 'd', 2
      ['"' * count, '"' * count]
    when '`', 'back', 'b', -1
      ['`' * count, '`' * count]
    when "`'", 'bracket', 'sb'
      ['`' * count, "'" * count]
    when "'\"", 'mixed', "m", Integer
      # count/2 double quotes, plus one single quote outside when odd.
      doubles = '"' * (count.to_f / 2).to_i
      count % 2 != 0 ? ["'" + doubles, doubles + "'"] : [doubles, doubles]
    else
      raise ArgumentError, "unrecognized quote type -- #{type}"
    end

  "#{front}#{self}#{back}"
end

#range(pattern, offset = 0) ⇒ `Object`

Like #index but returns a Range.

"This is a test!".range('test')  #=> (10..13)

CREDIT: Trans

# File 'lib/core/facets/string/range.rb', line 9

# Like String#index, but returns the Range of character positions
# covered by the first match at or after +offset+, or nil when there is
# no match.
#
# A non-Regexp +pattern+ is converted with #to_s and matched literally.
def range(pattern, offset=0)
  regexp = Regexp === pattern ? pattern : Regexp.new(Regexp.escape(pattern.to_s))
  found  = regexp.match(self[offset..-1])
  return nil unless found

  # Positions are relative to the substring, so shift them back.
  first = found.begin(0) + offset
  last  = found.end(0) + offset - 1
  first..last
end

#range_all(pattern, reuse = false) ⇒ `Object`

Like #index_all but returns an array of Ranges.

"abc123abc123".range_all('abc')  #=> [0..2, 6..8]

TODO: Add offset ?

CREDIT: Trans

# File 'lib/core/facets/string/range.rb', line 28

# Like #index_all, but returns an Array of Ranges, one per match.
#
# pattern - String or Regexp to look for (see #range).
# reuse   - When true, the search restarts one character after the
#           previous start position, so overlapping matches are found.
#           When false, it restarts right after the previous match.
#
# Returns an Array of unique Ranges in order of appearance.
def range_all(pattern, reuse=false)
  ranges = []
  pos    = 0
  while pos < length
    rng = range(pattern, pos)
    break unless rng
    ranges << rng
    # rng.end is already an absolute index, so assign rather than add it
    # to the offset; always advance at least one character so a
    # zero-width match cannot stall the loop.
    pos = reuse ? pos + 1 : [rng.end + 1, pos + 1].max
  end
  ranges.uniq
end

#range_of_line ⇒ `Object`

Returns an array of ranges mapping the characters per line.

"this\nis\na\ntest".range_of_line
#=> [0..4, 5..7, 8..9, 10..13]

CREDIT: Trans

# File 'lib/core/facets/string/range.rb', line 50

# Returns an Array with one Range per line, giving the character
# positions that line occupies within the string (its line terminator
# included).
def range_of_line
  start = 0
  each_line.map do |line|
    finish = start + line.length
    span   = start..(finish - 1)
    start  = finish
    span
  end
end

#remove(pattern) ⇒ `Object`

Removes all occurrences of a pattern in a string.

Returns a new [String] with all occurrences of the pattern removed.



6
7
8

# File 'lib/core/facets/string/remove.rb', line 6

# Returns a copy of the string with every occurrence of +pattern+
# (String or Regexp) deleted.
def remove(pattern)
  nothing = ''
  gsub(pattern, nothing)
end

#remove!(pattern) ⇒ `Object`

Removes all occurrences of a pattern in a string.

Returns the [String] with all occurrences of the pattern removed.



13
14
15

# File 'lib/core/facets/string/remove.rb', line 13

# Deletes every occurrence of +pattern+ from the receiver in place.
#
# Returns whatever String#gsub! returns: the receiver when something
# was removed, nil when the pattern did not occur.
def remove!(pattern)
  nothing = ''
  gsub!(pattern, nothing)
end

#rewrite(rules) ⇒ `Object`

Apply a set of rules in the form of regular expression matches to the string.

rules - The array containing rule-pairs (match, write).

Keep in mind that the order of rules is significant.

Returns the rewritten String.

CREDIT: George Moschovitis

Raises:

(ArgumentError)

# File 'lib/core/facets/string/rewrite.rb', line 14

# Apply a list of (match, write) substitution rules, in order, to a
# copy of the string.
#
# rules - Enumerable of pairs; each pair is passed to String#gsub!.
#
# Returns the rewritten copy.
# Raises ArgumentError when +rules+ is nil (or false).
def rewrite(rules)
  raise ArgumentError, 'The rules parameter is nil' unless rules

  dup.tap do |result|
    rules.each { |(match, write)| result.gsub!(match, write) }
  end
end

#roman ⇒ `Object`

Considers the string a Roman numeral, and converts it to the corresponding integer.

NOTE: This method is not a common core extension and is not loaded automatically when using require 'facets'.

Uncommon:

require ‘facets/string/roman’

# File 'lib/core/facets/roman.rb', line 62

# Interpret the string as a Roman numeral and return its integer value.
#
# The upcased numeral is read right to left: a symbol whose value is
# smaller than the most recent larger-or-equal symbol is subtracted,
# anything else is added. Values are looked up in ROMAN_VALUES.
#
# Raises (via a bare +raise+) when #roman? is falsy.
def roman
  numeral = upcase
  raise unless roman?
  reference = numeral[-1,1]
  numeral.reverse.split('').inject(0) do |total, symbol|
    value = ROMAN_VALUES[symbol]
    if value < ROMAN_VALUES[reference]
      total - value
    else
      reference = symbol
      total + value
    end
  end
end

#roman? ⇒ `Boolean`

Returns true if and only if the subject is a valid Roman numeral.

NOTE: This method is not a common core extension and is not loaded automatically when using require 'facets'.

Returns:

(Boolean)

Uncommon:

require ‘facets/string/roman’



84
85
86

# File 'lib/core/facets/roman.rb', line 84

# Tests the upcased string against the ROMAN pattern.
# Returns the result of `=~`: the match position when it matches,
# nil otherwise.
def roman?
  ROMAN =~ upcase
end

#rotate(count = 1) ⇒ `Object`

Rotate string to the left with count. Specifying negative number indicates rotation to the right.

'abcdefgh'.rotate(2)  #=> 'cdefghab'
'abcdefgh'.rotate(-2) #=> 'ghabcdef'

CREDIT: T. Yamada

# File 'lib/core/facets/string/rotate.rb', line 10

# Rotate the string to the left by +count+ characters; a negative count
# rotates to the right.
#
# The count wraps around the string length, so any Integer is accepted
# (previously a count beyond the length raised NoMethodError). An empty
# string rotates to an empty string.
#
# Returns a new String.
def rotate(count=1)
  return dup if empty?
  shift = count % length   # always within 0...length, also for negatives
  self[shift..-1] + self[0, shift]
end

#rotate!(count = 1) ⇒ `Object`

Destructive version of String#rotate

s='abcdefgh'
s.rotate!(2)
s.should eq 'cdefghab'

CREDIT: T. Yamada

22

# File 'lib/core/facets/string/rotate.rb', line 22

def rotate!(count=1)
  replace(rotate(count))
end

#shatter(re) ⇒ `Object`

Breaks a string up into an array based on a regular expression. Similar to scan, but includes the matches.

s = "<p>This<b>is</b>a test.</p>"
s.shatter( /\<.*?\>/ )

produces

["<p>", "This", "<b>", "is", "</b>", "a test.", "</p>"]

CREDIT: Trans

# File 'lib/core/facets/string/shatter.rb', line 15

# Break the string into an Array of pieces: every match of +re+ and
# every stretch of text between matches, in order of appearance.
#
# Empty pieces at the very beginning and end are dropped; an empty piece
# between two directly adjacent matches is kept, as before.
#
# The pieces are collected from match positions, so text that already
# contains a "\1" (0x01) character is no longer split on it.
def shatter( re )
  pieces = []
  pos = 0
  scan(re) do
    md = Regexp.last_match
    pieces << self[pos...md.begin(0)] << md[0]
    pos = md.end(0)
  end
  pieces << self[pos..-1]

  pieces.shift while pieces.first && pieces.first.empty?
  pieces.pop   while pieces.last  && pieces.last.empty?
  pieces
end

#similarity(str_in) ⇒ `Object`

A fuzzy matching mechanism. Returns a score from 0-1, based on the number of shared edges. To be effective, the strings must be of length 2 or greater.

"Alexsander".similarity("Aleksander")  #=> 0.9

The way it works:

Converts each string into a “graph like” object, with edges …

"alexsander" -> [ alexsander, alexsand, alexsan ... lexsand ... san ... an, etc ]
"aleksander" -> [ aleksander, aleksand ... etc. ]

Perform match, then remove any subsets from this matched set (i.e. a hit

on “san” is a subset of a hit on “sander”) …

Above example, once reduced -> [ ale, sander ]

Sees how many of the matches remain, and calculates a score based

on how many matches, their length, and compare to the length of the larger of the two words.

Still a bit rough. Any suggestions for improvement are welcome.

CREDIT: Derek Lewis

# File 'lib/core/facets/string/similarity.rb', line 29

# Fuzzy similarity score between this string and +str_in+.
#
# Every substring of length 2 or more is collected from both strings;
# the substrings they have in common are reduced to the maximal ones
# (a hit on "san" is dropped when "sander" is also a hit), and the sum
# of their lengths is divided by the length of the longer string.
#
#   "Alexsander".similarity("Aleksander")  #=> 0.9
#
# Returns 0 when +str_in+ is nil, 1 when both strings are equal,
# otherwise a Float.
def similarity(str_in)
  return 0 if str_in.nil?
  return 1 if self == str_in

  # -- "graph" a word: all of its substrings of two or more characters
  edges = lambda do |str|
    last = str.length - 1
    (0...last).flat_map { |ff| ((ff + 1)..last).map { |ss| str[ff..ss] } }
  end

  # -- the "graph edges" both words share (Array#& also de-duplicates)
  matches = edges.call(self) & edges.call(str_in)

  # -- eliminate any subsets: keep only hits not contained in another hit
  maximal = matches.reject do |hit|
    matches.any? { |other| other != hit && other.include?(hit) }
  end

  score   = maximal.inject(0.0) { |sum, hit| sum + hit.length }
  largest = [length, str_in.length].max
  # Normalize by the longer word so the score stays on the 0-1 scale;
  # dividing by largest+1 made the documented example come out as 9/11.
  score / largest
end

#snakecase ⇒ `Object` Also known as: underscore

Underscore a string such that camelcase, dashes and spaces are replaced by underscores. This is the reverse of #camelcase, albeit not an exact inverse.

"SnakeCase".snakecase         #=> "snake_case"
"Snake-Case".snakecase        #=> "snake_case"
"Snake Case".snakecase        #=> "snake_case"
"Snake  -  Case".snakecase    #=> "snake_case"

Note, this method no longer converts ‘::` to `/`, in that case use the #pathize method instead.

# File 'lib/core/facets/string/snakecase.rb', line 15

# Convert camel-case, dashes and whitespace into single underscores and
# downcase the result.
def snakecase
  #gsub(/::/, '/').
  snake = gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
  snake = snake.gsub(/([a-z\d])([A-Z])/, '\1_\2')
  snake = snake.tr('-', '_')
  snake = snake.gsub(/\s/, '_')
  snake = snake.gsub(/__+/, '_')
  snake.downcase
end

#splice(idx, sub = nil) ⇒ `Object`

String#splice is essentially the same as #store.

a = "HELLO"
a.splice(1, "X")
a                #=> "HXLLO"

But it acts like #slice! when given a single argument.

a = "HELLO"
a.splice(1)    #=> "E"
a              #=> "HLLO"

CREDIT: Trans

# File 'lib/core/facets/string/splice.rb', line 19

# Store into, or cut out of, the string in place.
#
# With +sub+ given, delegates to #store(idx, sub). With only +idx+, the
# addressed part is removed from the string and returned: the whole
# Range when +idx+ is a Range, otherwise the single character at +idx+.
def splice(idx, sub=nil)
  return store(idx, sub) if sub

  Range === idx ? slice!(idx) : slice!(idx, 1)
end

#squish ⇒ `Object`

Returns the string, first removing all whitespace on both ends of the string, and then changing remaining consecutive whitespace groups into one space each.

%{ Multi-line
   string }.squish                   # => "Multi-line string"

" foo   bar    \n   \t   boo".squish # => "foo bar boo"



11
12
13

# File 'lib/core/facets/string/squish.rb', line 11

# Non-destructive #squish!: works on a duplicate and returns it.
def squish
  copy = dup
  copy.squish!
end

#squish! ⇒ `Object`

Performs a destructive squish. See String#squish.

# File 'lib/core/facets/string/squish.rb', line 16

# Strip the string in place and collapse every run of whitespace into a
# single space. Always returns the receiver.
def squish!
  tap do |str|
    str.strip!
    str.gsub!(/\s+/, ' ')
  end
end

#titlecase ⇒ `Object`

Transform a string into a form that makes for an acceptable title.

"this is a string".titlecase
#=> "This Is A String"

Author:

Eliazar Parra
Angelo Lakra (apostrophe fix)

# File 'lib/core/facets/string/titlecase.rb', line 11

# Produce a title-style string: underscores become spaces, whitespace
# runs collapse to one space, and the first letter of every word is
# upcased, except a word character that directly follows an apostrophe.
def titlecase
  spaced = tr('_', ' ').gsub(/\s+/, ' ')
  spaced.gsub(/\b\w/) do
    match = Regexp.last_match
    match.pre_match[-1,1] == "'" ? match[0] : match[0].upcase
  end
end

#to_b ⇒ `Object`

Interpret common affirmative string meanings as true, otherwise nil or false. Blank space and case are ignored. The following strings will return true …

true
yes
on
t
1
y
==

The following strings will return nil …

nil
null

All other strings return false.

Here are some examples.

"true".to_b   #=> true
"yes".to_b    #=> true
"no".to_b     #=> false
"123".to_b    #=> false

# File 'lib/core/facets/boolean.rb', line 96

# Interpret the string as a boolean, ignoring case and surrounding
# whitespace.
#
# Returns true for 'true', 'yes', 'on', 't', '1', 'y' and '==';
# nil for 'nil' and 'null'; false for anything else.
def to_b
  word = downcase.strip
  return true if ['true', 'yes', 'on', 't', '1', 'y', '=='].include?(word)
  return nil  if ['nil', 'null'].include?(word)
  false
end

#to_date ⇒ `Object`

Parse a date from the string.

# File 'lib/standard/facets/date.rb', line 428

# Parse the string with Date._parse and build a Date from the year,
# month and day-of-month fields it found.
def to_date
  #::Date::civil(*ParseDate.parsedate(self)[0..2])
  parsed = ::Date._parse(self, false)
  year, month, day = parsed.values_at(:year, :mon, :mday)
  ::Date.new(year, month, day)
end

#to_datetime ⇒ `Object`

Convert string to DateTime.

# File 'lib/standard/facets/date.rb', line 422

# Parse the string with Date._parse and build a DateTime from its
# year, month, day, hour, minute and second fields; any field the
# parser did not find is taken as 0.
def to_datetime
  parsed = ::Date._parse(self, false)
  fields = parsed.values_at(:year, :mon, :mday, :hour, :min, :sec)
  ::DateTime.civil(*fields.map { |field| field || 0 })
end

#to_re(esc = false) ⇒ `Object`

Turns a string into a regular expression.

"a?".to_re  #=> /a?/

CREDIT: Trans



9
10
11

# File 'lib/core/facets/string/to_re.rb', line 9

# Build a Regexp from the string. The text is used as pattern source
# as-is unless +esc+ is true, in which case it is escaped first and
# therefore matched literally.
def to_re(esc=false)
  source = esc ? Regexp.escape(self) : self
  Regexp.new(source)
end

#to_rx(esc = true) ⇒ `Object`

Turns a string into a regular expression. By default it will escape all characters. Use false argument to turn off escaping.

"[".to_rx  #=> /\[/

CREDIT: Trans



21
22
23

# File 'lib/core/facets/string/to_re.rb', line 21

# Build a Regexp from the string. The text is escaped (matched
# literally) by default; pass false to use it as pattern source as-is.
def to_rx(esc=true)
  source = esc ? Regexp.escape(self) : self
  Regexp.new(source)
end

#to_t(&yld) ⇒ `Object`

Translates a string in the form of a set of numerical and/or alphanumerical characters separated by non-word characters (e.g. `\W+`) into a Tuple. The values of the tuple will be converted to integers if they are purely numerical.

'1.2.3a'.to_t  #=> [1,2,"3a"]

If you would like to control the interpretation of each value as it is added to the tuple you can supply a block.

'1.2.3a'.to_t { |v| v.upcase }  #=> ["1","2","3A"]

This method calls Tuple.cast_from_string.



266
267
268

# File 'lib/standard/facets/tuple.rb', line 266

# Convert the string into a Tuple by handing it, together with the
# optional block, to Tuple.cast_from_string.
def to_t( &yld )
  Tuple.cast_from_string( self, &yld )
end

#to_time(form = :utc) ⇒ `Object`



417
418
419

# File 'lib/standard/facets/date.rb', line 417

# Parse the string with Date._parse and build a Time from its year,
# month, day, hour, minute and second fields (missing fields become 0).
#
# The values are passed to the Time class method named "#{form}_time"
# (utc_time for the default :utc form).
# NOTE(review): that class method is not defined in this listing;
# presumably it is provided elsewhere in the library - confirm.
def to_time(form = :utc)
  parsed = ::Date._parse(self, false)
  fields = parsed.values_at(:year, :mon, :mday, :hour, :min, :sec)
  ::Time.__send__("#{form}_time", *fields.map { |field| field || 0 })
end

#trim(num = 0) ⇒ `Object`

Control the margin of a string using a trim character.

The first character of the second line of the string is used as the trim character. This method is useful when literal multi-line strings are needed in code.

num - Number of extra spaces with which to replace the

trim character. [Integer]

Examples

x = %Q{
      | This
      |   is
      |    trim!
      }.trim

Returns the string with the margin trimmed off.

Note: This method used to be called `margin` prior to Facets 3.0.

Since: 3.0.0 Author: Trans

# File 'lib/core/facets/string/trim.rb', line 27

# Control the margin of a string using a trim character.
#
# The first non-space character of the second line (or, for a
# single-line string, of the first line) is the trim character. A
# trailing whitespace-only line is dropped, and on every line the
# leading whitespace up to and including the trim character is replaced
# by +num+ spaces.
#
# num - Number of spaces that replace the trim character. [Integer]
#
# Returns the trimmed String, or '' when no trim character can be found
# (empty or blank input, which previously raised NoMethodError).
def trim(num=0)
  md = /\A.*\n\s*(.)/.match(self) || /\A\s*(.)/.match(self)
  return '' unless md

  # Escape the trim character: interpolating it raw into a character
  # class broke for characters such as "^", "]" or "\".
  marker = /^\s*#{Regexp.escape(md[1])}/

  gsub(/\n\s*\Z/, '').gsub(marker, ' ' * num)
end

#unbracket(bra = nil, ket = nil) ⇒ `Object`

Return a new string with the given brackets removed. If only one bracket char is given it will be removed from either side.

"{unwrap me}".unbracket('{')        #=> "unwrap me"
"--unwrap me!".unbracket('--','!')  #=> "unwrap me"

CREDIT: Trans

# File 'lib/core/facets/string/bracket.rb', line 37

# Return a copy of the string with surrounding brackets removed.
#
# With +bra+ given, the closing bracket is +ket+ if supplied, else the
# BRA2KET entry for +bra+, else +bra+ itself. The opening bracket is
# removed where it starts a line and the closing one where it ends a
# line (the patterns use the ^ and $ anchors).
#
# Without arguments, the first character is looked up in BRA2KET and
# both ends are cut off only when the last character is its partner.
def unbracket(bra=nil, ket=nil)
  if bra
    ket ||= BRA2KET[bra] || bra
    stripped = dup
    stripped.gsub!(%r[^#{Regexp.escape(bra)}], '')
    stripped.gsub!(%r[#{Regexp.escape(ket)}$], '')
    return stripped
  end

  partner = BRA2KET[ self[0,1] ]
  return slice(1...-1) if partner && self[-1,1] == partner
  dup  # if nothing else
end

#unbracket!(bra = nil, ket = nil) ⇒ `Object`

Inplace version of #unbracket.

CREDIT: Trans



57
58
59

# File 'lib/core/facets/string/bracket.rb', line 57

# In-place version of #unbracket: replaces the receiver's content with
# the unbracketed text.
def unbracket!(bra=nil, ket=nil)
  stripped = unbracket(bra, ket)
  replace(stripped)
end

#unfold ⇒ `Object`

Unfold paragraphs such that new lines are removed from between sentences of the same paragraph.

Note that #rstrip is called on the final result, but this may change in the future.

FIXME: Sometimes adds one too many blank lines, which is why we are

using rstrip. Fix and probably remove the rstrip.

# File 'lib/core/facets/string/unfold.rb', line 12

# Unfold paragraphs: consecutive plain lines are joined with a space,
# blank lines become paragraph breaks, and lines that start with
# whitespace or "*" keep their own line break. Runs of three or more
# newlines are reduced to two and trailing whitespace is stripped from
# the result.
def unfold
  pieces = split(/\n/).map do |line|
    if line !~ /\S/
      "\n\n"
    elsif line =~ /^(\s+|[*])/
      line.rstrip + "\n"
    else
      line.rstrip + " "
    end
  end
  pieces.join.gsub(/(\n){3,}/, "\n\n").rstrip
end

#unindent(size = nil) ⇒ `Object`

Remove excessive indentation. Useful for multi-line strings embedded in already indented code.

size - The number of spaces to indent. [Integer]

Examples

<<-END.unindent
    ohaie
      wurld
END 
#=> "ohaie\n  wurld"

Returns a new unindented string. [String]

Credit: Noah Gibbs Credit: mynyml

# File 'lib/core/facets/string/indent.rb', line 51

# Remove excessive indentation.
#
# size - Number of indentation characters to remove from each line.
#        When omitted, the line with the shortest run of leading
#        spaces/tabs decides: its run length is the size and the run
#        itself is passed to #indent as the indentation string.
#
# Returns a new String. A string without any non-blank line is returned
# as an unchanged copy (previously this raised NoMethodError on nil).
def unindent(size=nil)
  return indent(-size) if size

  char = ' '
  scan(/^[\ \t]*\S/) do |m|
    if size.nil? || m.size < size
      size = m.size
      char = m[0...-1]
    end
  end
  # No line contains a non-space character: nothing to unindent.
  return dup if size.nil?

  # Each match includes the first non-space character, hence the -1.
  indent(-(size - 1), char)
end

#unindent! ⇒ `Object`

Equivalent to String#unindent, but modifies the receiver in place.

Returns this string unindented. [String]

Credit: mynyml



73
74
75

# File 'lib/core/facets/string/indent.rb', line 73

# In-place version of #unindent: replaces the receiver's content with
# the result of calling #unindent without arguments.
def unindent!
  replace(unindent)
end

#unquote ⇒ `Object`

Remove quotes from string.

"'hi'".unquote    #=> "hi"

CREDIT: Trans

# File 'lib/core/facets/string/quote.rb', line 76

# Remove one quote character (', " or `) from the start and one from
# the end of the string, each only if present.
#
# Returns a new String. A string consisting of a single quote character
# yields "" (previously the second removal raised IndexError).
def unquote
  quotes = ["'", '"', '`']
  s = dup
  s[0]  = '' if quotes.include?(s[0,1])
  # Checked after the first removal, so a lone quote is not removed twice.
  s[-1] = '' if quotes.include?(s[-1,1])
  s
end

#upcase? ⇒ `Boolean`

Is the string upcase/uppercase?

"THIS".upcase?  #=> true
"This".upcase?  #=> false
"this".upcase?  #=> false

CREDIT: Phil Tomson

Returns:

(Boolean)



41
42
43

# File 'lib/core/facets/string/capitalized.rb', line 41

# True when upcasing the string would not change it.
def upcase?
  self == upcase
end

#upper_camelcase(*separators) ⇒ `Object`

Deprecated.

Use ‘#camelcase(:upper)` instead.

Same as #camelcase but converts first letter to uppercase.

"camel_case".upper_camelcase   #=> "CamelCase"
"Camel_case".upper_camelcase   #=> "CamelCase"



56
57
58

# File 'lib/core/facets/string/camelcase.rb', line 56

# Deprecated shorthand: delegates to #camelcase with :upper as the
# first argument, followed by any given separators.
def upper_camelcase(*separators)
  camelcase(:upper, *separators)
end

#uppercase ⇒ `Object`

Upcase first letter.

NOTE: One might argue that this method should behave the same as #upcase and rather this behavior should be in place of #capitalize. Probably so, but since Matz has already defined #capitalize the way it is, this name seems most fitting to the missing behavior.

# File 'lib/core/facets/string/uppercase.rb', line 10

# Upcase the first character, leaving the rest of the string untouched
# (unlike #capitalize, which also downcases the remainder).
#
# Returns a new String. An empty receiver yields an empty string
# (previously this raised TypeError because ""[1..-1] is nil).
def uppercase
  str = to_s
  return str.dup if str.empty?
  str[0,1].upcase + str[1..-1]
end

#variablize ⇒ `Object`

Prepend an “@” to the beginning of a string to make an instance variable name. This also replaces non-valid characters with underscores.

# File 'lib/core/facets/string/variablize.rb', line 7

# Build an instance-variable style name: every non-word character is
# replaced by "_" and "@" is put in front.
def variablize
  "@#{gsub(/\W/, '_')}"
end

#word_wrap(col_width = 80) ⇒ `Object`

Word wrap a string not exceeding max width.

"this is a test".word_wrap(4)

produces …

this
is a
test

This is basic implementation of word wrap, but smart enough to suffice for most use cases.

CREDIT: Gavin Kistner, Dayne Broderson



18
19
20

# File 'lib/core/facets/string/word_wrap.rb', line 18

# Non-destructive #word_wrap!: wraps a duplicate of the string and
# returns it.
def word_wrap( col_width=80 )
  copy = dup
  copy.word_wrap!( col_width )
end

#word_wrap!(col_width = 80) ⇒ `Object`

As with #word_wrap, but modifies the string in place.

CREDIT: Gavin Kistner, Dayne Broderson

# File 'lib/core/facets/string/word_wrap.rb', line 26

# Word-wrap the string in place so that no line exceeds +col_width+
# characters. Always returns the receiver.
def word_wrap!( col_width=80 )
  # Break up unspaced runs longer than the width so they can be wrapped.
  overlong = /(\S{#{col_width}})(?=\S)/
  # Up to col_width characters followed by whitespace or a line end.
  segment  = /(.{1,#{col_width}})(?:\s+|$)/

  gsub!(overlong, '\1 ')
  gsub!(segment, "\\1\n")
  self
end

#words ⇒ `Object`

Returns an array of the whitespace-separated words in the string.

"abc 123".words  #=> ["abc","123"]



7
8
9

# File 'lib/core/facets/string/words.rb', line 7

# Split the string on runs of whitespace and return the pieces.
def words
  split(/\s+/)
end

#words_without_punctuation ⇒ `Object`

Returns an array of words in the commonly-understood sense (not including punctuation). This takes into account international punctuation characters as well as English ones.

'Slowly, grudgingly he said: "This has to stop."'.words_without_punctuation
 => ["Slowly", "grudgingly", "he", "said", "This", "has", "to", "stop"]

# File 'lib/core/facets/string/words.rb', line 16

# Return the words of the string with punctuation removed.
#
# Punctuation characters (English and international) and any "- "
# sequence are turned into spaces; the text is then split on spaces.
def words_without_punctuation
  cleaned = gsub(/[.?¿¡…!,:：;—"。？！、‘“”„«»〈〉《》，\/\[\]]/, ' ')
  cleaned = cleaned.gsub('- ', ' ')
  cleaned.squeeze(" ").strip.split(" ")
end

Class: String

Constant Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Random::StringExtensions

Methods included from Indexable

Class Method Details

.interpolate(&str) ⇒ Object

.random(len = 32, character_set = ["A".."Z", "a".."z", "0".."9"]) ⇒ Object

Instance Method Details

#-(pattern) ⇒ Object

#/(path) ⇒ Object

#^(aString) ⇒ Object

#_crypt ⇒ Object

#acronym ⇒ Object

#align(direction, n, sep = "\n", c = ' ') ⇒ Object

#align_center(n, sep = "\n", c = ' ') ⇒ Object

#align_left(n, sep = "\n", c = ' ') ⇒ Object

#align_right(n, sep = "\n", c = ' ') ⇒ Object

#ascii_only(alt = '') ⇒ Object

#ascii_only!(alt = '') ⇒ Object

#blank? ⇒ Boolean

#bracket(bra, ket = nil) ⇒ Object

#bracket!(bra, ket = nil) ⇒ Object

#briefcase ⇒ Object

#camelcase(*separators) ⇒ Object

#capitalized? ⇒ Boolean

#cleanlines(&block) ⇒ Object

#cleave(threshold = nil, len = nil) ⇒ Object

#cmp(other) ⇒ Object

#compress_lines(spaced = true) ⇒ Object

#crypt(salt = nil) ⇒ Object

#divide(re) ⇒ Object

#downcase? ⇒ Boolean

#each_word(&block) ⇒ Object

#edit_distance(str2) ⇒ Object

#exclude?(str) ⇒ Boolean

#expand_tabs(n = 8) ⇒ Object Also known as: expand_tab

#file ⇒ Object

#fold(ignore_indented = false) ⇒ Object

#indent(n, c = ' ') ⇒ Object

#indent!(n, c = ' ') ⇒ Object

#index_all(s, reuse = false) ⇒ Object

#lchomp(match) ⇒ Object

#lchomp!(match) ⇒ Object

#line_wrap(width, tabs = 4) ⇒ Object

#linear ⇒ Object

#lower_camelcase(*separators) ⇒ Object

#lowercase ⇒ Object

#margin(num = nil, opts = {}) ⇒ Object

#methodize ⇒ Object

#modulize ⇒ Object

#mscan(re) ⇒ Object Also known as: each_match

#natcmp(str2, caseInsensitive = false) ⇒ Object

#nchar(n, replacement = nil) ⇒ Object

#newlines(&block) ⇒ Object

#number? ⇒ Boolean

#object_state(data = nil) ⇒ Object

#pathize ⇒ Object

#quote(type = :double, count = nil) ⇒ Object

#range(pattern, offset = 0) ⇒ Object

#range_all(pattern, reuse = false) ⇒ Object

#range_of_line ⇒ Object

#remove(pattern) ⇒ Object

#remove!(pattern) ⇒ Object

#rewrite(rules) ⇒ Object

#roman ⇒ Object

#roman? ⇒ Boolean

#rotate(count = 1) ⇒ Object

#rotate!(count = 1) ⇒ Object

#shatter(re) ⇒ Object

#similarity(str_in) ⇒ Object

#snakecase ⇒ Object Also known as: underscore

#splice(idx, sub = nil) ⇒ Object

#squish ⇒ Object

#squish! ⇒ Object

#titlecase ⇒ Object

#to_b ⇒ Object

#to_date ⇒ Object

#to_datetime ⇒ Object

.interpolate(&str) ⇒ `Object`

.random(len = 32, character_set = ["A".."Z", "a".."z", "0".."9"]) ⇒ `Object`

#-(pattern) ⇒ `Object`

#/(path) ⇒ `Object`

#^(aString) ⇒ `Object`

#_crypt ⇒ `Object`

#acronym ⇒ `Object`

#align(direction, n, sep = "\n", c = ' ') ⇒ `Object`

#align_center(n, sep = "\n", c = ' ') ⇒ `Object`

#align_left(n, sep = "\n", c = ' ') ⇒ `Object`

#align_right(n, sep = "\n", c = ' ') ⇒ `Object`

#ascii_only(alt = '') ⇒ `Object`

#ascii_only!(alt = '') ⇒ `Object`

#blank? ⇒ `Boolean`

#bracket(bra, ket = nil) ⇒ `Object`

#bracket!(bra, ket = nil) ⇒ `Object`

#briefcase ⇒ `Object`

#camelcase(*separators) ⇒ `Object`

#capitalized? ⇒ `Boolean`

#cleanlines(&block) ⇒ `Object`

#cleave(threshold = nil, len = nil) ⇒ `Object`

#cmp(other) ⇒ `Object`

#compress_lines(spaced = true) ⇒ `Object`

#crypt(salt = nil) ⇒ `Object`

#divide(re) ⇒ `Object`

#downcase? ⇒ `Boolean`

#each_word(&block) ⇒ `Object`

#edit_distance(str2) ⇒ `Object`

#exclude?(str) ⇒ `Boolean`

#expand_tabs(n = 8) ⇒ `Object` Also known as: expand_tab

#file ⇒ `Object`

#fold(ignore_indented = false) ⇒ `Object`

#indent(n, c = ' ') ⇒ `Object`

#indent!(n, c = ' ') ⇒ `Object`

#index_all(s, reuse = false) ⇒ `Object`

#lchomp(match) ⇒ `Object`

#lchomp!(match) ⇒ `Object`

#line_wrap(width, tabs = 4) ⇒ `Object`

#linear ⇒ `Object`

#lower_camelcase(*separators) ⇒ `Object`

#lowercase ⇒ `Object`

#margin(num = nil, opts = {}) ⇒ `Object`

#methodize ⇒ `Object`

#modulize ⇒ `Object`

#mscan(re) ⇒ `Object` Also known as: each_match

#natcmp(str2, caseInsensitive = false) ⇒ `Object`

#nchar(n, replacement = nil) ⇒ `Object`

#newlines(&block) ⇒ `Object`

#number? ⇒ `Boolean`

#object_state(data = nil) ⇒ `Object`

#pathize ⇒ `Object`

#quote(type = :double, count = nil) ⇒ `Object`

#range(pattern, offset = 0) ⇒ `Object`

#range_all(pattern, reuse = false) ⇒ `Object`

#range_of_line ⇒ `Object`

#remove(pattern) ⇒ `Object`

#remove!(pattern) ⇒ `Object`

#rewrite(rules) ⇒ `Object`

#roman ⇒ `Object`

#roman? ⇒ `Boolean`

#rotate(count = 1) ⇒ `Object`

#rotate!(count = 1) ⇒ `Object`

#shatter(re) ⇒ `Object`

#similarity(str_in) ⇒ `Object`

#snakecase ⇒ `Object` Also known as: underscore

#splice(idx, sub = nil) ⇒ `Object`

#squish ⇒ `Object`

#squish! ⇒ `Object`

#titlecase ⇒ `Object`

#to_b ⇒ `Object`

#to_date ⇒ `Object`

#to_datetime ⇒ `Object`

#to_re(esc = false) ⇒ `Object`

#to_rx(esc = true) ⇒ `Object`

#to_t(&yld) ⇒ `Object`

#to_time(form = :utc) ⇒ `Object`

#trim(num = 0) ⇒ `Object`

#unbracket(bra = nil, ket = nil) ⇒ `Object`

#unbracket!(bra = nil, ket = nil) ⇒ `Object`

#unfold ⇒ `Object`