Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/law_string.rb

Overview

Specialized #titleize and #add_typography methods.

Constant Summary collapse

# For AP Style Titles: lowercase words that titleize_word leaves uncapitalized.
# The parentheses matter: `Set.new %w[...].freeze` would freeze only the
# temporary Array and leave the Set itself mutable.
NOT_CAPITALIZED =
  Set.new(%w[a an and at but by for in nor of on or out so the to up yet]).freeze

# Lowercase initialisms that titleize_word renders fully upcased.
INITIALISMS =
  Set.new(%w[atm]).freeze

# Matches a string consisting solely of "letter + period" groups, e.g. "j.r.r.".
# Anchored with \A and \z so the WHOLE string must match; ^ and $ are per-line
# anchors and would accept input such as "junk\na.b.".
INITIALS_REGEX =
  /\A([a-zA-Z]\.)+\z/.freeze

# Matches one whitespace character (POSIX [[:space:]] class).
WHITESPACES_REGEX =
  /[[:space:]]/.freeze

Instance Method Summary collapse

Instance Method Details

#add_html_typography ⇒ Object



97
98
99
# File 'lib/law_string.rb', line 97

# Return a copy in which every "digits/digits" run (bounded by word
# boundaries) is rewritten as an HTML fraction: superscript numerator,
# a &frasl; entity, and subscript denominator.
def add_html_typography
  fraction_pattern = %r{\b(\d+)/(\d+)\b}
  fraction_markup  = '<sup>\1</sup>&frasl;<sub>\2</sub>'

  gsub(fraction_pattern, fraction_markup)
end

#add_typography ⇒ Object

Return a new string enhanced with UTF-8 typographic characters:

Single quotes: ' becomes ’. Section abbreviation: Sec. becomes §. Double-quoted spans: "text" becomes “text”.


91
92
93
94
95
# File 'lib/law_string.rb', line 91

# Return a new string with typographic substitutions applied in this order:
#   1. every straight single quote becomes a right single quotation mark;
#   2. "Sec." at a word boundary becomes the section sign;
#   3. each span wrapped in straight double quotes gets curly double quotes.
# The receiver is not modified.
def add_typography
  curly_apostrophes = tr("'", '’')
  section_signs     = curly_apostrophes.gsub(/\bSec\./, '§')

  section_signs.gsub(/"([^"]+)"/, '“\1”')
end

#capitalize_first_letter ⇒ Object



109
110
111
112
113
114
115
# File 'lib/law_string.rb', line 109

# Non-destructive counterpart of #capitalize_first_letter!.
#
# Returns an unfrozen copy of the receiver after #capitalize_first_letter!
# has been applied to that copy; an empty receiver yields a new empty string.
def capitalize_first_letter
  return '' if empty?

  # clone(freeze: false) gives a mutable copy even when the receiver is frozen.
  clone(freeze: false).tap { |copy| copy.capitalize_first_letter! }
end

#capitalize_first_letter! ⇒ Object



117
118
119
120
121
# File 'lib/law_string.rb', line 117

# Upcase the first character of the receiver in place, leaving the rest of
# the string untouched.
#
# Always returns the receiver itself, so the result is the same for empty and
# non-empty strings and the call can be chained. (Ending the method on the
# `self[0] = ...` assignment would return only the replacement character.)
def capitalize_first_letter!
  return self if empty?

  self[0] = self[0].upcase
  self
end

#in?(an_array) ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
# File 'lib/law_string.rb', line 15

# Membership test written from the element's point of view.
#
# an_array - any object responding to include? (the call is delegated as-is).
#
# Returns whatever an_array.include? reports for this string.
def in?(an_array)
  an_array.include? self
end

#initialism? ⇒ Boolean

Returns:

  • (Boolean)


23
24
25
# File 'lib/law_string.rb', line 23

# Is this exact string one of the entries in the INITIALISMS set?
# The comparison is exact, so case matters.
def initialism?
  INITIALISMS.member?(self)
end

#initials? ⇒ Boolean

Returns:

  • (Boolean)


19
20
21
# File 'lib/law_string.rb', line 19

# Does this string match INITIALS_REGEX (one or more "letter + period"
# groups, such as "j.r.r.")?
def initials?
  match?(INITIALS_REGEX)
end

#md5_sum ⇒ Object



31
32
33
# File 'lib/law_string.rb', line 31

# Hex-encoded MD5 digest of this string's bytes (32 lowercase hex digits).
def md5_sum
  Digest::MD5.new.update(self).hexdigest
end

#starts_with?(str) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
# File 'lib/law_string.rb', line 27

# Alias-style wrapper around String#start_with? taking one prefix argument.
#
# str - the prefix to test for (passed straight through to start_with?).
#
# Returns true when the receiver begins with str, false otherwise.
def starts_with?(str)
  start_with? str
end

#tail ⇒ Object



74
75
76
# File 'lib/law_string.rb', line 74

# Everything after the first character, as a new string.
#
# A one-character receiver yields "". An empty receiver yields nil, because
# index 1 lies past the end of the string.
def tail
  slice(1..-1)
end

#titleize ⇒ Object

A better titleize that creates a usable title according to English grammar rules. It’s coded to reduce object allocation.



39
40
41
42
43
44
45
46
47
48
# File 'lib/law_string.rb', line 39

# Build a title-cased copy of the receiver:
#   1. underscores are turned into spaces;
#   2. the text is split on WHITESPACES_REGEX;
#   3. each piece is passed through titleize_word;
#   4. the pieces are joined with single spaces;
#   5. the first character of the result is upcased.
# The receiver itself is left unchanged.
def titleize
  words = tr('_', ' ').split(WHITESPACES_REGEX)
  title = words.map { |word| titleize_word(word) }.join(' ')

  # Force a leading capital even when the first word was left lowercase.
  title.capitalize_first_letter!
  title
end

#titleize! ⇒ Object

Replace my value with the titleized version.



81
82
83
# File 'lib/law_string.rb', line 81

# In-place variant of #titleize: overwrite this string's contents with its
# titleized form. Returns the receiver (the result of String#replace).
def titleize!
  replace(titleize)
end

#titleize_word(word) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/law_string.rb', line 50

# Apply title casing to a single word.
#
# word - the String to convert. It is never modified: all in-place case
#        changes happen on a private copy, so frozen strings are accepted
#        and the caller's object keeps its original contents.
#
# Rules, applied to the word after one optional leading character
# (", (, [ or {) has been set aside:
#   * the word is downcased;
#   * words listed in NOT_CAPITALIZED stay lowercase;
#   * initials ("j.r.") and known initialisms are fully upcased;
#   * every other word is capitalized.
# The leading character, if any, is put back in front of the result.
#
# Returns a new String.
def titleize_word(word)
  if word.start_with?('"', '(', '[', '{')
    extra = word[0]
    body  = word.tail # already a fresh string, safe to mutate
  else
    extra = ''
    body  = word.dup  # copy so the bang methods below never touch the argument
  end

  body.downcase!
  if NOT_CAPITALIZED.include?(body)
    # Small words stay lowercase.
  elsif body.initials? || body.initialism?
    body.upcase!
  else
    body.capitalize!
  end

  extra.empty? ? body : extra + body
end

#utf8_safe ⇒ Object

Take text with potential encoding problems and aggressively make it safe for UTF-8 import.



105
106
107
# File 'lib/law_string.rb', line 105

# Return a UTF-8 copy of this string that is safe to import.
#
# The bytes are transcoded from binary to UTF-8, and anything without a
# mapping is replaced by the empty string. In practice only 7-bit ASCII
# survives: accented letters and other multi-byte characters are removed
# along with genuinely malformed bytes.
def utf8_safe
  drop_unmappable = { invalid: :replace, undef: :replace, replace: '' }

  encode('UTF-8', 'binary', **drop_unmappable)
end