Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/law_string.rb

Overview

Specialized #titleize and #add_typography methods.

Constant Summary collapse

# Words that stay lowercase in AP-style titles (looked up by #titleize_word).
# A decent article: prowritingaid.com/list-of-words-not-capitalized-in-titles
#
# The Set itself is frozen. Writing `Set.new %w[...].freeze` would freeze only
# the temporary word array and leave the constant mutable.
NOT_CAPITALIZED =
  Set.new(
    %w[
      a
      an
      and
      as
      at
      but
      by
      down
      for
      from
      if
      in
      into
      near
      nor
      of
      on
      onto
      or
      over
      out
      so
      than
      that
      the
      to
      up
      with
      yet
    ]
  ).freeze
# Lowercase initialisms that #titleize_word renders fully upper-cased.
#
# The Set itself is frozen. Writing `Set.new %w[...].freeze` would freeze only
# the temporary word array and leave the constant mutable.
INITIALISMS =
  Set.new(%w[atm usa]).freeze
# Matches a string made up entirely of single letters each followed by a
# period, e.g. "j.r." or "U.S.".
#
# Anchored with \A and \z so the WHOLE string must match. The line anchors
# ^ and $ would accept any multi-line string containing one matching line.
INITIALS_REGEX =
  /\A([a-zA-Z]\.)+\z/.freeze
# Matches exactly one whitespace character (POSIX [[:space:]] class).
# #titleize splits on this pattern to break a string into words.
WHITESPACES_REGEX =
/[[:space:]]/.freeze

Instance Method Summary collapse

Instance Method Details

#add_html_typographyObject



129
130
131
# File 'lib/law_string.rb', line 129

# Returns a copy of the string in which every "digits/digits" fraction is
# rewritten as HTML superscript / fraction-slash / subscript markup.
#
# @return [String] a new string; the receiver is not modified
def add_html_typography
  fraction_pattern = %r{\b(\d+)/(\d+)\b}
  fraction_markup  = '<sup>\1</sup>&frasl;<sub>\2</sub>'

  gsub(fraction_pattern, fraction_markup)
end

#add_typographyObject

Return a new string enhanced with UTF-8 typographic characters:

Single quotes: ' becomes ’. Section abbreviation: Sec. becomes §. Double-quoted spans: "…" becomes “…”.


123
124
125
126
127
# File 'lib/law_string.rb', line 123

# Returns a new string with typographic characters substituted:
#
# * every straight single quote (') becomes ’
# * "Sec." at a word boundary becomes §
# * each straight-double-quoted span "…" becomes “…”
#
# @return [String] a new string; the receiver is not modified
def add_typography
  with_apostrophes  = tr("'", '’')
  with_section_sign = with_apostrophes.gsub(/\bSec\./, '§')

  with_section_sign.gsub(/"([^"]+)"/, '“\1”')
end

#capitalize_first_letterObject



141
142
143
144
145
146
147
# File 'lib/law_string.rb', line 141

# Returns a copy of the string whose first character has been upper-cased by
# #capitalize_first_letter!; the rest of the string is left as it was.
#
# @return [String] an unfrozen copy, or '' when the receiver is empty
def capitalize_first_letter
  return '' if empty?

  # clone(freeze: false) yields a mutable copy even when the receiver is frozen.
  clone(freeze: false).tap do |copy|
    copy.capitalize_first_letter!
  end
end

#capitalize_first_letter!Object



149
150
151
152
153
# File 'lib/law_string.rb', line 149

# Upper-cases the first character of the string in place, leaving every other
# character untouched (unlike String#capitalize!, which downcases the rest).
#
# @return [String] the receiver, for both empty and non-empty strings
def capitalize_first_letter!
  return self if empty?

  self[0] = self[0].upcase
  # The assignment above evaluates to the new first character; return the
  # receiver instead so the result is the same kind of value as the empty case.
  self
end

#in?(an_array) ⇒ Boolean

Returns:

  • (Boolean)


47
48
49
# File 'lib/law_string.rb', line 47

# Asks the given collection whether it contains this string.
#
# @param an_array [#include?] the collection to search
# @return [Boolean] whatever an_array.include?(self) returns
def in?(an_array)
  an_array.include?(self)
end

#initialism?Boolean

Returns:

  • (Boolean)


55
56
57
# File 'lib/law_string.rb', line 55

# Reports whether this exact string is listed in INITIALISMS.
# The lookup is a plain Set membership test, so it is case-sensitive.
#
# @return [Boolean]
def initialism?
  INITIALISMS.include?(self)
end

#initials?Boolean

Returns:

  • (Boolean)


51
52
53
# File 'lib/law_string.rb', line 51

# Reports whether this string matches INITIALS_REGEX, i.e. looks like a run of
# single letters each followed by a period (such as "j.r.").
#
# @return [Boolean]
def initials?
  INITIALS_REGEX.match?(self)
end

#md5_sumObject



63
64
65
# File 'lib/law_string.rb', line 63

# Computes the MD5 digest of the string's bytes.
#
# @return [String] the digest as a lowercase hexadecimal string
def md5_sum
  digest = Digest::MD5.new
  digest << self
  digest.hexdigest
end

#starts_with?(str) ⇒ Boolean

Returns:

  • (Boolean)


59
60
61
# File 'lib/law_string.rb', line 59

# Alias-style wrapper around String#start_with?.
#
# Accepts one or more prefixes, mirroring String#start_with?; existing
# single-argument calls behave exactly as before.
#
# @param str [String] the first prefix to test
# @param more_prefixes [Array<String>] optional further prefixes
# @return [Boolean] true when the string begins with any given prefix
def starts_with?(str, *more_prefixes)
  start_with?(str, *more_prefixes)
end

#tailObject



106
107
108
# File 'lib/law_string.rb', line 106

# Returns everything after the first character.
#
# @return [String] the remainder; an empty string when the receiver has
#   zero or one characters
def tail
  # ''[1..-1] is nil, so fall back to a fresh (unfrozen) empty string to keep
  # the return type a String for every receiver.
  self[1..-1] || +''
end

#titleizeObject

A better titleize that creates a usable title according to English grammar rules. It’s coded to reduce object allocation.



71
72
73
74
75
76
77
78
79
80
# File 'lib/law_string.rb', line 71

# Builds a title-cased copy of the string.
#
# Underscores are treated as spaces, the text is split on each whitespace
# character, every piece goes through #titleize_word, and the pieces are
# re-joined with single spaces. Finally the first character of the result is
# upper-cased via #capitalize_first_letter!.
#
# @return [String] a new string; the receiver is not modified
def titleize
  working_copy = clone(freeze: false)
  working_copy.tr!('_', ' ')

  words = working_copy.split(WHITESPACES_REGEX)
  words.map! { |word| titleize_word(word) }

  title = words.join(' ')
  title.capitalize_first_letter!
  title
end

#titleize!Object

Replace my value with the titleized version.



113
114
115
# File 'lib/law_string.rb', line 113

# Replaces this string's contents, in place, with the result of #titleize.
#
# @return [String] the receiver (String#replace returns self)
def titleize!
  replace titleize
end

#titleize_word(word) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/law_string.rb', line 82

# Applies title-casing rules to a single word.
#
# One leading '"', '(', '[' or '{' is set aside and re-attached afterwards.
# The remaining text is downcased, then:
#
# * left lowercase when it is listed in NOT_CAPITALIZED
# * fully upper-cased when it is #initials? or an #initialism?
# * capitalized otherwise
#
# NOTE: when there is no leading punctuation the argument itself is modified
# in place (downcase!/upcase!/capitalize!), so it must not be frozen.
#
# @param word [String] a single whitespace-free token
# @return [String] the cased word, with any leading punctuation restored
def titleize_word(word)
  prefix = ''
  if word.start_with?('"', '(', '[', '{')
    prefix = word[0]
    word   = word.tail
  end

  word.downcase!
  unless NOT_CAPITALIZED.include?(word)
    if word.initials? || word.initialism?
      word.upcase!
    else
      word.capitalize!
    end
  end

  # Skip the concatenation (and its allocation) when nothing was stripped.
  prefix.empty? ? word : prefix + word
end

#utf8_safeObject

Take text with potential encoding problems and aggressively make it safe for UTF-8 import.



137
138
139
# File 'lib/law_string.rb', line 137

# Returns a UTF-8 copy of the string that is safe to import.
#
# The source bytes are declared as 'binary', and any byte that cannot be
# converted is replaced with the empty string. In practice this removes every
# byte outside 7-bit ASCII, including bytes of otherwise valid multi-byte
# UTF-8 characters.
#
# @return [String] a new UTF-8 encoded string
def utf8_safe
  drop_unconvertible = { invalid: :replace, undef: :replace, replace: '' }

  encode('UTF-8', 'binary', **drop_unconvertible)
end