Module: SportDb::NameHelper

Included in:
Sports::Team
Defined in:
lib/sportdb/structs/name_helper.rb

Constant Summary collapse

YEAR_RE =

note: also allow placeholder years, e.g. (-_) or (-????),

  for marking missing (to be filled in) years
e.g. (1887-1911), (-2013),
    (1946-2001, 2013-) etc.
todo/check: make stricter, e.g. only accept 4-digit years? - why? why not?
## matches one parenthesised group made up only of digits, commas, spaces, ?, _ and -
%r{\(
    [0-9, ?_-]+?     # note: non-greedy (minimum/first) match
\)}x
## matches a language code in square brackets, e.g. [en] or [fr];
##  a single letter such as [a], [d] or [e] is accepted as well - why? why not?
LANG_RE = %r{
  \[
    [a-z]{1,2}    # one or two lowercase letters
  \]
}x
NORM_RE =

note: also add (),’,− etc. e.g.

 Estudiantes (LP) => Estudiantes LP
 Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
 Myllykosken Pallo −47 => Myllykosken Pallo 47

add & too!!
 e.g. Brighton & Hove Albion => Brighton Hove Albion  -- and others in England
## a single character class - each listed character is matched on its own
##   (strip_norm removes every match from a name)
%r{
  [.'’º/()&_−-]
}x

Instance Method Summary collapse

Instance Method Details

#has_lang?(name) ⇒ Boolean

Returns:

  • (Boolean)


32
# File 'lib/sportdb/structs/name_helper.rb', line 32

## Checks whether name contains a language tag as matched by LANG_RE.
##
## @param name [String, nil] the name to inspect
## @return [Boolean] true if LANG_RE matches anywhere in name, false otherwise
##                   (including when name is nil)
def has_lang?( name )
  ## note: use match? to get a real true/false - the former `=~`
  ##   returned the match offset (Integer) or nil instead
  LANG_RE.match?( name )
end

#has_year?(name) ⇒ Boolean

Returns:

  • (Boolean)


22
# File 'lib/sportdb/structs/name_helper.rb', line 22

## Checks whether name contains a year marker as matched by YEAR_RE.
##
## @param name [String, nil] the name to inspect
## @return [Boolean] true if YEAR_RE matches anywhere in name, false otherwise
##                   (including when name is nil)
def has_year?( name )
  ## note: use match? to get a real true/false - the former `=~`
  ##   returned the match offset (Integer) or nil instead
  YEAR_RE.match?( name )
end

#normalize(name) ⇒ Object



72
73
74
75
76
77
78
79
80
# File 'lib/sportdb/structs/name_helper.rb', line 72

## Builds the normalized form of a name: strip_norm first, then every
##  space removed, then lowercased via downcase_i18n.
##
## @param name [String] the name to normalize
## @return the result of downcase_i18n
def normalize( name )
  ## sanitize is deliberately NOT called here (normalize stays "atomic" for reuse)
  squeezed = strip_norm( name ).delete( ' ' )   ## no spaces survive either!!!

  ## todo/check: use our own downcase - why? why not?
  ##   upper and lowercase do NOT matter for now
  downcase_i18n( squeezed )
end

#sanitize(name) ⇒ Object



35
36
37
38
39
40
41
42
# File 'lib/sportdb/structs/name_helper.rb', line 35

## Cleans a name in two passes: strip_year first, strip_lang second.
##
## @param name [String] the name to clean
## @return the result of strip_lang applied to the year-stripped name
def sanitize( name )
  ## pass 1: year(s), e.g. (1887-1911), (-2013), (1946-2001,2013-) etc.
  without_years = strip_year( name )
  ## pass 2: lang codes, e.g. [en], [fr], etc.
  strip_lang( without_years )
end

#strip_lang(name) ⇒ Object



28
29
30
# File 'lib/sportdb/structs/name_helper.rb', line 28

## Removes every LANG_RE match from name and trims the surrounding whitespace.
##
## @param name [String] the name to clean
## @return [String] a new string; name itself is not modified
def strip_lang( name )
  without_tags = name.gsub( LANG_RE, '' )
  without_tags.strip
end

#strip_norm(name) ⇒ Object

for norm(alizing) names



68
69
70
# File 'lib/sportdb/structs/name_helper.rb', line 68

## For norm(alizing) names: removes every NORM_RE match from name.
##  Spaces and letter case are left as they are.
##
## @param name [String] the name to clean
## @return [String] a new string; name itself is not modified
def strip_norm( name )
  cleaned = name.gsub( NORM_RE, '' )
  cleaned
end

#strip_year(name) ⇒ Object



15
16
17
18
19
20
# File 'lib/sportdb/structs/name_helper.rb', line 15

## Removes every YEAR_RE match from name and trims the surrounding whitespace.
##
## @param name [String] the name to clean
## @return [String] a new string; name itself is not modified
def strip_year( name )
  ## year(s) look like (1887-1911), (-2013),
  ##                   (1946-2001, 2013-) etc.
  ##  todo/check: only sub once (not global) - why? why not?
  without_years = name.gsub( YEAR_RE, '' )
  without_years.strip
end

#variants(name) ⇒ Object



83
# File 'lib/sportdb/structs/name_helper.rb', line 83

## Looks up the variants of name by delegating to Variant.find.
##
## @param name the name handed through unchanged to Variant.find
## @return whatever Variant.find returns
def variants( name )
  Variant.find( name )
end