Class: SportDb::Import::Club

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/config/club.rb

Overview

note: use our own (internal) club struct for now - why? why not?

- check that shape/structure/fields/attributes match
  the Team struct in sportdb-text (in SportDb::Struct::Team)  !!!!

Constant Summary collapse

YEAR_REGEX =

“global” helper - move to _ ? why? why not?

/\([0-9,\- ]+?\)/
LANG_REGEX =

note also allow [a] or [d] or [e] - why? why not?

/\[[a-z]{1,2}\]/
NORM_REGEX =
/[.'º\-\/]/

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Club

Returns a new instance of Club.



40
41
42
43
# File 'lib/sportdb/config/club.rb', line 40

## Sets up a new club with empty name-variant lists.
##  Only the two alternate-name lists are initialized here; every other
##  instance variable stays unset (nil) until assigned.
def initialize
  @alt_names_auto = []   # auto-generated variants (filled by add_variants)
  @alt_names      = []   # alternate names
end

Instance Attribute Details

#alt_names ⇒ Object

todo: use just names for alt_names - why? why not?



12
13
14
# File 'lib/sportdb/config/club.rb', line 12

## Reader for @alt_names (set to an empty array by the constructor).
def alt_names()  @alt_names; end

#alt_names_auto ⇒ Object

special import only attribs



19
20
21
# File 'lib/sportdb/config/club.rb', line 19

## Reader for @alt_names_auto (set to an empty array by the constructor).
def alt_names_auto()  @alt_names_auto; end

#city ⇒ Object

todo: use just names for alt_names - why? why not?



12
13
14
# File 'lib/sportdb/config/club.rb', line 12

## Reader for @city (nil until assigned).
def city()  @city; end

#country ⇒ Object

more attribs - todo/fix - also add “upstream” to struct & model!!!!!



16
17
18
# File 'lib/sportdb/config/club.rb', line 16

## Reader for @country (nil until assigned).
def country()  @country; end

#district ⇒ Object

more attribs - todo/fix - also add “upstream” to struct & model!!!!!



16
17
18
# File 'lib/sportdb/config/club.rb', line 16

## Reader for @district (nil until assigned).
def district()  @district; end

#geos ⇒ Object

more attribs - todo/fix - also add “upstream” to struct & model!!!!!



16
17
18
# File 'lib/sportdb/config/club.rb', line 16

## Reader for @geos (nil until assigned).
def geos()  @geos; end

#ground ⇒ Object

todo: use just names for alt_names - why? why not?



12
13
14
# File 'lib/sportdb/config/club.rb', line 12

## Reader for @ground (nil until assigned).
def ground()  @ground; end

#name ⇒ Object

todo: use just names for alt_names - why? why not?



12
13
14
# File 'lib/sportdb/config/club.rb', line 12

## Reader for @name (nil until assigned).
def name()  @name; end

#wikipedia ⇒ Object

wikipedia page name (for english (en))



20
21
22
# File 'lib/sportdb/config/club.rb', line 20

## Reader for @wikipedia (nil until assigned).
##  note: wikipedia_url builds the en.wikipedia.org link from this value
def wikipedia()  @wikipedia; end

#year ⇒ Object

todo: use just names for alt_names - why? why not?



12
13
14
# File 'lib/sportdb/config/club.rb', line 12

## Reader for @year (nil until assigned).
def year()  @year; end

#year_end ⇒ Object

more attribs - todo/fix - also add “upstream” to struct & model!!!!!



16
17
18
# File 'lib/sportdb/config/club.rb', line 16

## Reader for @year_end (nil until assigned).
##  note: historic? reports true whenever this value is set
def year_end()  @year_end; end

Class Method Details

.has_lang?(name) ⇒ Boolean

Returns:

  • (Boolean)


91
# File 'lib/sportdb/config/club.rb', line 91

## Checks if name carries a lang code marker e.g. [en], [fr], [a], etc.
##  (anything matching LANG_REGEX).
##
##  note: returns a strict true/false (not the match index or nil)
##        in line with the documented Boolean return; a nil name gives false
def self.has_lang?( name )  (name =~ LANG_REGEX) ? true : false; end

.has_year?(name) ⇒ Boolean

Returns:

  • (Boolean)


84
# File 'lib/sportdb/config/club.rb', line 84

## Checks if name carries a year marker e.g. (1887-1911), (-2013), etc.
##  (anything matching YEAR_REGEX).
##
##  note: returns a strict true/false (not the match index or nil)
##        in line with the documented Boolean return; a nil name gives false
def self.has_year?( name )  (name =~ YEAR_REGEX) ? true : false; end

.normalize(name) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
# File 'lib/sportdb/config/club.rb', line 110

## Builds the normalized form of a name:
##   1) strip_norm - drops dots (.), dashes (-), º, /, etc.
##   2) all spaces get removed too
##   3) downcase_i18n - upper/lowercase does NOT matter for now
##
##  note: does NOT call sanitize on purpose (keeps normalize "atomic" for reuse)
##  todo/fix: use our own downcase - why? why not?
def self.normalize( name )
  squeezed = strip_norm( name ).gsub( ' ', '' )
  downcase_i18n( squeezed )
end

.sanitize(name) ⇒ Object



93
94
95
96
97
98
99
100
# File 'lib/sportdb/config/club.rb', line 93

## Removes import-only markers from a name - in this order:
##   1) year(s)    e.g. (1887-1911), (-2013), (1946-2001,2013-) etc.
##   2) lang codes e.g. [en], [fr], etc.
def self.sanitize( name )
  strip_lang( strip_year( name ) )
end

.strip_lang(name) ⇒ Object



87
88
89
# File 'lib/sportdb/config/club.rb', line 87

## Removes all lang code markers (see LANG_REGEX) e.g. [en], [fr], etc.
##  and trims leading/trailing whitespace from what is left.
def self.strip_lang( name )
  without_lang = name.gsub( LANG_REGEX, '' )
  without_lang.strip
end

.strip_norm(name) ⇒ Object

note: remove all dots (.), dashes (-), apostrophes ('), º, /, etc.

for norm(alizing) names


106
107
108
# File 'lib/sportdb/config/club.rb', line 106

## Removes every character matched by NORM_REGEX
##  (dots, apostrophes, º, dashes, slashes) for norm(alizing) names.
##  note: whitespace is NOT touched here
def self.strip_norm( name )
  stripped = name.gsub( NORM_REGEX, '' )
  stripped
end

.strip_wiki(name) ⇒ Object

todo/check: rename to strip_wikipedia_en - why? why not?



123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/sportdb/config/club.rb', line 123

## Strips the disambiguation qualifier from a wikipedia page name (if present).
##  todo/check: rename to strip_wikipedia_en - why? why not?
##
##  note: only year and foot... qualifiers get removed for now
##    e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
##         Willem II (football club)  => Willem II
##
##  all others do NOT get stripped !! e.g.
##         América Futebol Clube (MG)
##  only add more "special" cases on demand (that is, if we find more)
def self.strip_wiki( name )
  ## 1) qualifier starting with a digit 1 or 2 (assuming year)
  without_year = name.gsub( /\([12][^\)]+?\)/, '' ).strip
  ## 2) qualifier starting with foot (assuming football ...)
  without_year.gsub( /\(foot[^\)]+?\)/, '' ).strip
end

.strip_year(name) ⇒ Object



78
79
80
81
82
# File 'lib/sportdb/config/club.rb', line 78

## Removes all year markers (see YEAR_REGEX)
##   e.g. (1887-1911), (-2013), (1946-2001, 2013-) etc.
##  and trims leading/trailing whitespace from what is left.
def self.strip_year( name )
  without_year = name.gsub( YEAR_REGEX, '' )
  without_year.strip
end

Instance Method Details

#add_variants(name_or_names) ⇒ Object



66
67
68
69
70
71
72
# File 'lib/sportdb/config/club.rb', line 66

## Appends the variants of one or more names to alt_names_auto.
##  Accepts a single name or an array of names; every name gets
##  run through sanitize first and then through variants.
##
##  note: sanitize and variants get called as instance methods here -
##        presumably private helpers of this class (not shown in the docs)
def add_variants( name_or_names )
  names = if name_or_names.is_a?( Array )
            name_or_names
          else
            [name_or_names]
          end

  names.each do |raw_name|
    clean_name = sanitize( raw_name )
    self.alt_names_auto += variants( clean_name )
  end
end

#duplicates ⇒ Object



55
56
57
58
59
60
61
62
63
64
# File 'lib/sportdb/config/club.rb', line 55

## Collects all names (name + alt_names + alt_names_auto) that collapse
##  to the same normalized form.
##
##  Returns a hash of normalized name => array of original names
##   with only the entries used more than once (empty hash if no duplicates).
def duplicates
  candidates = [name] + alt_names + alt_names_auto

  ## group the original names by normalized key (keeps first-seen order)
  by_norm = candidates.each_with_object( Hash.new ) do |candidate, groups|
    key = normalize( sanitize( candidate ) )
    groups[key] ||= []
    groups[key] << candidate
  end

  by_norm.select { |_key, originals| originals.size > 1 }
end

#duplicates? ⇒ Boolean

helper methods for import only check for duplicates

Returns:

  • (Boolean)


48
49
50
51
52
53
# File 'lib/sportdb/config/club.rb', line 48

## Helper for import only - checks for duplicate names.
##  Returns true if any two of name + alt_names + alt_names_auto
##  end up with the same sanitized & normalized form.
def duplicates?
  keys = ([name] + alt_names + alt_names_auto).map do |candidate|
    normalize( sanitize( candidate ) )
  end

  keys.uniq.size != keys.size
end

#historic? ⇒ Boolean Also known as: past?

Returns:

  • (Boolean)


22
# File 'lib/sportdb/config/club.rb', line 22

## Reports true if an end year (@year_end) is set; false otherwise.
def historic?
  return true if @year_end

  false
end

#wikipedia? ⇒ Boolean

Returns:

  • (Boolean)


26
# File 'lib/sportdb/config/club.rb', line 26

## Reports true if a wikipedia page name (@wikipedia) is set; false otherwise.
##  note: returns a strict true/false (same as historic?) and
##        NOT the page name itself - use wikipedia for the value
def wikipedia?()  @wikipedia ? true : false; end

#wikipedia_url ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/sportdb/config/club.rb', line 27

## Builds the (english) wikipedia page url from the page name (@wikipedia).
##  Returns nil if no page name is set.
##
##  note: spaces get replaced with underscores (_)
##    e.g. Club Brugge KV => Club_Brugge_KV
##  todo/check/fix:
##    check if "plain" dash (-) needs to get replaced with typographic dash??
def wikipedia_url
  return nil unless @wikipedia

  page = @wikipedia.tr( ' ', '_' )
  "https://en.wikipedia.org/wiki/#{page}"
end