Class: SportDb::Import::ClubIndex

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/config/club_index.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ClubIndex

Returns a new instance of ClubIndex.



34
35
36
37
38
# File 'lib/sportdb/config/club_index.rb', line 34

## set up an empty index - no clubs, no name mappings, no errors recorded
def initialize
  @clubs_by_name  = Hash.new   ## filled by add with lists of club records (keyed by normalized name)
  @clubs          = Hash.new   ## clubs (indexed) by canonical name
  @errors         = Array.new  ## messages get appended here by add
end

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



40
41
42
# File 'lib/sportdb/config/club_index.rb', line 40

## reader for the errors attribute - hands back the very same object (no copy)
def errors() @errors; end

Class Method Details

.build(path) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/sportdb/config/club_index.rb', line 9

## build a new club index from the datafiles found via path
##
##  - reads all club datafiles and adds the records (see add)
##  - reads all wiki(pedia) datafiles and passes the records on to add_wiki
##
## returns the new (filled-up) index
def self.build( path )
  ## step 1) read in all club records and add to a new index
  recs = Configuration.find_datafiles_clubs( path ).flat_map do |datafile|
    ClubReader.read( datafile )
  end

  clubs = self.new
  clubs.add( recs )

  ## step 2) add wiki(pedia) anchored links
  ##   note: the wiki records are no longer dumped (pp) to stdout - debug leftover removed
  wiki_recs = Configuration.find_datafiles_clubs_wiki( path ).flat_map do |datafile|
    WikiReader.read( datafile )
  end

  clubs.add_wiki( wiki_recs )
  clubs
end

Instance Method Details

#[](name) ⇒ Object

lookup by canonical name only



163
164
165
# File 'lib/sportdb/config/club_index.rb', line 163

## lookup by canonical name only - plain hash lookup (the name is used as is)
def []( name ) @clubs[ name ]; end

#add(rec_or_recs) ⇒ Object

add club record / alt_names



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/sportdb/config/club_index.rb', line 84

## add club record(s) - registers the canonical name and
##   all names (canonical + alt names + auto alt names) for lookup
##
##  note: a canonical name conflict or duplicate "hand-typed" names
##          abort the program (exit 1);
##        name conflicts in the lookup table get printed and recorded in @errors only
def add( rec_or_recs )
  ## note: accept a single record too - wrap it in an array
  recs = if rec_or_recs.is_a?( Array )
           rec_or_recs
         else
           [rec_or_recs]
         end

  recs.each do |rec|
    ### step 1) add canonical name - must not be in use
    old_rec = @clubs[ rec.name ]
    if old_rec
      puts "** !!! ERROR !!! - (canonical) name conflict - duplicate - >#{rec.name}< will overwrite >#{old_rec.name}<:"
      pp old_rec
      pp rec
      exit 1
    end
    @clubs[ rec.name ] = rec

    ## step 2) collect all names (canonical name + alt names + alt names (auto))
    names = [rec.name] + rec.alt_names

    ## "hand-typed" names with year(s) e.g. (1887-1911), (-2013),
    ##   (1946-2001,2013-) etc. get auto-added (again) with the year(s) stripped
    more_names = names.each_with_object( [] ) do |name, acc|
      acc << strip_year( name )   if has_year?( name )
    end
    names += more_names

    ## check for duplicates - simple check for now - fix/improve
    ## todo/fix: (auto)remove duplicates - why? why not?
    count      = names.size
    count_uniq = names.uniq.size
    unless count == count_uniq
      puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
      pp names
      pp rec
      exit 1
    end

    ## check (again) with auto-names - just warn for now and do not exit
    names += rec.alt_names_auto
    count      = names.size
    count_uniq = names.uniq.size
    unless count == count_uniq
      puts "** !!! WARN !!! - #{count-count_uniq} duplicate name(s):"
      pp names
      pp rec
    end

    names.each do |raw_name|
      ## strip lang codes e.g. [en], [fr], etc.
      ##  todo/check/fix:  move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
      name = strip_lang( raw_name )
      norm = normalize( name )
      alt_recs = @clubs_by_name[ norm ]

      ## first record for this (normalized) name - start a new list
      unless alt_recs
        @clubs_by_name[ norm ] = [rec]
        next
      end

      ## name in use - by this very club record or by another one?
      already_included = alt_recs.include?( rec )
      msg = if already_included
              "** !!! WARN !!! - (norm) name conflict/duplicate for club - >#{name}< normalized to >#{norm}< already included >#{rec.name}, #{rec.country.name}<"
            else
              "** !!! WARN !!! - name conflict/duplicate - >#{name}< will overwrite >#{alt_recs[0].name}, #{alt_recs[0].country.name}< with >#{rec.name}, #{rec.country.name}<"
            end
      puts msg
      @errors << msg
      ## note: do NOT include duplicate club record
      alt_recs << rec   unless already_included
    end
  end
end

#add_wiki(rec_or_recs) ⇒ Object

add wiki(pedia) club record / links



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/sportdb/config/club_index.rb', line 55

## add wiki(pedia) club record(s) / links
##   sets the wikipedia page name on the one-and-only matching club;
##   no match or more than one match aborts the program (exit 1)
def add_wiki( rec_or_recs )
  ## note: accept a single record too - wrap it in an array
  recs = if rec_or_recs.is_a?( Array )
           rec_or_recs
         else
           [rec_or_recs]
         end

  recs.each do |rec|
    ## note: strip qualifier () from wikipedia page name if present
    ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
    ##      Willem II (football club)  => Willem II
    ##
    ## e.g. do NOT strip others !! e.g.
    ##   América Futebol Clube (MG)
    ##  only add more "special" cases on demand (that, is) if we find more
    name = strip_wiki( rec.name )

    matches = match_by( name: name, country: rec.country )
    if matches.nil?
      puts "** !!! ERROR !!! - no matching club found for wiki(pedia) name >#{name}, #{rec.country.name} (#{rec.country.key})<; sorry - to fix add name to clubs"
      exit 1
    elsif matches.size > 1
      puts "** !!! ERROR !!! - too many (greater than one) matching clubs found for wiki(pedia) name >#{name}, #{rec.country.name} (#{rec.country.key})<"
      pp matches
      exit 1
    end

    ## note: keep the full (unstripped) wikipedia page name
    matches[0].wikipedia = rec.name
  end
end

#clubs ⇒ Object



44
# File 'lib/sportdb/config/club_index.rb', line 44

## all club records - that is, the values of the (canonical name) index
def clubs
  @clubs.values
end

#dump_duplicates ⇒ Object

debug helper - report duplicate club name records



200
201
202
203
204
205
206
207
# File 'lib/sportdb/config/club_index.rb', line 200

## debug helper - report duplicate club name records
##   prints every name in the lookup table that maps to more than one club record
def dump_duplicates
  @clubs_by_name.each do |name, clubs|
    next unless clubs.size > 1

    puts "#{clubs.size} matching club duplicates for >#{name}<:"
    pp clubs
  end
end

#errors? ⇒ Boolean

Returns:

  • (Boolean)


41
# File 'lib/sportdb/config/club_index.rb', line 41

## any messages recorded? - true if the errors list is NOT empty
def errors?
  !@errors.empty?
end

#has_year?(name) ⇒ Boolean

Returns:

  • (Boolean)


49
# File 'lib/sportdb/config/club_index.rb', line 49

## helper from club - passes the name on to Club.has_year?
def has_year?( name )
  Club.has_year?( name )
end

#mappings ⇒ Object

todo/check: rename to index or something - why? why not?



43
# File 'lib/sportdb/config/club_index.rb', line 43

## the name lookup table itself (no copy)
##   todo/check: rename to index or something - why? why not?
def mappings
  @clubs_by_name
end

#match(name) ⇒ Object



167
168
169
170
171
# File 'lib/sportdb/config/club_index.rb', line 167

## lookup by (normalized) name
##   returns whatever is stored in the lookup table for the name, or nil if there is no entry
def match( name )
  ## todo/check: return empty array if no match!!! and NOT nil (add || []) - why? why not?
  @clubs_by_name[ normalize( name ) ]
end

#match_by(name:, country:) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/sportdb/config/club_index.rb', line 174

## lookup by name and filter the matches by country
##   country - a country struct or a key that gets looked up in the configured countries;
##             an unknown key aborts the program (exit 1)
##   returns the matching clubs (array) or nil if no match
def match_by( name:, country: )
  ## note: match must for now always  include name
  m = match( name )
  return m   unless m     ## no match by name - nothing to filter

  ## note: country assumes / allows the country key or fifa code for now
  country_rec = case country
                when SportDb::Import::Country
                  ## note: allow passing in of country struct too
                  ##   (re)use country struct - no need to run lookup again
                  country
                else
                  rec = SportDb::Import.config.countries[ country ]
                  if rec.nil?
                    puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
                    exit 1
                  end
                  rec
                end

  by_country = m.select { |club| club.country.key == country_rec.key }
  ## note: reset to nil if no more matches
  by_country.empty? ? nil : by_country
end

#normalize(name) ⇒ Object



52
# File 'lib/sportdb/config/club_index.rb', line 52

## helper from club - passes the name on to Club.normalize
def normalize( name )
  Club.normalize( name )
end

#strip_lang(name) ⇒ Object



50
# File 'lib/sportdb/config/club_index.rb', line 50

## helper from club - passes the name on to Club.strip_lang
def strip_lang( name )
  Club.strip_lang( name )
end

#strip_wiki(name) ⇒ Object



51
# File 'lib/sportdb/config/club_index.rb', line 51

## helper from club - passes the name on to Club.strip_wiki
def strip_wiki( name )
  Club.strip_wiki( name )
end

#strip_year(name) ⇒ Object

helpers from club - use a helper module for includes - why? why not?



48
# File 'lib/sportdb/config/club_index.rb', line 48

## helpers from club - use a helper module for includes - why? why not?
##   passes the name on to Club.strip_year
def strip_year( name )
  Club.strip_year( name )
end