Class: SportDb::Import::ClubIndex
- Inherits:
-
Object
- Object
- SportDb::Import::ClubIndex
- Defined in:
- lib/sportdb/config/club_index.rb
Instance Attribute Summary collapse
-
#errors ⇒ Object
readonly
Returns the value of attribute errors.
Class Method Summary collapse
Instance Method Summary collapse
-
#[](name) ⇒ Object
lookup by canonical name only.
-
#add(rec_or_recs) ⇒ Object
add club record / alt_names.
-
#add_wiki(rec_or_recs) ⇒ Object
add wiki(pedia) club record / links.
- #clubs ⇒ Object
-
#dump_duplicates ⇒ Object
debug helper - report duplicate club name records.
- #errors? ⇒ Boolean
- #has_year?(name) ⇒ Boolean
-
#initialize ⇒ ClubIndex
constructor
A new instance of ClubIndex.
-
#mappings ⇒ Object
todo/check: rename to index or something - why? why not?.
- #match(name) ⇒ Object
- #match_by(name:, country:) ⇒ Object
- #normalize(name) ⇒ Object
- #strip_lang(name) ⇒ Object
- #strip_wiki(name) ⇒ Object
-
#strip_year(name) ⇒ Object
helpers from club - use a helper module for includes - why? why not?.
Constructor Details
#initialize ⇒ ClubIndex
Returns a new instance of ClubIndex.
34 35 36 37 38 |
# File 'lib/sportdb/config/club_index.rb', line 34
# Start with an empty index: no clubs, no name mappings, no recorded errors.
def initialize
  @clubs         = {}   ## clubs (indexed) by canonical name
  @clubs_by_name = {}   ## arrays of club records keyed by name
  @errors        = []   ## messages collected while adding records
end
Instance Attribute Details
#errors ⇒ Object (readonly)
Returns the value of attribute errors.
40 41 42 |
# File 'lib/sportdb/config/club_index.rb', line 40
# Reader for the list of messages collected in @errors.
def errors() @errors; end
Class Method Details
.build(path) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/sportdb/config/club_index.rb', line 9
# Build a club index from the datafiles found for path:
# first all club records are read and added, then the wiki(pedia)
# records are read, dumped with pp and attached via add_wiki.
# Returns the populated index.
def self.build( path )
  club_datafiles = Configuration.find_datafiles_clubs( path )
  club_recs = club_datafiles.reduce( [] ) do |acc, datafile|
    acc + ClubReader.read( datafile )
  end

  index = self.new
  index.add( club_recs )

  ## add wiki(pedia) anchored links
  wiki_datafiles = Configuration.find_datafiles_clubs_wiki( path )
  wiki_recs = wiki_datafiles.reduce( [] ) do |acc, datafile|
    acc + WikiReader.read( datafile )
  end

  pp wiki_recs
  index.add_wiki( wiki_recs )

  index
end
Instance Method Details
#[](name) ⇒ Object
lookup by canonical name only
163 164 165 |
# File 'lib/sportdb/config/club_index.rb', line 163
# Look up a club record by canonical name only; the name is used as-is
# as the key into @clubs (no normalization, no alt names).
def []( name )
  @clubs[ name ]
end
#add(rec_or_recs) ⇒ Object
add club record / alt_names
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/sportdb/config/club_index.rb', line 84
# Add one club record (or an array of club records) to the index.
#
# Each record is stored in @clubs under its canonical name and in
# @clubs_by_name under the normalized form of every name it is known by:
# canonical name, alt names, year-stripped variants of those and the
# auto-generated alt names.
#
# Prints a report and terminates the process (exit 1) on a canonical name
# conflict or on duplicates among the hand-typed names of a record.
# Duplicates that only show up with the auto names, and conflicts found
# while indexing the normalized names, are printed as warnings; the latter
# are also collected in @errors.
#
# Returns the array of records that was processed.
def add( rec_or_recs )   ## add club record / alt_names
  recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs]      ## wrap (single) rec in array

  recs.each do |rec|
    ## puts "adding:"
    ## pp rec

    ### step 1) add canonical name
    old_rec = @clubs[ rec.name ]
    if old_rec
      puts "** !!! ERROR !!! - (canonical) name conflict - duplicate - >#{rec.name}< will overwrite >#{old_rec.name}<:"
      pp old_rec
      pp rec
      exit 1
    else
      @clubs[ rec.name ] = rec
    end

    ## step 2) add all names (canonical name + alt names + alt names (auto))
    names = [rec.name] + rec.alt_names

    ## check "hand-typed" names for year (auto-add)
    ## check for year(s) e.g. (1887-1911), (-2013),
    ##                        (1946-2001,2013-) etc.
    names += names.select { |name| has_year?( name ) }
                  .map    { |name| strip_year( name ) }

    ## check for duplicates - simple check for now - fix/improve
    ## todo/fix: (auto)remove duplicates - why? why not?
    count      = names.size
    count_uniq = names.uniq.size
    if count != count_uniq
      puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
      pp names
      pp rec
      exit 1
    end

    ## check with auto-names just warn for now and do not exit
    names += rec.alt_names_auto
    count      = names.size
    count_uniq = names.uniq.size
    if count != count_uniq
      puts "** !!! WARN !!! - #{count-count_uniq} duplicate name(s):"
      pp names
      pp rec
    end

    names.each do |name|
      ## check lang codes e.g. [en], [fr], etc.
      ## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
      name = strip_lang( name )
      norm = normalize( name )
      alt_recs = @clubs_by_name[ norm ]
      if alt_recs
        ## check if include club rec already or is new club rec
        if alt_recs.include?( rec )
          ## note: do NOT include duplicate club record
          msg = "** !!! WARN !!! - (norm) name conflict/duplicate for club - >#{name}< normalized to >#{norm}< already included >#{rec.name}, #{rec.country.name}<"
          puts msg
          @errors << msg
        else
          ## note: the record is appended to the existing list (nothing is dropped);
          ## the message is kept on a single line so every @errors entry is one line
          msg = "** !!! WARN !!! - name conflict/duplicate - >#{name}< will overwrite >#{alt_recs[0].name}, #{alt_recs[0].country.name}< with >#{rec.name}, #{rec.country.name}<"
          puts msg
          @errors << msg
          alt_recs << rec
        end
      else
        @clubs_by_name[ norm ] = [rec]
      end
    end
  end
end
#add_wiki(rec_or_recs) ⇒ Object
add wiki(pedia) club record / links
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/sportdb/config/club_index.rb', line 55
# Attach wiki(pedia) page names to already indexed clubs.
#
# For every wiki record the page name is run through strip_wiki and looked
# up with match_by (name + country). Exactly one club must match; with no
# match or more than one match an error is printed and the process exits
# (exit 1). On success the club's wikipedia attribute is set to the
# original (unstripped) page name.
def add_wiki( rec_or_recs )   ## add wiki(pedia) club record / links
  wiki_recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs]   ## wrap (single) rec in array

  wiki_recs.each do |wiki_rec|
    ## note: strip qualifier () from wikipedia page name if present
    ##    e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
    ##         Willem II (football club)  => Willem II
    ##
    ##  e.g. do NOT strip others !! e.g.
    ##    América Futebol Clube (MG)
    ##  only add more "special" cases on demand (that, is) if we find more
    page_name  = strip_wiki( wiki_rec.name )
    candidates = match_by( name: page_name, country: wiki_rec.country )

    if candidates.nil?
      puts "** !!! ERROR !!! - no matching club found for wiki(pedia) name >#{page_name}, #{wiki_rec.country.name} (#{wiki_rec.country.key})<; sorry - to fix add name to clubs"
      exit 1
    elsif candidates.size > 1
      puts "** !!! ERROR !!! - too many (greater than one) matching clubs found for wiki(pedia) name >#{page_name}, #{wiki_rec.country.name} (#{wiki_rec.country.key})<"
      pp candidates
      exit 1
    else
      candidates[0].wikipedia = wiki_rec.name
    end
  end
end
#clubs ⇒ Object
44 |
# File 'lib/sportdb/config/club_index.rb', line 44
# All indexed club records (the values of the canonical-name hash).
def clubs
  @clubs.values
end
#dump_duplicates ⇒ Object
debug helper - report duplicate club name records
200 201 202 203 204 205 206 207 |
# File 'lib/sportdb/config/club_index.rb', line 200
# debug helper - print every name in @clubs_by_name that maps to more
# than one club record, followed by a pp dump of those records
def dump_duplicates
  @clubs_by_name.each do |norm_name, matches|
    next unless matches.size > 1

    puts "#{matches.size} matching club duplicates for >#{norm_name}<:"
    pp matches
  end
end
#errors? ⇒ Boolean
41 |
# File 'lib/sportdb/config/club_index.rb', line 41
# True when at least one message has been collected in @errors.
def errors?
  !@errors.empty?
end
#has_year?(name) ⇒ Boolean
49 |
# File 'lib/sportdb/config/club_index.rb', line 49
# Convenience wrapper: forwards to Club.has_year?.
def has_year?( name )
  Club.has_year?( name )
end
#mappings ⇒ Object
todo/check: rename to index or something - why? why not?
43 |
# File 'lib/sportdb/config/club_index.rb', line 43
# Exposes the name => club records hash (@clubs_by_name) itself, not a copy.
## todo/check: rename to index or something - why? why not?
def mappings
  @clubs_by_name
end
#match(name) ⇒ Object
167 168 169 170 171 |
# File 'lib/sportdb/config/club_index.rb', line 167
# Look up club records by name: the name is normalized first and then
# used as key into @clubs_by_name. Returns the stored array of records,
# or nil when the normalized name is not indexed.
def match( name )
  ## todo/check: return empty array if no match!!! and NOT nil (add || []) - why? why not?
  @clubs_by_name[ normalize( name ) ]
end
#match_by(name:, country:) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/sportdb/config/club_index.rb', line 174
# Look up club records by name and narrow the result down to one country.
#
# country may be a SportDb::Import::Country struct (used as-is) or a value
# that is looked up in SportDb::Import.config.countries; an unknown country
# prints an error and exits the process (exit 1). The country is only
# resolved when the name matched at all.
#
# Returns the matching records, or nil when nothing (or nothing in that
# country) matches.
def match_by( name:, country: )
  ## note: match must for now always include name
  candidates = match( name )
  return candidates unless candidates

  ## filter by country
  ##  note: country assumes / allows the country key or fifa code for now
  ##  note: allow passing in of country struct too
  country_rec =
    case country
    when SportDb::Import::Country
      country   ## (re)use country struct - no need to run lookup again
    else
      found = SportDb::Import.config.countries[ country ]
      if found.nil?
        puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
        exit 1
      end
      found
    end

  in_country = candidates.select { |club| club.country.key == country_rec.key }
  in_country.empty? ? nil : in_country   ## note: reset to nil if no more matches
end
#normalize(name) ⇒ Object
52 |
# File 'lib/sportdb/config/club_index.rb', line 52
# Convenience wrapper: forwards to Club.normalize.
def normalize( name )
  Club.normalize( name )
end
#strip_lang(name) ⇒ Object
50 |
# File 'lib/sportdb/config/club_index.rb', line 50
# Convenience wrapper: forwards to Club.strip_lang.
def strip_lang( name )
  Club.strip_lang( name )
end
#strip_wiki(name) ⇒ Object
51 |
# File 'lib/sportdb/config/club_index.rb', line 51
# Convenience wrapper: forwards to Club.strip_wiki.
def strip_wiki( name )
  Club.strip_wiki( name )
end
#strip_year(name) ⇒ Object
helpers from club - use a helper module for includes - why? why not?
48 |
# File 'lib/sportdb/config/club_index.rb', line 48
## helpers from club - use a helper module for includes - why? why not?
# Convenience wrapper: forwards to Club.strip_year.
def strip_year( name )
  Club.strip_year( name )
end