Class: SportDb::Import::WikiIndex

Inherits:
Object
  • Object
show all
Includes:
NameHelper
Defined in:
lib/sportdb/config/wiki_index.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(recs) ⇒ WikiIndex

Returns a new instance of WikiIndex.



41
42
43
44
45
46
47
48
49
50
# File 'lib/sportdb/config/wiki_index.rb', line 41

##
## Builds the lookup table used by the find methods:
##   country key  =>  { normalized page name => rec }
##
## recs - wiki page records; each one is asked for rec.country.key and rec.name
##
## note: the page name gets passed through strip_wiki and normalize
##        before it is used as the hash key
##
## todo/fix:
##   check for duplicate recs - report and exit on duplicate!
##   (for now a later rec with the same country and key
##    silently replaces the earlier one)
def initialize( recs )
  @pages_by_country = {}

  recs.each do |rec|
    ## get (or create on first use) the per-country bucket
    pages = (@pages_by_country[ rec.country.key ] ||= {})

    page_key = normalize( strip_wiki( rec.name ))
    pages[ page_key ] = rec
  end
end

Class Method Details

.build(path) ⇒ Object



9
10
11
12
13
14
15
16
17
18
# File 'lib/sportdb/config/wiki_index.rb', line 9

##
## Builds a new index from all club wiki entries of the package at path.
##
## path - passed on as-is to Package.new
##
## Every entry yielded by Package#each_clubs_wiki gets read and
## parsed with WikiReader.parse; all parsed records are collected
## into one array that is handed to new.
##
## Returns the result of new( recs ).
def self.build( path )
  pack = Package.new( path )
  recs = []
  pack.each_clubs_wiki do |entry|
    ## note: concat appends in place
    ##   (recs += ... would allocate a new array for every entry)
    recs.concat( WikiReader.parse( entry.read ))
  end

  new( recs )
end

Instance Method Details

#find_by(club:) ⇒ Object

TODO/check: should callers use find_by_club directly instead of this wrapper? Why or why not?



53
54
55
# File 'lib/sportdb/config/wiki_index.rb', line 53

##
## Keyword-style lookup; forwards the club to find_by_club
## and returns whatever find_by_club returns.
##
## club - (required keyword) the club to look up
##
## todo/check: use find_by_club - why? why not?
def find_by( club: )
  find_by_club( club )
end

#find_by_club(club) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/sportdb/config/wiki_index.rb', line 57

##
## Looks up the wiki page record for a club.
##
## club - queried for name, alt_names and country.key
##
## The club name is tried first, then the alternative names in their
## given order; the first name with a matching page wins.
##
## Returns the matching record or nil if the country has no pages
## or none of the names match.
def find_by_club( club )
  ## collect the query params from the club
  candidates = [club.name] + club.alt_names
  pages      = @pages_by_country[ club.country.key ]

  return nil unless pages    ## no pages at all for this country

  ## todo/check: sort names ?
  ##   sort by longest first (for best match)
  candidates.each do |candidate|
    ## note: sanitize first (strip lang e.g. [en] and year e.g. (1946-2001)),
    ##         then normalize to get the lookup key
    lookup_key = normalize( strip_year( strip_lang( candidate )))
    page       = pages[ lookup_key ]
    return page if page    ## bingo!! stop on first match
  end

  nil   ## note: nothing found
end

#strip_wiki(name) ⇒ Object

fix/todo:

This stripping logic is also used (duplicated) in ClubIndex#add_wiki.


26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/sportdb/config/wiki_index.rb', line 26

##
## Strips the disambiguation qualifier from a wikipedia page name (if present).
##
## note: only a qualifier starting with a digit 1 or 2 (assuming a year)
##        or starting with foot (assuming football ...) gets removed, e.g.
##   FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
##   Willem II (football club)  => Willem II
##
## all other qualifiers are kept as is, e.g.
##   América Futebol Clube (MG)
## only add more "special" cases on demand (that is, if we find more)
##
## todo/check: rename to strip_wikipedia_en - why? why not?
##
## Returns a new string; the passed-in name is not modified.
def strip_wiki( name )
  qualifiers = [
    /\([12][^\)]+?\)/,   ## (2002), (1946-2001), ...
    /\(foot[^\)]+?\)/    ## (football club), (footballer), ...
  ]

  ## apply the patterns in order; trim leading/trailing space after every pass
  qualifiers.reduce( name ) do |title, qualifier|
    title.gsub( qualifier, '' ).strip
  end
end