Module: TextUtils::TitleTable

Included in:
TextUtils
Defined in:
lib/sportdb/title.rb

Instance Method Summary collapse

Instance Method Details

#build_title_table_for(records) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/sportdb/title.rb', line 16

# Builds the lookup table of known titles (incl. synonyms) for the given records.
#
# Every table row is a two-element array holding the record key and the list
# of title patterns for that record e.g.
#
#   [[ 'wolfsburg', [ 'VfL Wolfsburg' ]],
#    [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
#    [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
#
# records - collection of objects responding to key, title and synonyms
#           (synonyms is a '|'-separated string or blank); an optional
#           code is appended to the patterns if the record responds to it
#
# Returns the array of [key, titles] rows (one row per record, in record order).
def build_title_table_for( records )
    known_titles = []

    records.each_with_index do |rec,index|

      title_candidates = [rec.title]
      title_candidates += rec.synonyms.split('|') if rec.synonyms.present?

      ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
      #  make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
      titles = []
      title_candidates.each do |t|
        titles << t
        next unless t =~ /\(.+\)/

        # remove/delete subtitles and strip leading and trailing whitespace too
        extra_title = t.gsub( /\(.+\)/, '' ).strip

        ## NB: a title made up of a subtitle only e.g. (Suzuka Circuit) leaves nothing behind;
        #  skip it - a blank entry would presumably end up as an empty pattern
        #  and an empty pattern matches everywhere in map_title_worker_for!
        titles << extra_title unless extra_title.empty?
      end

      ## NB: sort here by length (largest goes first - best match)
      titles = titles.sort { |left,right| right.length <=> left.length }

      ## escape for regex plus allow subs for special chars/accents
      titles = titles.map { |title| TextUtils.title_esc_regex( title ) }

      ## NB: only include code field - if defined
      #  (added after sorting and escaping, that is, it always goes last and as is)
      titles << rec.code   if rec.respond_to?(:code) && rec.code.present?

      known_titles << [ rec.key, titles ]

      ### fix: use plain logger
      LogUtils::Logger.root.debug "  #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
    end

    known_titles
end

#find_key_for!(name, line) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/sportdb/title.rb', line 68

# Looks for the first @@oo<key>oo@@ marker in line, swaps it (in place) for
# the upcased name in square brackets e.g. [TEAM1] and hands back the key.
#
# name - label used for the replacement tag (upcased) and for logging (downcased)
# line - string to search; gets modified in place if a marker is found
#
# Returns the key found inside the marker or nil if line holds no marker.
def find_key_for!( name, line )
  # everything in @@oo .... oo@@ (non-greedy +? plus all chars but not @, that is [^@])
  marker = /@@oo([^@]+?)oo@@/

  tag   = name.upcase
  label = name.downcase

  return nil unless line =~ marker

  key = Regexp.last_match(1)
  ### fix: use plain logger
  LogUtils::Logger.root.debug "   #{label}: >#{key}<"

  line.sub!( marker, "[#{tag}]" )
  key
end

#find_keys_for!(name, line) ⇒ Object

NB: keys (plural!) - returns an array of keys



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/sportdb/title.rb', line 88

# Collects all keys for name from line - NB: keys (plural!), returns an array.
#
# Calls find_key_for! with the downcased name plus a running number
# (name1, name2, name3, ...) until it comes back with a blank result.
#
# name - base label; gets downcased and numbered starting with 1
# line - string handed on to find_key_for! for every lookup
#
# Returns the array of keys found (empty if the very first lookup is blank).
def find_keys_for!( name, line )
  prefix = name.downcase
  found  = []
  number = 1

  loop do
    key = find_key_for!( "#{prefix}#{number}", line )
    break unless key.present?

    found << key
    number += 1
  end

  found
end

#map_title_worker_for!(name, line, key, values) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/sportdb/title.rb', line 114

def map_title_worker_for!( name, line, key, values )

  downcase_name = name.downcase

  values.each do |value|
    ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9)
    ## (thus add it, allows match for Benfica Lis.  for example - note . at the end)

    ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
    regex = /\b#{value}(\b| |\t|$)/   # wrap with world boundry (e.g. match only whole words e.g. not wac in wacker) 
    if line =~ regex
      ### fix: use plain logger
      LogUtils::Logger.root.debug "     match for #{downcase_name}  >#{key}< >#{value}<"
      # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
      line.sub!( regex, "@@oo#{key}oo@@ " )    # NB: add one space char at end
      return true    # break out after first match (do NOT continue)
    end
  end
  return false
end

#map_titles_for!(name, line, title_table) ⇒ Object



105
106
107
108
109
110
111
# File 'lib/sportdb/title.rb', line 105

# Runs map_title_worker_for! on line once for every row of the title table.
#
# name        - label handed on to map_title_worker_for!
# line        - string handed on to map_title_worker_for!
# title_table - list of [key, values] rows
#
# Returns title_table (the result of each).
def map_titles_for!( name, line, title_table )
  title_table.each do |(key, values)|
    map_title_worker_for!( name, line, key, values )
  end
end