Module: TextUtils::TitleTable
- Included in:
- TextUtils
- Defined in:
- lib/textutils/title.rb
Instance Method Summary collapse
- #build_title_table_for(records) ⇒ Object
- #find_key_for!(name, line) ⇒ Object
- #find_keys_for!(name, line) ⇒ Object
  NB: keys (plural!) - will return an array.
- #map_title_worker_for!(name, line, key, values) ⇒ Object
- #map_titles_for!(name, line, title_table) ⇒ Object
Instance Method Details
#build_title_table_for(records) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/textutils/title.rb', line 32
#
# Builds the table of known titles (with synonyms) used for title mapping.
# Logs a deprecation notice pointing to TitleMapper.new.
#
# Result shape, e.g.
#
#   [[ 'wolfsburg', [ 'VfL Wolfsburg' ]],
#    [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
#    [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
#
# @param records [Enumerable] objects responding to +key+, +title+ and
#   +synonyms+ (a '|'-separated string or blank); +code+ is optional
# @return [Array<Array>] one +[key, titles]+ pair per record; titles are
#   sorted longest first and passed through TextUtils.title_esc_regex,
#   with the record's code (if any) appended last, as-is
def build_title_table_for( records )
  LogUtils::Logger.root.info " build_title_table_for - deprecated API - use TitleMapper.new instead"

  known_titles = []

  records.each_with_index do |rec,index|
    title_candidates = [ rec.title ]
    title_candidates += rec.synonyms.split('|') if rec.synonyms.present?

    ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
    #  make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
    titles = []
    title_candidates.each do |t|
      titles << t
      next unless t =~ /\(.+\)/

      extra_title = t.gsub( /\(.+\)/, '' ).strip   # remove subtitle plus surrounding whitespace
      # a title that is nothing but a subtitle would leave an empty string here;
      # an empty title turns into a pattern that matches at any word boundary, so skip it
      titles << extra_title unless extra_title.empty?
    end

    ## NB: sort here by length (largest goes first - best match)
    #  exclude code and key (key should always go last)
    titles = titles.sort { |left,right| right.length <=> left.length }

    ## escape for regex plus allow subs for special chars/accents
    titles = titles.map { |title| TextUtils.title_esc_regex( title ) }

    ## NB: only include code field - if defined
    titles << rec.code if rec.respond_to?(:code) && rec.code.present?

    known_titles << [ rec.key, titles ]

    ### fix: use plain logger
    LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
  end

  known_titles
end
#find_key_for!(name, line) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/textutils/title.rb', line 86
#
# Finds the first @@oo<key>oo@@ marker in +line+, replaces it in place with
# a [NAME] tag (+name+ upcased) and returns the key that was inside the marker.
# Logs a deprecation notice pointing to TitleMapper.find_key!.
#
# @param name [String] tag name; upcased for the inserted tag, downcased for the debug log
# @param line [String] text to scan; mutated in place when a marker is found
# @return [String, nil] the key found between the marker delimiters, or nil when
#   +line+ contains no marker (in which case +line+ is left untouched)
def find_key_for!( name, line )
  LogUtils::Logger.root.info " find_key_for! #{name} - deprecated API - use TitleMapper.find_key! instead"

  # e.g. everything in @@oo .... oo@@ (use non-greedy +? plus all chars but not @, that is [^@])
  regex = /@@oo([^@]+?)oo@@/

  tag           = "[#{name.upcase}]"
  downcase_name = name.downcase

  match = regex.match( line )
  return nil unless match

  value = match[1]
  ### fix: use plain logger
  LogUtils::Logger.root.debug " #{downcase_name}: >#{value}<"

  # block form inserts the tag literally (a plain replacement string would
  # expand backslash sequences such as \1 inside the name)
  line.sub!( regex ) { tag }

  value
end
#find_keys_for!(name, line) ⇒ Object
NB: keys (plural!) - will return array
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/textutils/title.rb', line 108
#
# Plural variant of find_key_for! - collects keys into an array.
# Logs a deprecation notice pointing to TitleMapper.find_keys!.
#
# Calls find_key_for! repeatedly with the names "<name>1", "<name>2", ...
# (+name+ downcased) against the same +line+, and stops at the first call
# that returns a key which is not present?.
#
# @param name [String] base tag name; a running counter starting at 1 is appended
# @param line [String] text to scan; passed on to find_key_for! on every call
# @return [Array] the keys in the order they were found (empty if none)
def find_keys_for!( name, line )
  LogUtils::Logger.root.info " find_keys_for! #{name} - deprecated API - use TitleMapper.find_keys! instead"

  prefix = name.downcase
  found  = []
  index  = 1

  loop do
    key = find_key_for!( "#{prefix}#{index}", line )
    break unless key.present?

    found << key
    index += 1
  end

  found
end
#map_title_worker_for!(name, line, key, values) ⇒ Object
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/textutils/title.rb', line 138
#
# Tries each pattern in +values+ against +line+, in order; the first one that
# matches as a whole word is replaced in place by the marker @@oo<key>oo@@
# (followed by one space) and the search stops.
#
# @param name [String] only used (downcased) in the debug log message
# @param line [String] text to scan; mutated in place on a match
# @param key [String] key to embed in the inserted marker
# @param values [Array<String>] regex source fragments; each is interpolated
#   into a pattern unchanged
# @return [Boolean] true if a value matched and was replaced, false otherwise
def map_title_worker_for!( name, line, key, values )
  downcase_name = name.downcase

  # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
  marker = "@@oo#{key}oo@@ "   # NB: add one space char at end

  values.each do |value|
    ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9)
    ##  (thus add it, allows match for Benfica Lis. for example - note . at the end)
    ## wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
    regex = /\b#{value}(\b| |\t|$)/

    next unless line =~ regex

    ### fix: use plain logger
    LogUtils::Logger.root.debug " match for #{downcase_name} >#{key}< >#{value}<"

    # block form inserts the marker literally (a plain replacement string would
    # expand backslash sequences such as \0 or \1 inside the key)
    line.sub!( regex ) { marker }

    return true   # break out after first match (do NOT continue)
  end

  false
end
#map_titles_for!(name, line, title_table) ⇒ Object
127 128 129 130 131 132 133 134 135 |
# File 'lib/textutils/title.rb', line 127
#
# Runs map_title_worker_for! once per entry of +title_table+ against +line+.
# Logs a deprecation notice pointing to TitleMapper.map_titles!.
#
# Every entry is processed; the worker's true/false result is not used here.
#
# @param name [String] passed through to map_title_worker_for!
# @param line [String] passed through to map_title_worker_for!
# @param title_table [Array] entries indexable as entry[0] (key) and entry[1] (values)
# @return [Object] whatever +title_table.each+ returns
def map_titles_for!( name, line, title_table )
  LogUtils::Logger.root.info " map_titles_for! #{name} - deprecated API - use TitleMapper.map_titles! instead"

  title_table.each { |entry| map_title_worker_for!( name, line, entry[0], entry[1] ) }
end