Class: MESH::Tree
- Inherits:
-
Object
- Object
- MESH::Tree
- Defined in:
- lib/MESH/tree.rb
Constant Summary collapse
- @@default_locale =
:en_us
Instance Method Summary collapse
- #add_heading_to_hashes(mh) ⇒ Object
- #each ⇒ Object
- #entry_match_key(e) ⇒ Object
-
#find(unique_id) ⇒ Object
NO LONGER COVERED BY TESTS def translate(locale, tr) return if @locales.include? locale @headings.each_with_index do |h, i| h.set_original_heading(tr.translate(h.original_heading), locale) h.set_natural_language_name(tr.translate(h.natural_language_name), locale) h.set_summary(tr.translate(h.summary), locale) h.entries.each { |entry| h.entries(locale) << tr.translate(entry) } h.entries(locale).sort! end.
- #find_by_entry(entry) ⇒ Object
- #find_by_original_heading(heading) ⇒ Object
- #find_by_tree_number(tree_number) ⇒ Object
-
#initialize ⇒ Tree
constructor
A new instance of Tree.
- #linkify_summaries(&block) ⇒ Object
- #load_translation(locale) ⇒ Object
- #load_wikipedia ⇒ Object
- #match_in_text(text) ⇒ Object
- #where(conditions) ⇒ Object
Constructor Details
#initialize ⇒ Tree
Returns a new instance of Tree.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/MESH/tree.rb', line 7
#
# Builds the tree from the gzipped descriptor file
# data/mesh_data_2014/d2014.bin.gz (resolved relative to this source file).
#
# Lines are grouped into records delimited by "*NEWRECORD" marker lines; each
# record becomes a MESH::Heading in the default locale and is registered in
# the lookup indexes. Once every heading is loaded, each one is asked to
# connect to its parents and to its forward references.
#
# @return [Tree] a new instance of Tree
def initialize
  @headings = []
  @by_unique_id = {}
  @by_tree_number = {}
  @by_original_heading = {}
  @by_entry = {}
  @locales = [@@default_locale]

  filename = File.expand_path('../../../data/mesh_data_2014/d2014.bin.gz', __FILE__)
  lines = []
  register = lambda do |record_lines|
    add_heading_to_hashes(MESH::Heading.new(self, @@default_locale, record_lines))
  end

  # The block form closes the gzip stream (and the underlying file) even if
  # parsing raises.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      if line =~ /^\*NEWRECORD$/
        # The very first marker has nothing buffered before it, so it is
        # skipped; every later record keeps its own marker as first line.
        unless lines.empty?
          register.call(lines)
          lines = [line]
        end
      else
        lines << line
      end
    end
  end

  # A record is only emitted when the next marker is seen, so the last record
  # in the file is still buffered here. Register it unless the buffer holds
  # nothing but a trailing marker and/or blank lines.
  register.call(lines) if lines.any? { |l| l !~ /^\*NEWRECORD$/ && !l.strip.empty? }

  @headings.each do |heading|
    heading.connect_to_parents
    heading.connect_to_forward_references
  end
end
Instance Method Details
#add_heading_to_hashes(mh) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/MESH/tree.rb', line 41
#
# Registers a heading in the heading list and in every lookup index
# (unique id, original heading, tree numbers, normalised entries).
#
# @param mh [Object] heading responding to unique_id, original_heading,
#   tree_numbers and entries
# @return [Array<String>] the distinct normalised entry keys that were indexed
# @raise [RuntimeError] if one of the heading's tree numbers or normalised
#   entries is already indexed. Index entries written before the duplicate was
#   detected are left in place.
def add_heading_to_hashes(mh)
  @headings << mh
  @by_unique_id[mh.unique_id] = mh
  @by_original_heading[mh.original_heading] = mh

  mh.tree_numbers.each do |tree_number|
    if @by_tree_number[tree_number]
      raise "Duplicate tree number #{tree_number.inspect} for heading #{mh.unique_id.inspect}"
    end

    @by_tree_number[tree_number] = mh
  end

  # Several raw entries of one heading may normalise to the same key, hence uniq.
  entry_keys = mh.entries.map { |e| entry_match_key(e) }.uniq
  entry_keys.each do |key|
    raise "Duplicate entry #{key.inspect} for heading #{mh.unique_id.inspect}" if @by_entry[key]

    @by_entry[key] = mh
  end
end
#each ⇒ Object
198 199 200 201 202 |
# File 'lib/MESH/tree.rb', line 198
#
# Yields every heading whose `useful` flag is truthy, in load order.
#
# @yieldparam heading [Object] a heading for which `useful` is truthy
# @return [Enumerator] when called without a block
# @return [self] when called with a block
def each
  return enum_for(:each) unless block_given?

  @headings.each { |heading| yield heading if heading.useful }
  self
end
#entry_match_key(e) ⇒ Object
56 57 58 |
# File 'lib/MESH/tree.rb', line 56
#
# Normalises an entry term into the key used by the entry index: surrounding
# whitespace is stripped and the result is upper-cased.
#
# @param e [String] raw entry term
# @return [String] the normalised key
def entry_match_key(e)
  trimmed = e.strip
  trimmed.upcase
end
#find(unique_id) ⇒ Object
NOTE: the text below is the source of a `translate` method that is NO LONGER COVERED BY TESTS; it does not describe #find. def translate(locale, tr)
return if @locales.include? locale
@headings.each_with_index do |h, i|
h.set_original_heading(tr.translate(h.original_heading), locale)
h.set_natural_language_name(tr.translate(h.natural_language_name), locale)
h.set_summary(tr.translate(h.summary), locale)
h.entries.each { |entry| h.entries(locale) << tr.translate(entry) }
h.entries(locale).sort!
end
@locales << locale
end
174 175 176 |
# File 'lib/MESH/tree.rb', line 174
#
# Looks a heading up by its unique id.
#
# @param unique_id [Object] key into the unique-id index
# @return [Object, nil] the indexed heading, or nil when the id is unknown
def find(unique_id)
  @by_unique_id[unique_id]
end
#find_by_entry(entry) ⇒ Object
186 187 188 |
# File 'lib/MESH/tree.rb', line 186
#
# Looks a heading up by one of its entry terms. The term is normalised with
# entry_match_key before the index is consulted.
#
# @param entry [String] entry term
# @return [Object, nil] the indexed heading, or nil when no entry matches
def find_by_entry(entry)
  key = entry_match_key(entry)
  @by_entry[key]
end
#find_by_original_heading(heading) ⇒ Object
182 183 184 |
# File 'lib/MESH/tree.rb', line 182
#
# Looks a heading up by its original heading text (exact key match).
#
# @param heading [Object] key into the original-heading index
# @return [Object, nil] the indexed heading, or nil when there is none
def find_by_original_heading(heading)
  @by_original_heading[heading]
end
#find_by_tree_number(tree_number) ⇒ Object
178 179 180 |
# File 'lib/MESH/tree.rb', line 178
#
# Looks a heading up by one of its tree numbers.
#
# @param tree_number [Object] key into the tree-number index
# @return [Object, nil] the indexed heading, or nil when there is none
def find_by_tree_number(tree_number)
  @by_tree_number[tree_number]
end
#linkify_summaries(&block) ⇒ Object
154 155 156 157 158 |
# File 'lib/MESH/tree.rb', line 154
#
# Calls linkify_summary on every heading, forwarding the given block to each
# call.
#
# @return [Array] the list of headings
def linkify_summaries(&block)
  @headings.each { |heading| heading.linkify_summary(&block) }
end
#load_translation(locale) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/MESH/tree.rb', line 60
#
# Loads the translated descriptor file for +locale+
# (data/mesh_data_2014/d2014.<locale>.bin.gz, resolved relative to this source
# file) and applies it to the headings already in the tree. Does nothing when
# the locale has already been loaded.
#
# Records are delimited by "*NEWRECORD" lines. Within a record:
# * "UI = ..." selects the heading (looked up with #find),
# * "MH = ..." supplies the original heading, is also added as an entry, and
#   yields the natural language name ("Last, First" becomes "First Last"),
# * "MS = ..." supplies the summary,
# * "ENTRY = ..." / "PRINT ENTRY = ..." supply entries (text before the
#   first "|").
# Records whose UI is unknown to the tree are ignored.
#
# @param locale [Symbol] locale to load
# @return [Array, nil] the list of loaded locales, or nil when +locale+ was
#   already loaded
def load_translation(locale)
  return if @locales.include? locale

  filename = File.expand_path("../../../data/mesh_data_2014/d2014.#{locale}.bin.gz", __FILE__)
  entries = []
  original_heading = nil
  natural_language_name = nil
  summary = nil
  unique_id = nil

  # Applies the buffered record to its heading (if any) and clears ALL
  # per-record state, so nothing can leak from one record into the next.
  flush_record = lambda do
    heading = unique_id.nil? ? nil : find(unique_id)
    if heading
      heading.set_original_heading(original_heading, locale) unless original_heading.nil?
      heading.set_natural_language_name(natural_language_name, locale) unless natural_language_name.nil?
      heading.set_summary(summary, locale) unless summary.nil?
      entries.sort.uniq.each { |entry| heading.entries(locale) << entry }
    end
    entries = []
    original_heading = nil
    natural_language_name = nil
    summary = nil
    unique_id = nil
  end

  # The block form closes the gzip stream (and the underlying file) even if
  # parsing raises.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      if line =~ /^\*NEWRECORD$/
        flush_record.call
      elsif (matches = line.match(/^UI = (.*)/))
        unique_id = matches[1]
      elsif (matches = line.match(/^MS = (.*)/))
        summary = matches[1]
      elsif (matches = line.match(/^MH = (.*)/))
        mh = matches[1]
        original_heading = mh
        entries << mh
        librarian_parts = mh.match(/(.*), (.*)/)
        natural_language_name = librarian_parts.nil? ? mh : "#{librarian_parts[2]} #{librarian_parts[1]}"
      elsif (matches = line.match(/^(?:PRINT )?ENTRY = ([^|]+)/))
        entries << matches[1].chomp
      end
    end
  end

  # The last record has no following marker, so flush it explicitly.
  flush_record.call

  @locales << locale
end
#load_wikipedia ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/MESH/tree.rb', line 115
#
# Loads Wikipedia links from data/mesh_data_2014/d2014.wikipedia.bin.gz
# (resolved relative to this source file) and attaches them to the headings
# already in the tree. Does nothing when the links have already been loaded.
#
# Records are delimited by "*NEWRECORD" lines. Within a record, "UI = ..."
# selects the heading (looked up with #find) and every "WK = ..." line holds
# one JSON object, parsed with symbol keys. Before the links are assigned,
# each link's :score is divided by the heading's number of entries and
# rounded to two decimals. Records whose UI is unknown to the tree are
# ignored.
#
# @return [true, nil] true after loading, nil when already loaded
def load_wikipedia
  return if @wikipedia_loaded

  filename = File.expand_path("../../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__)
  unique_id = nil
  wikipedia_links = []

  # Attaches the buffered links to their heading (if any) and clears the
  # per-record state so nothing can leak from one record into the next.
  flush_record = lambda do
    heading = unique_id.nil? ? nil : find(unique_id)
    if heading
      entry_count = heading.entries.length.to_f
      wikipedia_links.each do |wl|
        wl[:score] = (wl[:score].to_f / entry_count).round(2)
      end
      heading.wikipedia_links = wikipedia_links
    end
    wikipedia_links = []
    unique_id = nil
  end

  # The block form closes the gzip stream (and the underlying file) even if
  # parsing raises.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      if line =~ /^\*NEWRECORD$/
        flush_record.call
      elsif (matches = line.match(/^UI = (.*)/))
        unique_id = matches[1]
      elsif (matches = line.match(/^WK = (.*)/))
        wikipedia_links << JSON.parse(matches[1], symbolize_names: true)
      end
    end
  end

  # The last record has no following marker, so flush it explicitly.
  flush_record.call

  @wikipedia_loaded = true
end
#match_in_text(text) ⇒ Object
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 |
# File 'lib/MESH/tree.rb', line 204
#
# Finds occurrences of heading entries in +text+.
#
# Every entry of every useful heading, in every loaded locale, is searched as
# a whole term: it must not be directly preceded or followed by a word
# character. Entries consisting solely of upper-case letters and digits are
# matched case-sensitively; all other entries are matched case-insensitively.
# A match that lies entirely inside the span of another match (including an
# identical span) is discarded.
#
# The word boundaries are zero-width lookarounds, so delimiters are not
# consumed: back-to-back occurrences ("heart heart") are all found, and
# :index is the character offset of the entry itself rather than of the
# delimiter in front of it.
#
# @param text [String, nil] text to search
# @return [Array<Hash>] one hash per surviving match with the keys
#   :heading (the heading), :matched (the entry string as stored on the
#   heading) and :index (character offset of the match in +text+); empty when
#   +text+ is nil
def match_in_text(text)
  return [] if text.nil?

  downcased = text.downcase
  matches = []
  @headings.each do |heading|
    next unless heading.useful

    @locales.each do |locale|
      heading.entries(locale).each do |entry|
        next if entry.empty?
        # Cheap substring pre-check: looser than the regex but much faster.
        next unless downcased.include?(entry.downcase)

        quoted = Regexp.quote(entry)
        regex = if entry =~ /^[A-Z0-9]+$/
                  /(?<!\w)#{quoted}(?!\w)/
                else
                  /(?<!\w)#{quoted}(?!\w)/i
                end
        text.scan(regex) do
          matches << { heading: heading, matched: entry, index: Regexp.last_match.begin(0) }
        end
      end
    end
  end

  # Flag every match whose span is contained in another match's span.
  matches.combination(2) do |l, r|
    l_start = l[:index]
    l_end = l_start + l[:matched].length
    r_start = r[:index]
    r_end = r_start + r[:matched].length
    if r_start >= l_start && r_end <= l_end
      r[:delete] = true # r is within l
    elsif l_start >= r_start && l_end <= r_end
      l[:delete] = true # l is within r
    end
  end
  matches.delete_if { |match| match[:delete] }
end
#where(conditions) ⇒ Object
190 191 192 193 194 195 196 |
# File 'lib/MESH/tree.rb', line 190
#
# Collects the headings for which +matches(conditions)+ is truthy, in load
# order.
#
# @param conditions [Object] passed unchanged to each heading's +matches+
# @return [Array] a new array of the matching headings
def where(conditions)
  @headings.select { |heading| heading.matches(conditions) }
end