Class: MESH::Tree

Inherits:
Object
  • Object
show all
Defined in:
lib/MESH/tree.rb

Constant Summary collapse

@@default_locale =
:en_us

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Tree

Returns a new instance of Tree.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/MESH/tree.rb', line 7

# Builds the tree by reading the bundled gzipped descriptor file
# (data/mesh_data_2014/d2014.bin.gz, resolved relative to this source file),
# creating one MESH::Heading per record and indexing it through
# add_heading_to_hashes. Once every heading is loaded, each one is asked to
# connect itself to its parents and forward references.
#
# Records are delimited by lines matching *NEWRECORD. The lines handed to
# MESH::Heading.new are the raw lines of one record; for every record after
# the first they start with the *NEWRECORD marker line itself.
#
# @raise [RuntimeError] propagated from add_heading_to_hashes when a tree
#   number or entry is already indexed
def initialize
  @headings = []
  @by_unique_id = {}
  @by_tree_number = {}
  @by_original_heading = {}
  @by_entry = {}
  @locales = [@@default_locale]

  filename = File.expand_path('../../../data/mesh_data_2014/d2014.bin.gz', __FILE__)
  record_marker = /^\*NEWRECORD$/

  lines = []
  # The block form closes the gzip stream and the underlying file, even if
  # building a heading raises part-way through.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      if line.match(record_marker)
        unless lines.empty?
          add_heading_to_hashes(MESH::Heading.new(self, @@default_locale, lines))
          lines = [line]
        end
      else
        lines << line
      end
    end
  end

  # A record is only emitted when the *next* marker is seen, so the final
  # record in the file would otherwise be dropped. Skip the flush when only
  # a marker and/or blank lines are pending (e.g. a trailing *NEWRECORD).
  if lines.any? { |line| !line.match(record_marker) && !line.strip.empty? }
    add_heading_to_hashes(MESH::Heading.new(self, @@default_locale, lines))
  end

  @headings.each do |heading|
    heading.connect_to_parents
    heading.connect_to_forward_references
  end
end

Instance Method Details

#add_heading_to_hashes(mh) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/MESH/tree.rb', line 41

# Appends a heading to the tree and registers it in every lookup index:
# by unique id, by original heading, by each of its tree numbers and by the
# normalised form (see entry_match_key) of each of its entries.
#
# @param mh [Object] heading responding to unique_id, original_heading,
#   tree_numbers and entries
# @raise [RuntimeError] if one of the heading's tree numbers or normalised
#   entries is already present in the corresponding index
def add_heading_to_hashes(mh)
  @headings << mh
  @by_unique_id[mh.unique_id] = mh
  @by_original_heading[mh.original_heading] = mh

  mh.tree_numbers.each do |tree_number|
    if @by_tree_number[tree_number]
      raise "Duplicate tree number #{tree_number.inspect} while indexing heading #{mh.unique_id.inspect}"
    end
    @by_tree_number[tree_number] = mh
  end

  # Entries are de-duplicated after normalisation so that a heading listing
  # the same entry in different case/spacing does not collide with itself.
  entry_keys = mh.entries.map { |e| entry_match_key(e) }.uniq
  entry_keys.each do |key|
    if @by_entry[key]
      raise "Duplicate entry #{key.inspect} while indexing heading #{mh.unique_id.inspect}"
    end
    @by_entry[key] = mh
  end
end

#each ⇒ Object



198
199
200
201
202
# File 'lib/MESH/tree.rb', line 198

# Yields every heading whose `useful` flag is truthy, in the order the
# headings were added to the tree.
#
# @yield [heading] each useful heading
# @return [Enumerator] over the useful headings when no block is given
# @return [self] when a block is given
def each
  return enum_for(:each) unless block_given?

  @headings.each { |heading| yield heading if heading.useful }
  self
end

#entry_match_key(e) ⇒ Object



56
57
58
# File 'lib/MESH/tree.rb', line 56

# Normalises an entry term into the key used by the entry index:
# surrounding whitespace removed, then upper-cased.
#
# @param e [String] entry term
# @return [String] normalised lookup key
def entry_match_key(e)
  trimmed = e.strip
  trimmed.upcase
end

#find(unique_id) ⇒ Object

NO LONGER COVERED BY TESTS def translate(locale, tr)

return if @locales.include? locale
@headings.each_with_index do |h, i|
  h.set_original_heading(tr.translate(h.original_heading), locale)
  h.set_natural_language_name(tr.translate(h.natural_language_name), locale)
  h.set_summary(tr.translate(h.summary), locale)
  h.entries.each { |entry| h.entries(locale) << tr.translate(entry) }
  h.entries(locale).sort!
end

@locales << locale

end



174
175
176
# File 'lib/MESH/tree.rb', line 174

# Looks a heading up in the unique-id index.
#
# @param unique_id [Object] key under which the heading was indexed
# @return [Object, nil] the indexed heading, or nil when the id is unknown
def find(unique_id)
  @by_unique_id[unique_id]
end

#find_by_entry(entry) ⇒ Object



186
187
188
# File 'lib/MESH/tree.rb', line 186

# Looks a heading up by one of its entry terms. The term is normalised with
# entry_match_key first, so the lookup ignores case and surrounding
# whitespace.
#
# @param entry [String] entry term to search for
# @return [Object, nil] the indexed heading, or nil when no entry matches
def find_by_entry(entry)
  lookup_key = entry_match_key(entry)
  @by_entry[lookup_key]
end

#find_by_original_heading(heading) ⇒ Object



182
183
184
# File 'lib/MESH/tree.rb', line 182

# Looks a heading up in the original-heading index. The key is used as
# given; no normalisation is applied.
#
# @param heading [Object] original heading text used as the index key
# @return [Object, nil] the indexed heading, or nil when there is no match
def find_by_original_heading(heading)
  @by_original_heading[heading]
end

#find_by_tree_number(tree_number) ⇒ Object



178
179
180
# File 'lib/MESH/tree.rb', line 178

# Looks a heading up in the tree-number index.
#
# @param tree_number [Object] tree number used as the index key
# @return [Object, nil] the indexed heading, or nil when there is no match
def find_by_tree_number(tree_number)
  @by_tree_number[tree_number]
end

#linkify_summaries(&block) ⇒ Object



154
155
156
157
158
# File 'lib/MESH/tree.rb', line 154

# Calls `linkify_summary` on every heading in the tree, forwarding the
# caller's block to each call.
#
# @param block [Proc] block handed unchanged to each heading
# @return [Array] the tree's heading list
def linkify_summaries(&block)
  @headings.each { |heading| heading.linkify_summary(&block) }
end

#load_translation(locale) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/MESH/tree.rb', line 60

# Loads the translated heading data for a locale from the bundled gzipped
# file data/mesh_data_2014/d2014.<locale>.bin.gz (resolved relative to this
# source file) and applies it to the headings already in the tree.
#
# For every record whose UI matches a known heading, the translated original
# heading, natural language name and summary are set for the locale (when
# present in the record) and the record's sorted, de-duplicated entries are
# appended to the heading's entries for that locale. Records with an unknown
# UI are ignored. Does nothing if the locale has already been loaded.
#
# @param locale [Object] locale identifier; interpolated into the file name
# @return [Array, nil] the list of loaded locales, or nil when the locale
#   was already loaded
def load_translation(locale)
  return if @locales.include? locale

  filename = File.expand_path("../../../data/mesh_data_2014/d2014.#{locale}.bin.gz", __FILE__)

  entries = []
  original_heading = nil
  natural_language_name = nil
  summary = nil
  unique_id = nil

  # Applies the record accumulated so far (if it has a UI) and resets the
  # per-record state, including natural_language_name so that a name from one
  # record can never be applied to the next.
  flush_record = lambda do
    unless unique_id.nil?
      entries.sort!
      entries.uniq!
      if (heading = find(unique_id))
        heading.set_original_heading(original_heading, locale) unless original_heading.nil?
        heading.set_natural_language_name(natural_language_name, locale) unless natural_language_name.nil?
        heading.set_summary(summary, locale) unless summary.nil?
        entries.each { |entry| heading.entries(locale) << entry }
      end

      entries = []
      original_heading = nil
      natural_language_name = nil
      summary = nil
      unique_id = nil
    end
  end

  # The block form closes the gzip stream and the underlying file, even if
  # applying a record raises.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      case line
      when /^\*NEWRECORD$/
        flush_record.call
      when /^UI = (.*)/
        unique_id = Regexp.last_match(1)
      when /^MS = (.*)/
        summary = Regexp.last_match(1)
      when /^MH = (.*)/
        mh = Regexp.last_match(1)
        original_heading = mh
        entries << mh
        # "Surname, Forename" style headings are flipped to "Forename Surname".
        librarian_parts = mh.match(/(.*), (.*)/)
        natural_language_name = librarian_parts.nil? ? mh : "#{librarian_parts[2]} #{librarian_parts[1]}"
      when /^(?:PRINT )?ENTRY = ([^|]+)/
        # [^|]+ can run through the line terminator, hence the chomp.
        entries << Regexp.last_match(1).chomp
      end
    end
  end

  # Records are applied when the next *NEWRECORD is seen, so the final record
  # needs an explicit flush; this is a no-op if nothing is pending.
  flush_record.call

  @locales << locale
end

#load_wikipedia ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/MESH/tree.rb', line 115

# Loads Wikipedia link data from the bundled gzipped file
# data/mesh_data_2014/d2014.wikipedia.bin.gz (resolved relative to this
# source file) and attaches it to the matching headings.
#
# Each WK line holds a JSON object (parsed with symbolised keys). For every
# record whose UI matches a known heading, each link's :score is divided by
# the number of entries on the heading and rounded to two decimals, then the
# links are assigned to the heading's wikipedia_links. Records with an
# unknown UI are ignored. Does nothing if the data has already been loaded.
#
# @return [true, nil] true once loaded, nil when it had been loaded before
def load_wikipedia
  return if @wikipedia_loaded

  filename = File.expand_path("../../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__)

  unique_id = nil
  wikipedia_links = []

  # Applies the record accumulated so far (if it has a UI) and resets the
  # per-record state.
  flush_record = lambda do
    unless unique_id.nil?
      if (heading = find(unique_id))
        entry_count = heading.entries.length.to_f
        wikipedia_links.each do |wl|
          wl[:score] = (wl[:score].to_f / entry_count).round(2)
        end
        heading.wikipedia_links = wikipedia_links
      end

      wikipedia_links = []
      unique_id = nil
    end
  end

  # The block form closes the gzip stream and the underlying file, even if
  # parsing or applying a record raises.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      case line
      when /^\*NEWRECORD$/
        flush_record.call
      when /^UI = (.*)/
        unique_id = Regexp.last_match(1)
      when /^WK = (.*)/
        wikipedia_links << JSON.parse(Regexp.last_match(1), symbolize_names: true)
      end
    end
  end

  # Records are applied when the next *NEWRECORD is seen, so the final record
  # needs an explicit flush; this is a no-op if nothing is pending.
  flush_record.call

  @wikipedia_loaded = true
end

#match_in_text(text) ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/MESH/tree.rb', line 204

# Finds occurrences of heading entries (in every loaded locale) within a
# piece of text. Only headings whose `useful` flag is truthy are considered.
#
# An entry matches when it is preceded by a line start or non-word character
# and followed by a non-word character or line end. Entries consisting only
# of upper-case letters and digits are matched case-sensitively; all others
# case-insensitively. When one match lies entirely within the span of
# another, only the outer one is kept.
#
# Each result is a hash with:
#   :heading - the heading the entry belongs to
#   :matched - the entry text as stored on the heading
#   :index   - length of the text before the regex match. The regex match
#              includes the preceding separator character when the entry is
#              not at a line start, so in that case this is the separator's
#              offset rather than the entry's first character.
#
# @param text [String, nil] text to search
# @return [Array<Hash>] surviving matches; empty when text is nil
def match_in_text(text)
  return [] if text.nil?

  downcased = text.downcase
  matches = []
  @headings.each do |heading|
    next unless heading.useful

    @locales.each do |locale|
      heading.entries(locale).each do |entry|
        # Substring pre-filter: looser than the regex below but much, much faster.
        next unless downcased.include? entry.downcase

        # The trailing boundary is a lookahead so it is not consumed; that
        # lets a separator shared by two adjacent occurrences ("foo foo")
        # serve as the leading boundary of the second one.
        pattern = "(^|\\W)#{Regexp.quote(entry)}(?=\\W|$)"
        options = (/^[A-Z0-9]+$/ =~ entry) ? 0 : Regexp::IGNORECASE
        regex = Regexp.new(pattern, options)

        text.scan(regex) do
          matches << {heading: heading, matched: entry, index: Regexp.last_match.pre_match.size}
        end
      end
    end
  end

  # Flag every match whose span is contained in another match's span.
  matches.combination(2) do |l, r|
    if (r[:index] >= l[:index]) && (r[:index] + r[:matched].length <= l[:index] + l[:matched].length)
      # r is within l
      r[:delete] = true
    elsif (l[:index] >= r[:index]) && (l[:index] + l[:matched].length <= r[:index] + r[:matched].length)
      # l is within r
      l[:delete] = true
    end
  end
  matches.reject { |match| match[:delete] }
end

#where(conditions) ⇒ Object



190
191
192
193
194
195
196
# File 'lib/MESH/tree.rb', line 190

# Collects every heading for which `matches(conditions)` is truthy, in the
# order the headings were added to the tree.
#
# @param conditions [Object] passed unchanged to each heading's `matches`
# @return [Array] a new array of the matching headings
def where(conditions)
  @headings.select { |heading| heading.matches(conditions) }
end