Module: BioCMerger
- Defined in:
- lib/simple_bioc/bioc_merger.rb
Class Method Summary
- .adjust_relation_refid(obj, id_map) ⇒ Object
- .adjust_relation_refids(doc, id_map) ⇒ Object
- .blank?(text) ⇒ Boolean
- .choose_id(doc, id, id_map) ⇒ Object
- .copy_annotation(doc, dest, annotation, id_map) ⇒ Object
- .copy_annotations(doc, dest, src, id_map) ⇒ Object
- .copy_infons(dest, src) ⇒ Object
- .copy_relation(doc, dest, relation, id_map) ⇒ Object
- .copy_relations(doc, dest, src, id_map) ⇒ Object
- .copy_text(dest, src) ⇒ Object
- .merge(dest_collection, src_collection) ⇒ Object
Class Method Details
.adjust_relation_refid(obj, id_map) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/simple_bioc/bioc_merger.rb', line 95

# Applies the ID renames recorded in +id_map+ to the nodes of the relations
# attached directly to +obj+.
#
# Relations whose +original+ is nil are left untouched (copy_relation sets
# +original+ on the relations it creates). For every node of the remaining
# relations, +refid+ is replaced when +id_map+ has a non-nil entry for it,
# and +adjust_ref+ is then called on the node regardless.
#
# @param obj [#relations, nil] holder of relations; nil is ignored
# @param id_map [Hash] old ID => new ID
# @return [Object, nil] +obj.relations+, or nil when +obj+ is nil
def adjust_relation_refid(obj, id_map)
  return if obj.nil?

  obj.relations.each do |relation|
    next if relation.original.nil?

    relation.nodes.each do |node|
      renamed = id_map[node.refid]
      node.refid = renamed unless renamed.nil?
      # NOTE(review): adjust_ref is defined outside this file; presumably it
      # re-resolves the node's target from refid - confirm.
      node.adjust_ref
    end
  end
end
.adjust_relation_refids(doc, id_map) ⇒ Object
85 86 87 88 89 90 91 92 93 |
# File 'lib/simple_bioc/bioc_merger.rb', line 85

# Runs adjust_relation_refid over a whole document: first the document
# itself, then each passage, then each sentence of that passage.
#
# @param doc [#passages] document whose passages respond to +sentences+
# @param id_map [Hash] old ID => new ID, passed through unchanged
# @return [Object] +doc.passages+
def adjust_relation_refids(doc, id_map)
  adjust_relation_refid(doc, id_map)

  doc.passages.each do |passage|
    adjust_relation_refid(passage, id_map)
    passage.sentences.each { |sentence| adjust_relation_refid(sentence, id_map) }
  end
end
.blank?(text) ⇒ Boolean
216 217 218 |
# File 'lib/simple_bioc/bioc_merger.rb', line 216

# Tells whether +text+ is nil or empty. Whitespace-only strings are NOT
# considered blank, because only +empty?+ is consulted.
#
# @param text [#empty?, nil]
# @return [Boolean]
def blank?(text)
  text.nil? || text.empty?
end
.choose_id(doc, id, id_map) ⇒ Object
195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/simple_bioc/bioc_merger.rb', line 195

# Picks an ID that +doc.find_node+ does not already know.
#
# Starts from +id+ (or the literal "id" when +id+ is nil) and keeps
# appending "_c" until +doc.find_node+ returns nil for the candidate. When
# the result differs from +id+, the rename is recorded as
# +id_map[id] = result+; this includes the nil key when +id+ was nil.
#
# @param doc [#find_node] document used for the collision lookup
# @param id [String, nil] preferred ID
# @param id_map [Hash] receives the old => new mapping on rename
# @return [String] the chosen ID
def choose_id(doc, id, id_map)
  candidate = id || "id"
  candidate += "_c" until doc.find_node(candidate).nil?
  id_map[id] = candidate unless candidate == id
  candidate
end
.copy_annotation(doc, dest, annotation, id_map) ⇒ Object
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/simple_bioc/bioc_merger.rb', line 157

# Merges one annotation into +dest.annotations+.
#
# An existing destination annotation is reused when both its +id+ and
# +text+ equal those of +annotation+. Otherwise a new
# SimpleBioC::Annotation is built with an ID obtained from choose_id and is
# appended to +dest.annotations+ at the end. Either way, every source
# location whose (offset, length) pair is not already present on the target
# is added as a new SimpleBioC::Location, and the infons are merged with
# copy_infons.
#
# @param doc [#find_node] document handed to choose_id for ID collisions
# @param dest [#annotations] passage or sentence receiving the annotation
# @param annotation [#id, #text, #locations, #infons] source annotation
# @param id_map [Hash] receives old => new ID renames
# @return [Object, nil] +dest.annotations+ when a new annotation was
#   appended, nil when an existing one was reused
def copy_annotation(doc, dest, annotation, id_map)
  target = dest.annotations.find do |candidate|
    candidate.id == annotation.id && candidate.text == annotation.text
  end
  need_add = target.nil?

  if need_add
    target = SimpleBioC::Annotation.new(dest)
    target.id = choose_id(doc, annotation.id, id_map)
    target.text = annotation.text
    target.locations = []
  end

  annotation.locations.each do |location|
    # The target list grows while we iterate the source, so repeated source
    # locations are de-duplicated as well.
    known = target.locations.any? do |present|
      location.offset == present.offset && location.length == present.length
    end
    next if known

    copy = SimpleBioC::Location.new(target)
    copy.offset = location.offset
    copy.length = location.length
    target.locations << copy
  end

  copy_infons(target, annotation)
  dest.annotations << target if need_add
end
.copy_annotations(doc, dest, src, id_map) ⇒ Object
114 115 116 117 118 119 |
# File 'lib/simple_bioc/bioc_merger.rb', line 114

# Copies every annotation of +src+ into +dest+ via copy_annotation.
#
# @param doc [Object] document forwarded to copy_annotation
# @param dest [Object] receiver forwarded to copy_annotation
# @param src [#annotations, nil] source holder; nil is ignored
# @param id_map [Hash] forwarded to copy_annotation
# @return [Object, nil] +src.annotations+, or nil when +src+ is nil
def copy_annotations(doc, dest, src, id_map)
  return if src.nil?

  src.annotations.each { |annotation| copy_annotation(doc, dest, annotation, id_map) }
end
.copy_infons(dest, src) ⇒ Object
220 221 222 223 224 225 226 227 228 |
# File 'lib/simple_bioc/bioc_merger.rb', line 220

# Copies infons from +src+ into +dest+ without overwriting existing values.
#
# A key is copied only when the destination's current value for it is nil
# (missing key, or key explicitly set to nil). When both sides hold a
# different non-nil value the destination's value is kept; the conflict is
# not reported.
#
# @param dest [#infons] object whose infons hash is updated in place
# @param src [#infons] object whose infons are read
# @return [Object] +src.infons+
def copy_infons(dest, src)
  src.infons.each do |key, value|
    dest.infons[key] = value if dest.infons[key].nil?
  end
end
.copy_relation(doc, dest, relation, id_map) ⇒ Object
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/simple_bioc/bioc_merger.rb', line 120

# Merges one relation into +dest.relations+.
#
# A destination relation with the same +id+ is reused. Otherwise a new
# SimpleBioC::Relation is created with an ID from choose_id, its +original+
# is set to the source relation, and it is appended to +dest.relations+ at
# the end. Every source node whose (refid, role) pair is not yet present on
# the target is added as a new SimpleBioC::Node; node refids are copied
# as-is (no ID mapping is applied here). Infons are merged with copy_infons.
#
# @param doc [#find_node] document handed to choose_id for ID collisions
# @param dest [#relations] document, passage or sentence receiving it
# @param relation [#id, #nodes, #infons] source relation
# @param id_map [Hash] receives old => new ID renames
# @return [Object, nil] +dest.relations+ when a new relation was appended,
#   nil when an existing one was reused
def copy_relation(doc, dest, relation, id_map)
  target = dest.relations.find { |candidate| candidate.id == relation.id }
  need_add = target.nil?

  if need_add
    target = SimpleBioC::Relation.new(dest)
    target.id = choose_id(doc, relation.id, id_map)
    target.original = relation
  end

  relation.nodes.each do |source_node|
    known = target.nodes.any? do |present|
      source_node.refid == present.refid && source_node.role == present.role
    end
    next if known

    copy = SimpleBioC::Node.new(target)
    copy.refid = source_node.refid
    copy.role = source_node.role
    target.nodes << copy
  end

  copy_infons(target, relation)
  dest.relations << target if need_add
end
.copy_relations(doc, dest, src, id_map) ⇒ Object
107 108 109 110 111 112 |
# File 'lib/simple_bioc/bioc_merger.rb', line 107

# Copies every relation of +src+ into +dest+ via copy_relation.
#
# @param doc [Object] document forwarded to copy_relation
# @param dest [Object] receiver forwarded to copy_relation
# @param src [#relations, nil] source holder; nil is ignored
# @param id_map [Hash] forwarded to copy_relation
# @return [Object, nil] +src.relations+, or nil when +src+ is nil
def copy_relations(doc, dest, src, id_map)
  return if src.nil?

  src.relations.each { |relation| copy_relation(doc, dest, relation, id_map) }
end
.copy_text(dest, src) ⇒ Object
210 211 212 213 214 |
# File 'lib/simple_bioc/bioc_merger.rb', line 210

# Fills in +dest.text+ from +src.text+, but only when the destination text
# is blank and the source text is not. Existing destination text is never
# overwritten.
#
# @param dest [#text, #text=]
# @param src [#text]
# @return [Object, nil] the copied text, or nil when nothing was copied
def copy_text(dest, src)
  return unless blank?(dest.text)
  return if blank?(src.text)

  dest.text = src.text
end
.merge(dest_collection, src_collection) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/simple_bioc/bioc_merger.rb', line 8

# Merges the first document of +src_collection+ into the first document of
# +dest_collection+, mutating the destination in place.
#
# Steps, in order:
# 1. Collection level: infons are merged with copy_infons; +source+, +date+
#    and +key+ are taken from the source only where the destination's value
#    is blank.
# 2. Document level: infons and relations are copied.
# 3. Passage level (paired by index): relations are copied, then sentences
#    are reconciled depending on which side has sentences (see the inline
#    comments), then passage annotations are copied and
#    +adjust_annotation_offsets+ is called on the destination passage.
# 4. ID renames collected in +id_map+ while copying are applied to the nodes
#    of the copied relations via adjust_relation_refids.
#
# Warnings are collected and written to standard output with +puts+; they
# never abort the merge.
#
# @param dest_collection [#documents, #infons, #source, #date, #key]
#   collection that is updated
# @param src_collection [#documents, #infons, #source, #date, #key]
#   collection that is read
# @return [nil] the return value of +puts+
def merge(dest_collection, src_collection)
  warnings = []
  id_map = {}

  if dest_collection.documents.size != 1 || src_collection.documents.size != 1
    warnings << 'Only the first documents will be merged'
  end

  doc_d = dest_collection.documents[0]
  doc_s = src_collection.documents[0]

  copy_infons(dest_collection, src_collection)
  dest_collection.source = src_collection.source if blank?(dest_collection.source)
  dest_collection.date = src_collection.date if blank?(dest_collection.date)
  dest_collection.key = src_collection.key if blank?(dest_collection.key)

  copy_infons(doc_d, doc_s)
  copy_relations(doc_d, doc_d, doc_s, id_map)

  if doc_d.passages.size != doc_s.passages.size
    warnings << 'Passages will not be merged because the numbers of passages in documents are different'
  end

  # First pass: only collect warnings about passages that cannot be paired
  # cleanly; nothing is modified here.
  doc_d.passages.each_with_index do |p_d, index|
    p_s = doc_s.passages[index]
    if p_d.nil? || p_s.nil?
      warnings << 'The number of sentences in pages should be the same'
    elsif blank?(p_d.text) && blank?(p_s.text) && p_d.sentences.size != p_s.sentences.size
      warnings << 'The number of sentences in pages should be the same'
    end
  end

  # Second pass: merge passages pairwise by index.
  doc_d.passages.each_with_index do |p_d, index|
    p_s = doc_s.passages[index]
    next if p_d.nil? || p_s.nil?

    copy_relations(doc_d, p_d, p_s, id_map)

    if p_d.sentences.empty? && p_s.sentences.empty?
      # Neither side has sentences: merge at passage-text level. This case
      # must be tested before the equal-size case, which would otherwise
      # swallow it (0 == 0) and leave the passage text uncopied.
      copy_text(p_d, p_s)
    elsif p_d.sentences.size == p_s.sentences.size
      # Same number of sentences: merge sentence by sentence.
      p_d.sentences.each_with_index do |s_d, s_index|
        s_s = p_s.sentences[s_index]
        copy_infons(s_d, s_s)
        copy_text(s_d, s_s)
        copy_relations(doc_d, s_d, s_s, id_map)
        copy_annotations(doc_d, s_d, s_s, id_map)
        s_d.adjust_annotation_offsets
      end
    elsif p_d.sentences.empty?
      # dest has only passage text, but src has sentences: flatten the source
      # sentences into the destination passage.
      p_d.text = p_s.sentences.map(&:text).join(" ") if blank?(p_d.text)
      p_s.sentences.each do |s|
        copy_relations(doc_d, p_d, s, id_map)
        copy_annotations(doc_d, p_d, s, id_map)
      end
    elsif p_s.sentences.empty?
      # dest has sentences, but src has only passages.
      # Collapse the destination sentences into the passage so both sides
      # are at passage level.
      p_d.text = p_d.sentences.map(&:text).join(" ") if blank?(p_d.text)
      p_d.sentences.each do |s|
        s.annotations.each do |a|
          a.clear_sentence
          p_d.annotations << a
        end
        s.relations.each do |r|
          r.clear_sentence
          p_d.relations << r
        end
      end
      p_d.sentences.clear
    end
    # Remaining case (both have sentences, counts differ): sentences are
    # left alone and only passage-level data is merged.

    copy_annotations(doc_d, p_d, p_s, id_map)
    p_d.adjust_annotation_offsets
  end

  # Apply the ID renames recorded while copying; without this the nodes of
  # copied relations keep pointing at the pre-rename IDs.
  adjust_relation_refids(doc_d, id_map)

  puts warnings
end