Module: Taxonifi::Lumper::Lumps::ParentChildNameCollection
- Defined in:
- lib/taxonifi/lumper/lumps/parent_child_name_collection.rb
Overview
Handles DwC-esque files (e.g. as dumped by EoL), i.e. a file with columns like:
[identifier parent child rank synonyms]
Instantiates every name in the file (including each name in the synonym lists) as an individual name within a NameCollection. See ‘test/test_lumper_parent_child_name_collection’ for example use.
Class Method Summary collapse
-
.add_species_names_from_string(nc, string, parent = nil, synonym_id = nil) ⇒ Object
Add the last name in a species epithet string if new, record a new combination otherwise.
- .name_collection(csv) ⇒ Object
Class Method Details
.add_species_names_from_string(nc, string, parent = nil, synonym_id = nil) ⇒ Object
Add the last name in a species epithet string if new, record a new combination otherwise.
Assumes ALL parents have been previously added, including those used in Synonym combinations. For example, given a row with name, synonym fields like:
['Neortholomus scolopax (Say, 1832)', 'Lygaeus scolopax Say, 1832']
The names Neortholomus and Lygaeus must exist.
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/taxonifi/lumper/lumps/parent_child_name_collection.rb', line 97

# Add the last name in a species epithet string if new, record a new
# combination otherwise.
#
# Assumes ALL parents have been previously added, including those used in
# synonym combinations. For example, given a row with name and synonym
# fields like
#   ['Neortholomus scolopax (Say, 1832)', 'Lygaeus scolopax Say, 1832']
# the names Neortholomus and Lygaeus must already exist in the collection.
#
# @param nc the name collection being built; this method calls
#   name_exists?, add_object, object_by_id and combinations on it
# @param string [String] the species name string, parsed with
#   Taxonifi::Splitter::Builder.build_species_name
# @param parent the already instantiated parent name for the last name in
#   the string; defaults to nil, but nil is rejected with an error
# @param synonym_id id (within nc) of the name a newly added name is linked
#   to, or nil for no link
# @return the id of the newly added name, or nil when the name already
#   existed (whether or not a combination could be recorded)
# @raise [Taxonifi::Lumper::LumperError] when parent is nil
def self.add_species_names_from_string(nc, string, parent = nil, synonym_id = nil)
  names = Taxonifi::Splitter::Builder.build_species_name(string) # A Taxonifi::Model::SpeciesName instance
  terminal = names.names.last

  if parent.nil?
    raise Taxonifi::Lumper::LumperError, "Parent of [#{terminal.name}] within [#{names.display_name}] not yet instantiated. \n !! To resolve: \n\t 1) If this is not a species name your file may be missing a value in the 'Rank' column (nil values are assumed to be species, all other ranks must be populated). \n\t 2) Parent names must be read before children, check that this is the case."
  end

  # Swap the parent built from the string for the one the caller resolved.
  terminal.parent = parent

  # New name: add it and, when requested, link it to its synonym.
  unless nc.name_exists?(terminal)
    last_id = nc.add_object(terminal).id
    # NOTE(review): the attribute name was lost in the extracted listing
    # (it read `nc.object_by_id(last_id). = ...`); `related_name` is assumed
    # here -- confirm against Taxonifi::Model::Name.
    nc.object_by_id(last_id).related_name = nc.object_by_id(synonym_id) unless synonym_id.nil?
    return last_id
  end

  # Existing name: work on copies so the lookups below do not disturb the
  # parsed names. Subgenus handling is not implemented yet (revisit).
  tmp_genus      = names.genus.clone
  tmp_species    = names.species.clone
  tmp_subspecies = names.subspecies.clone unless names.subspecies.nil?

  case parent.rank
  when 'genus', 'subgenus'
    tmp_genus.parent = parent.parent
  when 'species'
    tmp_genus.parent = parent.parent.parent
    tmp_species = parent
    # The string may carry no subspecies even though the supplied parent is a
    # species (e.g. a binomial synonym); guard instead of calling on nil.
    tmp_subspecies.parent = tmp_species unless tmp_subspecies.nil?
  end

  real_genus   = nc.object_by_id(nc.name_exists?(tmp_genus))
  real_species = nc.object_by_id(nc.name_exists?(tmp_species))

  # !! Existing demo data Lygaeoidea have synonyms in which the genus name is
  # not instantiated. This might be a problem with DwC file validation in
  # general, something to look at; for now, give up on the combination.
  return nil if real_genus.nil? || real_species.nil?

  real_subgenus   = nil # revisit
  real_subspecies = nc.object_by_id(nc.name_exists?(tmp_subspecies)) unless tmp_subspecies.nil?

  nc.combinations.push [real_genus, real_subgenus, real_species, real_subspecies]
  nil
end
.name_collection(csv) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/taxonifi/lumper/lumps/parent_child_name_collection.rb', line 7

# Build a Taxonifi::Model::NameCollection from a parent/child style CSV.
#
# Reads the 'child', 'rank', 'parent', 'identifier' and 'synonyms' columns of
# every row. Rows whose rank is 'species' or empty are parsed as species name
# strings; every other rank is stored as a single name. Synonym strings
# ("Name|Name1|Name2") are collected while reading and only parsed after all
# rows have been processed, because names might have been mixed.
#
# @param csv the parsed CSV; must respond to headers and each_with_index
# @return the populated name collection
# @raise [Taxonifi::Lumper::LumperError] when the headers do not satisfy the
#   :eol_basic lump
def self.name_collection(csv)
  unless Taxonifi::Lumper.available_lumps(csv.headers).include?(:eol_basic)
    raise Taxonifi::Lumper::LumperError, "CSV does not have the required headers (#{Taxonifi::Lumper::LUMPS[:eol_basic].join(", ")})."
  end

  nc = Taxonifi::Model::NameCollection.new(:initial_id => 1)

  # identifier => Taxonifi::Name
  external_index = {}

  # Array of Hashes, one per row that carries synonyms:
  # {:synonyms => "Name|Name1|Name2", :valid_species_id => id, :external_index => parent name}
  synonym_list = []

  csv.each_with_index do |row, i|
    name        = row['child']
    raw_rank    = row['rank']
    rank        = raw_rank.nil? ? nil : raw_rank.downcase
    parent_id   = row['parent'].to_i
    external_id = row['identifier'].to_i

    valid_species_id = nil # Fix me

    index_rank =
      case rank
      when 'species', 'subspecies' then 'species_group'
      when 'genus', 'subgenus'     then 'genus_group'
      else rank
      end

    if rank.nil? || rank == 'species'
      valid_species_id = add_species_names_from_string(nc, name, external_index[parent_id])
      external_index[external_id] = nc.object_by_id(valid_species_id)
    else
      # Just a single string, we don't have to break anything down.
      same_named_ids = nc.by_name_index[index_rank][name]

      # TODO: this hasn't been hit yet
      # NOTE(review): this compares the element popped from one
      # parent_id_vector with the whole vector returned for parent_id, where
      # parent_id is the row's external 'parent' value -- confirm this is the
      # intended comparison.
      already_present = same_named_ids && same_named_ids.any? do |id|
        nc.parent_id_vector(id).pop == nc.parent_id_vector(parent_id)
      end

      unless already_present
        # TODO: No author, year have yet been observed for genus and higher names
        n = Taxonifi::Model::Name.new()
        n.rank = rank
        n.name = name
        n.row_number = i
        n.add_property(:external_id, external_id)

        parent_name = external_index[parent_id]
        n.parent = parent_name if parent_name

        unless nc.name_exists?(n)
          nc.add_object(n)
          external_index[external_id] = n
        end
      end
    end

    synonyms = row['synonyms']
    if !synonyms.nil? && synonyms.size > 0
      synonym_list.push({:synonyms => synonyms, :valid_species_id => valid_species_id, :external_index => external_index[parent_id]})
    end
  end

  # Parse the synonyms last, because names might have been mixed.
  synonym_list.each do |entry|
    entry[:synonyms].split("|").each do |synonym|
      add_species_names_from_string(nc, synonym, entry[:external_index], entry[:valid_species_id])
    end
  end

  nc
end