Class: Taxonifi::Export::SpeciesFile
- Defined in:
- lib/taxonifi/export/format/species_file.rb
Overview
Dumps tables identical to the existing structure in SpeciesFile. Will only work in the pre Identity world. Will reconfigure as templates for Jim’s work after the fact.
Constant Summary collapse
- SPECIES_FILE_RANKS =
tblRanks 5/17/2012
# Maps a rank name to the integer used for the RankID column (see tblTaxa).
# Values were taken from tblRanks on 5/17/2012.
{
  # There is no variety rank per se in SFs, they are handled this way according to DE
  # (i.e. 'variety' shares the value 5 with 'subspecies').
  'variety' => 5,
  'subspecies' => 5,
  'species' => 10,
  'species subgroup' => 11,
  'species group' => 12,
  'species series' => 14,
  'infragenus' => 16,
  'subgenus' => 18,
  'genus' => 20,
  'genus group' => 22,
  'subtribe' => 28,
  'tribe' => 30,
  'supertribe' => 32,
  'infrafamily' => 36,
  'subfamily' => 38,
  'subfamily group' => 39,
  'family' => 40,
  'epifamily' => 41,
  'superfamily' => 42,
  'superfamily group' => 44,
  'subinfraordinal group' => 45,
  'infraorder' => 46,
  'suborder' => 48,
  'order' => 50,
  'mirorder' => 51,
  'superorder' => 52,
  'magnorder' => 53,
  'cohort' => 54,
  'supercohort' => 55,
  'infraclass' => 56,
  'subclass' => 58,
  'class' => 60,
  'superclass' => 62,
  'infraphylum' => 66,
  'subphylum' => 68,
  'phylum' => 70,
  'superphylum' => 72,
  'infrakingdom' => 76,
  'subkingdom' => 78,
  'kingdom' => 80,
  'superkingdom' => 82,
  'life' => 90,
  'unknown' => 100
}
Constants inherited from Base
Base::EXPORT_BASE, Base::TAXRANKS
Instance Attribute Summary collapse
-
#authorized_user_id ⇒ Object
Returns the value of attribute authorized_user_id.
-
#built_nomenclators ⇒ Object
Returns the value of attribute built_nomenclators.
-
#genus_names ⇒ Object
Returns the value of attribute genus_names.
-
#name_collection ⇒ Object
Returns the value of attribute name_collection.
-
#nomenclator ⇒ Object
Returns the value of attribute nomenclator.
-
#pub_collection ⇒ Object
Returns the value of attribute pub_collection.
-
#ref_collection ⇒ Object
Returns the value of attribute ref_collection.
-
#species_names ⇒ Object
Returns the value of attribute species_names.
-
#time ⇒ Object
Returns the value of attribute time.
Attributes inherited from Base
#base_export_path, #export_folder
Instance Method Summary collapse
- #export ⇒ Object
-
#export_references(options = {}) ⇒ Object
Deprecated! Export only the ref_collection.
-
#get_ref(name) ⇒ Object
Gets the reference for a name as referenced by .properties.
-
#initialize(options = {}) ⇒ SpeciesFile
constructor
A new instance of SpeciesFile.
- #sql_for_genus_and_species_names_tables(type) ⇒ Object
-
#tblCites ⇒ Object
Generate tblCites string.
- #tblGenusNames ⇒ Object
-
#tblNomenclator ⇒ Object
Must be called post tblGenusNames and tblSpeciesNames.
-
#tblPeople ⇒ Object
Generate tblPeople string.
-
#tblPubs ⇒ Object
Generate tblPubs SQL.
-
#tblRefAuthors ⇒ Object
Generate tblRefAuthors string.
-
#tblRefs ⇒ Object
Generate a tblRefs string.
- #tblSpeciesNames ⇒ Object
- #tblTaxa ⇒ Object
-
#tblTypeSpecies ⇒ Object
Generate tblTypeSpecies string.
Methods inherited from Base
#configure_folders, #export_path, #new_output_file, #sanitize, #sql_insert_statement, #sqlize, #write_file
Constructor Details
#initialize(options = {}) ⇒ SpeciesFile
Returns a new instance of SpeciesFile.
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/taxonifi/export/format/species_file.rb', line 64

# Builds a SpeciesFile exporter around a NameCollection.
#
# @param options [Hash] overrides merged over the defaults listed below
# @option options [Taxonifi::Model::NameCollection] :nc names to export (default: a new, empty collection)
# @option options [String] :export_folder forwarded to Base via super (default: 'species_file')
# @option options [Object] :authorized_user_id required; stored and written to the ModifiedBy columns
# @option options [Array<String>] :manifest names of the tbl* methods that #export sends, in order
# @raise [Taxonifi::Export::ExportError] when :nc is not a NameCollection or :authorized_user_id is nil
def initialize(options = {})
  opts = {
    :nc => Taxonifi::Model::NameCollection.new,
    :export_folder => 'species_file',
    :authorized_user_id => nil,
    :manifest => %w{tblPubs tblRefs tblPeople tblRefAuthors tblTaxa tblGenusNames tblSpeciesNames tblNomenclator tblCites tblTypeSpecies}
  }.merge!(options)

  @manifest = opts[:manifest]
  super(opts)

  # The previous guard, `! opts[:nc].class == X`, parsed as `(!opts[:nc].class) == X`,
  # i.e. `false == X`, so it could never fire. is_a? performs the intended type check.
  unless opts[:nc].is_a?(Taxonifi::Model::NameCollection)
    raise Taxonifi::Export::ExportError, 'NameCollection not passed to SpeciesFile export.'
  end
  if opts[:authorized_user_id].nil?
    raise Taxonifi::Export::ExportError, 'You must provide authorized_user_id for species_file export initialization.'
  end

  @name_collection = opts[:nc]
  @pub_collection = {} # title => id
  @authorized_user_id = opts[:authorized_user_id]

  # Careful here, at present we are just generating Reference micro-citations from our names, so the indexing "just works"
  # because it's all internal. There is a strong potential for key collisions if this pipeline is modified to
  # include references external to the initialized name_collection. See also export_references.
  #
  # @by_author_reference_index = {}
  @genus_names = {}
  @species_names = {}
  @nomenclator = {}

  @time = Time.now.strftime("%F %T") # single timestamp reused for every LastUpdate column
  @empty_quotes = ""
end
Instance Attribute Details
#authorized_user_id ⇒ Object
Returns the value of attribute authorized_user_id.
60 61 62 |
# File 'lib/taxonifi/export/format/species_file.rb', line 60

# Returns the value of attribute authorized_user_id.
#
# @return [Object] the value stored in @authorized_user_id
def authorized_user_id
  @authorized_user_id
end
#built_nomenclators ⇒ Object
Returns the value of attribute built_nomenclators.
62 63 64 |
# File 'lib/taxonifi/export/format/species_file.rb', line 62 def built_nomenclators @built_nomenclators end |
#genus_names ⇒ Object
Returns the value of attribute genus_names.
59 60 61 |
# File 'lib/taxonifi/export/format/species_file.rb', line 59 def genus_names @genus_names end |
#name_collection ⇒ Object
Returns the value of attribute name_collection.
56 57 58 |
# File 'lib/taxonifi/export/format/species_file.rb', line 56 def name_collection @name_collection end |
#nomenclator ⇒ Object
Returns the value of attribute nomenclator.
59 60 61 |
# File 'lib/taxonifi/export/format/species_file.rb', line 59 def nomenclator @nomenclator end |
#pub_collection ⇒ Object
Returns the value of attribute pub_collection.
58 59 60 |
# File 'lib/taxonifi/export/format/species_file.rb', line 58 def pub_collection @pub_collection end |
#ref_collection ⇒ Object
Returns the value of attribute ref_collection.
57 58 59 |
# File 'lib/taxonifi/export/format/species_file.rb', line 57 def ref_collection @ref_collection end |
#species_names ⇒ Object
Returns the value of attribute species_names.
59 60 61 |
# File 'lib/taxonifi/export/format/species_file.rb', line 59 def species_names @species_names end |
#time ⇒ Object
Returns the value of attribute time.
60 61 62 |
# File 'lib/taxonifi/export/format/species_file.rb', line 60 def time @time end |
Instance Method Details
#export ⇒ Object
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/taxonifi/export/format/species_file.rb', line 94

# Writes the output of every method named in @manifest to 'everything.sql',
# wrapped in a BEGIN TRY / BEGIN TRANSACTION ... CATCH / ROLLBACK envelope.
#
# Before the tables are generated, genus-level and species-level name strings
# are registered as keys (with nil values) in @genus_names and @species_names.
#
# @return [true]
def export()
  super

  # You must have decided how to create and link the reference IDs.
  # Reference related approaches:
  #
  #   @name_collection.generate_ref_collection(1)
  #   # Give authors unique ids:
  #   @name_collection.ref_collection.uniquify_authors(1)
  #
  # (Disabled check) raise Taxonifi::Export::ExportError, 'NameCollection has no RefCollection, you might try @name_collection.generate_ref_collection(1), or alter the manifest: hash.' if ! @name_collection.ref_collection.nil?

  # See notes in #initialize re potential key collisions!
  # @by_author_reference_index = @name_collection.ref_collection.collection.inject({}){|hsh, r| hsh.merge!(r.author_year_index => r)}

  %w{genus subgenus}.each do |rank|
    @name_collection.names_at_rank(rank).each { |taxon| @genus_names[taxon.name] = nil }
  end

  %w{species subspecies variety}.each do |rank|
    @name_collection.names_at_rank(rank).each { |taxon| @species_names[taxon.name] = nil }
  end

  # Add combinations of names from nomenclators/citations as well.
  # Positions 0-1 are genus-level parts, positions 2-4 are species-level parts.
  @name_collection.nomenclators.each_value do |parts|
    parts.values_at(0, 1).each { |genus_part| @genus_names[genus_part] = nil }
    parts.values_at(2, 3, 4).each { |species_part| @species_names[species_part] = nil }
  end

  # Missing parts were registered as nil / empty keys above; drop them again.
  [@genus_names, @species_names].each do |registry|
    registry.delete_if { |key, _value| key.nil? || key.length == 0 }
  end

  statements = ['BEGIN TRY', 'BEGIN TRANSACTION']
  statements.concat(@manifest.map { |table_method| send(table_method) })
  statements.push(
    'COMMIT',
    'END TRY',
    'BEGIN CATCH',
    'SELECT ERROR_LINE() AS ErrorLine, ERROR_NUMBER() AS ErrorNumber, ERROR_MESSAGE() AS ErrorMessage;',
    'ROLLBACK',
    'END CATCH'
  )

  write_file('everything.sql', statements.join("\n\n"))
  true
end
#export_references(options = {}) ⇒ Object
Deprecated! Export only the ref_collection. Sidesteps the main name-centric exports. Note that this still uses the base @name_collection object as a starting reference; it just references @name_collection.ref_collection. So you can do:
nc = Taxonifi::Model::NameCollection.new
nc.ref_collection = Taxonifi::Model::RefCollection.new
etc.
148 149 150 151 |
# File 'lib/taxonifi/export/format/species_file.rb', line 148

# Formerly exported only the ref_collection; now always raises.
#
# @deprecated Alter the manifest passed to #initialize to achieve a similar result.
# @param options [Hash] accepted for backward compatibility; ignored
# @raise [Taxonifi::Export::ExportError] always
def export_references(options = {})
  raise Taxonifi::Export::ExportError, 'Method deprecated, alter manifest to achieve a similar result.'
  # configure_folders
end
#get_ref(name) ⇒ Object
Gets the reference for a name as referenced by .properties
155 156 157 158 159 160 161 |
# File 'lib/taxonifi/export/format/species_file.rb', line 155

# Gets the reference for a name.
#
# @param name [#original_description_reference] the name to look up
# @return [Object, nil] name.original_description_reference, or nil when that is nil/false
def get_ref(name)
  # Earlier approach, kept for reference:
  #   if not name.properties[:link_to_ref_from_row].nil?
  #     return @name_collection.ref_collection.object_from_row(name.properties[:link_to_ref_from_row])
  #   end
  #   nil
  name.original_description_reference || nil
end
#sql_for_genus_and_species_names_tables(type) ⇒ Object
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 |
# File 'lib/taxonifi/export/format/species_file.rb', line 390

# Builds the INSERT statements for tblGenusNames or tblSpeciesNames.
#
# Every key of the matching registry (genus_names / species_names) is given a
# 1-based id in iteration order; the id is stored back into the registry as the
# key's value and written to the "<type>NameID" column.
#
# @param type [String] 'Genus' or 'Species'
# @return [String] the statements joined with newlines
def sql_for_genus_and_species_names_tables(type)
  id_column = "#{type}NameID"
  @headers = [id_column, "Name", "LastUpdate", "ModifiedBy", "Italicize"]
  registry = self.send("#{type.downcase}_names")

  # `keys` is a snapshot, so writing ids back into the registry while looping is safe.
  statements = registry.keys.each_with_index.map do |name_string, position|
    name_id = position + 1
    registry[name_string] = name_id
    row = {
      id_column.to_sym => name_id,
      Name: name_string,
      LastUpdate: @time,
      ModifiedBy: @authorized_user_id,
      Italicize: 1 # always true for these data
    }
    sql_insert_statement("tbl#{type}Names", row)
  end

  statements.join("\n")
end
#tblCites ⇒ Object
Generate tblCites string.
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 |
# File 'lib/taxonifi/export/format/species_file.rb', line 317

# Generate tblCites string.
#
# @name_collection.citations is read as a hash of name id => list of
# [ref_id, nomenclator_index, properties] tuples. One INSERT is produced per
# tuple; SeqNum restarts at 1 for every name.
#
# @return [String] the statements joined with newlines
def tblCites
  @headers = %w{TaxonNameID SeqNum RefID NomenclatorID LastUpdate ModifiedBy NewNameStatus CitePages Note TypeClarification CurrentConcept ConceptChange InfoFlags InfoFlagStatus PolynomialStatus}
  sql = []

  @name_collection.citations.each do |name_id, citations|
    citations.each_with_index do |(ref_id, nomenclator_index, properties), index|
      # A tuple without a properties element would otherwise raise on nil below.
      properties ||= {}
      cols = {
        TaxonNameID: name_id,
        SeqNum: index + 1,
        RefID: ref_id,
        NomenclatorID: nomenclator_index,
        LastUpdate: @time,
        ModifiedBy: @authorized_user_id,
        CitePages: properties[:cite_pages] || @empty_quotes,
        NewNameStatus: 0,
        Note: properties[:note] || @empty_quotes,
        TypeClarification: 0, # We might derive more data from this
        CurrentConcept: (properties[:current_concept] == true ? 1 : 0), # only a literal true counts
        ConceptChange: 0, # Unspecified
        InfoFlags: 0,
        InfoFlagStatus: 1, # 1 => needs review
        PolynomialStatus: 0
      }
      sql << sql_insert_statement('tblCites', cols)
    end
  end

  sql.join("\n")
end
#tblGenusNames ⇒ Object
376 377 378 379 380 381 |
# File 'lib/taxonifi/export/format/species_file.rb', line 376

# Generate tblGenusNames string.
#
# TODO: SF tests catch unused names based on some names not being included in Nomenclator data.
# We could optimize so that the workaround is removed, i.e. all the names get added here, but not
# all of them get added to Nomenclator/Cites because of citations which are not original combinations.
#
# @return [String]
def tblGenusNames
  sql_for_genus_and_species_names_tables('Genus')
end
#tblNomenclator ⇒ Object
Must be called post tblGenusNames and tblSpeciesNames. Some records are not used but can be cleaned by SF
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 |
# File 'lib/taxonifi/export/format/species_file.rb', line 411

# Generate tblNomenclator string.
#
# Must be called post tblGenusNames and tblSpeciesNames: the ids are looked up
# in @genus_names / @species_names, which those builders populate. A part with
# no id is written as 0. Some records are not used but can be cleaned by SF.
#
# The NomenclatorID is the key of the entry in @name_collection.nomenclators.
# (The listing previously also kept a local counter `i`, but it was shadowed by
# the block parameter and its increment had no effect; it has been removed.)
#
# @return [String] the statements joined with newlines
def tblNomenclator
  @headers = %w{NomenclatorID GenusNameID SubgenusNameID SpeciesNameID SubspeciesNameID LastUpdate ModifiedBy SuitableForGenus SuitableForSpecies InfrasubspeciesNameID InfrasubKind}
  sql = []

  # Ugh, move build from here
  @name_collection.nomenclators.each do |nomenclator_id, name|
    genus_id      = @genus_names[name[0]] || 0
    subgenus_id   = @genus_names[name[1]] || 0
    species_id    = @species_names[name[2]] || 0
    subspecies_id = @species_names[name[3]] || 0
    variety_id    = @species_names[name[4]] || 0

    cols = {
      NomenclatorID: nomenclator_id,
      GenusNameID: genus_id,
      SubgenusNameID: subgenus_id,
      SpeciesNameID: species_id,
      SubspeciesNameID: subspecies_id,
      InfrasubspeciesNameID: variety_id,
      InfrasubKind: (variety_id == 0 ? 0 : 2),
      LastUpdate: @time,
      ModifiedBy: @authorized_user_id,
      SuitableForGenus: 0, # Set in SF w test
      SuitableForSpecies: 0 # Set in SF w test
    }
    sql << sql_insert_statement('tblNomenclator', cols)
  end

  sql.join("\n")
end
#tblPeople ⇒ Object
Generate tblPeople string.
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/taxonifi/export/format/species_file.rb', line 277 def tblPeople @headers = %w{PersonID FamilyName GivenNames GivenInitials Suffix Role LastUpdate ModifiedBy} sql = [] @name_collection.ref_collection..each do |a| cols = { PersonID: a.id, FamilyName: (a.last_name.length > 0 ? a.last_name : "Unknown"), GivenNames: a.first_name || @empty_quotes, GivenInitials: a.initials_string || @empty_quotes, Suffix: a.suffix || @empty_quotes, Role: 1, # authors LastUpdate: @time, ModifiedBy: @authorized_user_id } sql << sql_insert_statement('tblPeople', cols) end sql.join("\n") end |
#tblPubs ⇒ Object
Generate tblPubs SQL
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 |
# File 'lib/taxonifi/export/format/species_file.rb', line 245

# Generate tblPubs SQL.
#
# The distinct, non-nil publications of the references are numbered from 1.
# Each publication => PubID pair is also recorded in @pub_collection.
#
# @return [String] the statements joined with newlines
def tblPubs
  statements = []
  @headers = %w{PubID PrefID PubType ShortName FullName Note LastUpdate ModifiedBy Publisher PlacePublished PubRegID Status StartYear EndYear BHL}

  # Hackish should build this elsewhere, but degrades OK
  publications = @name_collection.ref_collection.collection.map { |reference| reference.publication }.compact.uniq

  publications.each_with_index do |full_name, position|
    pub_id = position + 1
    row = {
      PubID: pub_id,
      PrefID: 0,
      PubType: 1,
      ShortName: "unknown_#{position}", # Unique constraint
      FullName: full_name,
      Note: @empty_quotes,
      LastUpdate: @time,
      ModifiedBy: @authorized_user_id,
      Publisher: @empty_quotes,
      PlacePublished: @empty_quotes,
      PubRegID: 0,
      Status: 0,
      StartYear: 0,
      EndYear: 0,
      BHL: 0
    }
    @pub_collection[full_name] = pub_id
    statements << sql_insert_statement('tblPubs', row)
  end

  statements.join("\n")
end
#tblRefAuthors ⇒ Object
Generate tblRefAuthors string.
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
# File 'lib/taxonifi/export/format/species_file.rb', line 297 def tblRefAuthors @headers = %w{RefID PersonID SeqNum AuthorCount LastUpdate ModifiedBy} sql = [] @name_collection.ref_collection.collection.each do |r| r..each_with_index do |x, i| cols = { RefID: r.id, PersonID: x.id, SeqNum: i + 1, AuthorCount: r..size + 1, LastUpdate: @time, ModifiedBy: @authorized_user_id } sql << sql_insert_statement('tblRefAuthors', cols) end end sql.join("\n") end |
#tblRefs ⇒ Object
Generate a tblRefs string.
202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
# File 'lib/taxonifi/export/format/species_file.rb', line 202

# Generate a tblRefs string: one INSERT per reference in
# @name_collection.ref_collection.collection, joined with newlines.
#
# PubID is looked up in @pub_collection by the reference's publication and falls
# back to 0, so tblPubs has to run first for real ids to be found.
#
# NOTE(review): @headers lists only five of the columns written below — confirm
# whether sql_insert_statement relies on @headers.
#
# @return [String]
def tblRefs
  sql = []
  @headers = %w{RefID ActualYear Title PubID Verbatim}

  @name_collection.ref_collection.collection.each do |r|
    # Assumes the 0 "null" pub id is there
    pub_id = @pub_collection[r.publication] || 0

    # Build a note based on "unused" properties; entries with a nil/false value are skipped.
    note = []
    (r.properties || {}).each do |key, value|
      note << "#{key}: #{value}" if value
    end
    note = note.join("; ")
    note = @empty_quotes if note.empty?

    cols = {
      RefID: r.id,
      ContainingRefID: 0,
      Title: (r.title.nil? ? @empty_quotes : r.title),
      PubID: pub_id,
      Series: @empty_quotes,
      Volume: r.volume || @empty_quotes,
      Issue: r.number || @empty_quotes,
      RefPages: r.page_string, # always a string
      ActualYear: r.year || @empty_quotes,
      StatedYear: @empty_quotes,
      AccessCode: 0,
      Flags: 0,
      Note: note,
      LastUpdate: @time,
      LinkID: 0,
      ModifiedBy: @authorized_user_id,
      CiteDataStatus: 0,
      Verbatim: r.full_citation || @empty_quotes
    }
    sql << sql_insert_statement('tblRefs', cols)
  end

  sql.join("\n")
end
#tblSpeciesNames ⇒ Object
383 384 385 386 387 388 |
# File 'lib/taxonifi/export/format/species_file.rb', line 383

# Generate tblSpeciesNames string.
#
# TODO: SF tests catch unused names based on some names not being included in Nomenclator data.
# We could optimize so that the workaround is removed, i.e. all the names get added here, but not
# all of them get added to Nomenclator/Cites because of citations which are not original combinations.
#
# @return [String]
def tblSpeciesNames
  sql_for_genus_and_species_names_tables('Species')
end
#tblTaxa ⇒ Object
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# File 'lib/taxonifi/export/format/species_file.rb', line 163 def tblTaxa @headers = %w{TaxonNameID TaxonNameStr RankID Name Parens AboveID RefID DataFlags AccessCode Extinct NameStatus StatusFlags OriginalGenusID LastUpdate ModifiedBy} sql = [] sql_above = [] # Need to add by rank for FK constraint handling Taxonifi::RANKS.each do |rank| @name_collection.names_at_rank(rank).each do |n| $DEBUG && $stderr.puts("#{n.name} is too long") if n.name.length > 30 # ref = get_ref(n) cols = { TaxonNameID: n.id, TaxonNameStr: n.parent_ids_sf_style, # closure -> ends with 1 RankID: SPECIES_FILE_RANKS[n.rank], Name: n.name, Parens: (n.parens ? 1 : 0), AboveID: 0, RefID: (n.original_description_reference ? n.original_description_reference.id : 0), DataFlags: 0, # see http://software.speciesfile.org/Design/TaxaTables.aspx#Taxon, a flag populated when data is reviewed, initialize to zero AccessCode: 0, Extinct: (n.properties && n.properties['extinct'] == 'true' ? 1 : 0), NameStatus: (n..nil? ? 0 : 7), # 0 :valid, 7: synonym) StatusFlags: (n..nil? ? 0 : 262144), # 0 :valid, 262144: jr. synonym OriginalGenusID: (n.properties && !n.properties['original_genus_id'].nil? ? n.properties['original_genus_id'] : 0), # SF must be pre-configured with 0 filler (this restriction needs to go) LastUpdate: @time, ModifiedBy: @authorized_user_id, } sql << sql_insert_statement('tblTaxa', cols) above_id = (n..nil? ? (n.parent ? n.parent.id : 0) : n..id) sql_above.push "UPDATE tblTaxa SET AboveID = #{above_id} where TaxonNameID = #{n.id};" end end sql.join("\n") + sql_above.join("\n") end |
#tblTypeSpecies ⇒ Object
Generate tblTypeSpecies string.
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 |
# File 'lib/taxonifi/export/format/species_file.rb', line 349

# Generate tblTypeSpecies string.
#
# One INSERT is produced for every genus or subgenus name that has a
# :type_species_id property and for which #get_ref returns a reference.
# Names without properties are skipped instead of raising.
#
# @return [String] the statements joined with newlines
def tblTypeSpecies
  @headers = %w{GenusNameID SpeciesNameID Reason AuthorityRefID FirstFamGrpNameID LastUpdate ModifiedBy NewID}
  sql = []

  names = @name_collection.names_at_rank('genus') + @name_collection.names_at_rank('subgenus')
  names.each do |n|
    type_species_id = n.properties && n.properties[:type_species_id]
    next unless type_species_id

    # The reference is only used as a presence check; AuthorityRefID is written as 0.
    # ref = @by_author_reference_index[n.author_year_index]
    next if get_ref(n).nil?

    cols = {
      GenusNameID: n.id,
      SpeciesNameID: type_species_id,
      Reason: 0,
      AuthorityRefID: 0,
      FirstFamGrpNameID: 0,
      LastUpdate: @time,
      ModifiedBy: @authorized_user_id,
      NewID: 0 # What is this?
    }
    sql << sql_insert_statement('tblTypeSpecies', cols)
  end

  sql.join("\n")
end