Module: PreProcessor
- Defined in:
- lib/biodiversity/parser.rb
Constant Summary
- NOTES =
/\s+(species\s+group|species\s+complex|group|author)\b.*$/i
- TAXON_CONCEPTS1 =
/\s+(sensu\.|sensu|auct\.|auct)\b.*$/i
- TAXON_CONCEPTS2 =
/\s+ (\(?s\.\s?s\.| \(?s\.\s?l\.| \(?s\.\s?str\.| \(?s\.\s?lat\.| sec\.|sec|near)\b.*$/x
- TAXON_CONCEPTS3 =
/(,\s*|\s+)(pro parte|p\.\s?p\.)\s*$/i
- NOMEN_CONCEPTS =
/(,\s*|\s+)(\(?nomen\b|\(?nom\.|\(?comb\.).*$/i
- LAST_WORD_JUNK =
/(,\s*|\s+) (spp\.|spp|var\.| var|von|van|ined\.| ined|sensu|new|non|nec| nudum|cf\.|cf|sp\.|sp| ssp\.|ssp|subsp|subgen|hybrid|hort\.|hort)\??\s*$/ix
Class Method Summary
Class Method Details
.clean(a_string) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/biodiversity/parser.rb', line 25
#
# Splits a string into a cleaned part and the text that was cut off.
#
# Every pattern in the list below is applied in order with +gsub+, deleting
# whatever it matches. The cut-off text is then recovered positionally: it
# is everything in the original string from the cleaned string's length
# onward.
#
# @param a_string [String] the string to clean; it is not modified
# @return [Array<String>] a two-element array: the cleaned string (with
#   "ſ" replaced by "s", and underscores replaced by spaces when the
#   stripped result contains no whitespace), followed by the stripped tail
def self.clean(a_string)
  patterns = [NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3,
              NOMEN_CONCEPTS, LAST_WORD_JUNK]
  cleaned = patterns.inject(a_string) { |text, pattern| text.gsub(pattern, "") }

  # Measured before the character substitutions below; +tr+ swaps characters
  # one-for-one, so the length would not change either way.
  removed = a_string[cleaned.size..-1]

  cleaned = cleaned.tr("ſ", "s") # old "s"
  # Underscores are only turned into spaces when the string has no inner
  # whitespace of its own.
  cleaned = cleaned.tr("_", " ") unless cleaned.strip =~ /\s/

  [cleaned, removed.strip]
end