Module: PreProcessor

Defined in:
lib/biodiversity/parser.rb

Constant Summary

# Matches whitespace followed by "species group", "species complex", "group"
# or "author" (ending on a word boundary) plus the rest of the line.
# Case-insensitive.
NOTES =
/\s+(species\s+group|species\s+complex|group|author)\b.*$/i
# Matches whitespace followed by "sensu" or "auct" (with or without a trailing
# dot) plus the rest of the line. Case-insensitive.
TAXON_CONCEPTS1 =
/\s+(sensu\.|sensu|auct\.|auct)\b.*$/i
# Matches whitespace followed by "s.s.", "s.l.", "s.str." or "s.lat." (each
# with an optional leading "(" and an optional whitespace character after the
# first dot), or by "sec.", "sec" or "near", plus the rest of the line.
# Extended mode (whitespace inside the literal is ignored); case-sensitive.
# NOTE(review): the trailing \b comes right after a literal "." in the dotted
# alternatives, so those only match when a word character immediately follows
# the dot -- confirm this is intended.
TAXON_CONCEPTS2 =
/\s+
 (\(?s\.\s?s\.|
 \(?s\.\s?l\.|
 \(?s\.\s?str\.|
 \(?s\.\s?lat\.|
sec\.|sec|near)\b.*$/x
# Matches a comma or whitespace followed by "pro parte" or "p.p." / "p. p."
# and optional trailing whitespace at the end of the line. Case-insensitive.
TAXON_CONCEPTS3 =
/(,\s*|\s+)(pro parte|p\.\s?p\.)\s*$/i
# Matches a comma or whitespace followed by "nomen" (whole word), "nom." or
# "comb." (each with an optional leading "(") plus the rest of the line.
# Case-insensitive.
NOMEN_CONCEPTS =
/(,\s*|\s+)(\(?nomen\b|\(?nom\.|\(?comb\.).*$/i
# Matches a comma or whitespace followed by one of the listed trailing tokens
# ("spp.", "var.", "cf.", "sp.", "ssp.", "hort." ...), an optional "?" and
# optional whitespace at the end of the line. Extended mode; case-insensitive.
LAST_WORD_JUNK =
/(,\s*|\s+)
                    (spp\.|spp|var\.|
var|von|van|ined\.|
ined|sensu|new|non|nec|
nudum|cf\.|cf|sp\.|sp|
ssp\.|ssp|subsp|subgen|hybrid|hort\.|hort)\??\s*$/ix

Class Method Summary

Class Method Details

.clean(a_string) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/biodiversity/parser.rb', line 25

# Splits a name string into a cleaned head and the trailing text that was
# removed from it.
#
# Each pattern (NOTES, TAXON_CONCEPTS1..3, NOMEN_CONCEPTS, LAST_WORD_JUNK) is
# applied in turn, deleting whatever it matches. The tail is the part of the
# original string that lies beyond the length of the shortened string.
#
# @param a_string [String] the raw name string
# @return [Array<String>] two elements: the cleaned string, and the
#   whitespace-stripped tail of the original
def self.clean(a_string)
  patterns = [NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2,
              TAXON_CONCEPTS3, NOMEN_CONCEPTS, LAST_WORD_JUNK]
  shortened = patterns.inject(a_string) do |name, pattern|
    name.gsub(pattern, "")
  end

  # Slice the tail from the untouched input before any character
  # substitutions are made.
  remainder = a_string[shortened.size..-1]

  # Replace the historical long s ("ſ") with a plain "s".
  cleaned = shortened.tr("ſ", "s")

  # Underscores become spaces only when the stripped string contains no
  # whitespace of its own.
  cleaned = cleaned.tr("_", " ") unless cleaned.strip =~ /\s/

  [cleaned, remainder.strip]
end