Class: DwcaHunter::ResourceWikispecies

Inherits:
Resource
  • Object
show all
Defined in:
lib/dwca_hunter/resources/wikispecies.rb

Instance Attribute Summary

Attributes inherited from Resource

#abbr, #command, #download_path, #title, #url, #uuid

Instance Method Summary collapse

Methods inherited from Resource

gunzip, #needs_download?, #needs_unpack?, unzip

Constructor Details

#initialize(opts = {}) ⇒ ResourceWikispecies

Returns a new instance of ResourceWikispecies.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/dwca_hunter/resources/wikispecies.rb', line 5

# Sets up the Wikispecies resource: working paths under the system temp
# directory, source metadata (command, title, URL, UUID), empty in-memory
# collections, and the regular expressions stored in @re. Finishes by
# handing opts to the parent initializer.
#
# @param opts [Hash] options passed on to the superclass initializer
# @option opts [String] :url replaces the default Wikimedia dump URL
def initialize(opts = {})
  tmp_root = Dir.tmpdir
  @wikisp_path = File.join(tmp_root, "dwca_hunter", "wikispecies")
  # File.open instead of Kernel#open: Kernel#open would spawn a subprocess
  # if the path ever began with "|"; File.open only ever opens a file.
  # The handle is left open here; nothing in this method closes it.
  @problems_file = File.open(File.join(tmp_root, "problems.txt"), "w:utf-8")
  @command = "wikispecies"
  @title = "Wikispecies"
  # A truthy opts[:url] takes precedence over the default dump location.
  @url = opts[:url] ||
         "http://dumps.wikimedia.org/specieswiki/latest/" \
         "specieswiki-latest-pages-articles.xml.bz2"
  @uuid = "68923690-0727-473c-b7c5-2ae9e601e3fd"
  @download_path = File.join(@wikisp_path, "data.xml.bz2")
  @data = []
  @templates = {}
  @taxon_ids = {}
  @tree = {}
  @paths = {}
  @extensions = []
  @re = {
    page_start: /^\s*\<page\>\s*$/,
    page_end: %r{^\s*\</page\>\s*$},
    template: /Template:/i,
    template_link: /\{\{([^\}]*)\}\}/,
    vernacular_names: /\{\{\s*VN\s*\|([^\}]+)\}\}/i
  }
  # Called last so the parent sees the instance variables set above.
  super(opts)
end

Instance Method Details

#download ⇒ Object



31
32
33
34
# File 'lib/dwca_hunter/resources/wikispecies.rb', line 31

# Fetches the resource at @url with curl (following redirects via -L) and
# writes it to @download_path.
#
# curl is started with an argument vector rather than an interpolated shell
# string, so characters such as "&", ";" or spaces in the URL or path are
# passed to curl verbatim instead of being interpreted by a shell.
#
# @return [String] whatever curl printed to standard output
# @raise [Errno::ENOENT] if the curl executable cannot be found
def download
  puts "Downloading from the source"
  IO.popen(["curl", "-L", @url, "-o", @download_path], &:read)
end

#make_dwca ⇒ Object



40
41
42
43
44
# File 'lib/dwca_hunter/resources/wikispecies.rb', line 40

# Runs the archive-building steps in order: enrich_data, then
# extend_classification, then generate_dwca.
#
# @return [Object] the result of generate_dwca
def make_dwca
  # The two preparatory steps are run only for their side effects.
  %i[enrich_data extend_classification].each { |step| send(step) }
  generate_dwca
end

#unpack ⇒ Object



36
37
38
# File 'lib/dwca_hunter/resources/wikispecies.rb', line 36

# Unpacks the downloaded data by delegating to unpack_bz2.
#
# @return [Object] whatever unpack_bz2 returns
def unpack
  # NOTE(review): presumably decompresses the file at @download_path
  # (named data.xml.bz2 in the constructor) — confirm in unpack_bz2.
  unpack_bz2
end