Class: DwcaHunter::ResourceWikispecies

Inherits:
Resource
  • Object
show all
Defined in:
lib/dwca_hunter/resources/wikispecies.rb

Instance Attribute Summary

Attributes inherited from Resource

#abbr, #command, #download_path, #title, #url, #uuid

Instance Method Summary collapse

Methods inherited from Resource

#download, gunzip, #needs_download?, #needs_unpack?, unzip

Constructor Details

#initialize(opts = {}) ⇒ ResourceWikispecies

Returns a new instance of ResourceWikispecies.



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/dwca_hunter/resources/wikispecies.rb', line 4

# Prepares a Wikispecies resource: working paths, resource metadata, empty
# accumulators and the regular expressions kept in @re, then forwards +opts+
# to the parent initializer.
#
# Side effect: opens <tmpdir>/problems.txt for writing (truncating any
# existing file). The handle is stored in @problems_file and is not closed
# by this method.
#
# @param opts [Hash] options, passed unchanged to +super+
# @option opts [String] :url replaces the default dump URL when truthy
def initialize(opts = {})
  @wikisp_path = File.join(Dir.tmpdir, 'dwca_hunter', 'wikispecies')
  # File.open instead of bare Kernel#open: Kernel#open treats a path that
  # starts with "|" as a command to spawn, File.open never does.
  @problems_file = File.open(File.join(Dir.tmpdir, 'problems.txt'), 'w:utf-8')
  @command = "wikispecies"
  @title = 'Wikispecies'
  # A caller-supplied :url wins; otherwise fall back to the latest dump.
  @url = opts[:url] ||
         'http://dumps.wikimedia.org/specieswiki/latest/' \
         'specieswiki-latest-pages-articles.xml.bz2'
  @uuid = '68923690-0727-473c-b7c5-2ae9e601e3fd'
  @download_path = File.join(@wikisp_path, 'data.xml.bz2')
  @data = []
  @templates = {}
  @taxon_ids = {}
  @tree = {}
  @paths = {}
  @extensions = []
  @re = {
    # A line holding nothing but <page> / </page> (surrounding whitespace ok).
    page_start: /^\s*\<page\>\s*$/,
    page_end: /^\s*\<\/page\>\s*$/,
    template: /Template:/i,
    # {{...}}; captures the text between the braces.
    template_link: /\{\{([^\}]*)\}\}/,
    # {{VN|...}}; captures everything after the first pipe.
    vernacular_names: /\{\{\s*VN\s*\|([^\}]+)\}\}/i
  }
  # Called last so the instance variables above are already set when the
  # parent initializer runs.
  super(opts)
end

Instance Method Details

#make_dwca ⇒ Object



34
35
36
37
38
# File 'lib/dwca_hunter/resources/wikispecies.rb', line 34

# Runs the three archive-building steps in a fixed order: enrich_data,
# then extend_classification, then generate_dwca.
#
# @return [Object] whatever generate_dwca returns
def make_dwca
  enrich_data
  extend_classification
  generate_dwca
end

#unpack ⇒ Object



30
31
32
# File 'lib/dwca_hunter/resources/wikispecies.rb', line 30

# Unpacks the downloaded resource by delegating to unpack_bz2.
#
# @return [Object] whatever unpack_bz2 returns
def unpack
  unpack_bz2
end