Class: Jetel::Modules::Tiger

Inherits:
Module
  • Object
show all
Defined in:
lib/jetel/modules/tiger/tiger.rb

Constant Summary collapse

BASE_URL =
'https://www2.census.gov/geo/tiger/TIGER2015'
CSS_SELECTOR =
'#innerPage > table > tr > td > a'

Instance Attribute Summary

Attributes inherited from Module

#downloader

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Module

#download_dir, download_dir, #download_source, downloaded_file, #downloaded_file, extract_dir, #extract_dir, extracted_file, #extracted_file, #initialize, #load, #sources, target_dir, #target_dir, #transform_dir, transform_dir, transformed_file, #transformed_file, #unzip

Constructor Details

This class inherits a constructor from Jetel::Modules::Module

Class Method Details

.sources ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/jetel/modules/tiger/tiger.rb', line 58

# Builds the complete list of sources by scraping the index page at BASE_URL
# and expanding every listed sub-directory through sub_sources.
#
# The first two links matched by CSS_SELECTOR are skipped (presumably
# header / parent-directory entries -- TODO confirm against the live page).
# Sub-directories are processed with pmap(8) (presumably a parallel map with
# 8 workers from the pmap gem -- confirm).
#
# @return [Array] flattened, nil-free list of the entries returned by
#   sub_sources for each sub-directory; empty when the index page lists
#   fewer than three links
def sources
  # URI.open (open-uri) is needed on Ruby 3.0+, where Kernel#open no longer
  # accepts URLs. The block form closes the response stream once parsed.
  page = URI.open(BASE_URL) { |io| Nokogiri::HTML(io) }

  # drop(2) yields an empty list on short pages, where [2..-1] returned nil
  # and the following call raised NoMethodError.
  nested = page.css(CSS_SELECTOR).drop(2).pmap(8) do |link|
    url = "#{BASE_URL}/#{link.attr('href')}"

    puts "Processing #{url}"
    sub_sources(url)
  end

  nested.flatten.compact
end

.sub_sources(base_url) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/jetel/modules/tiger/tiger.rb', line 38

# Lists the downloadable files found on one directory listing page.
#
# The first link matched by CSS_SELECTOR is skipped (presumably the
# parent-directory entry -- TODO confirm against the live page).
#
# @param base_url [String] URL of the directory listing; each link's href is
#   appended to it verbatim, so it is expected to end with '/'
# @return [Array<Hash>] one hash per remaining link with the keys
#   :name (last path segment of base_url), :url and :filename_downloaded
#   (the link's href); empty when the page lists no further links
def sub_sources(base_url)
  # URI.open (open-uri) is needed on Ruby 3.0+, where Kernel#open no longer
  # accepts URLs. The block form closes the response stream once parsed.
  page = URI.open(base_url) { |io| Nokogiri::HTML(io) }

  # Same for every entry on the page, so computed once.
  name = base_url.split('/').last

  # drop(1) yields an empty list on an empty page, where [1..-1] returned nil
  # and the following call raised NoMethodError.
  page.css(CSS_SELECTOR).drop(1).map do |link|
    href = link.attr('href')

    {
      name: name,
      # NOTE(review): this rewrite only matches 'http://' URLs; with an
      # 'https://' base_url it leaves the URL unchanged -- confirm whether the
      # FTP rewrite is still intended.
      url: "#{base_url}#{href}".gsub('http://www2', 'ftp://ftp'),
      filename_downloaded: href
    }
  end
end

Instance Method Details

#download(global_options, options, args) ⇒ Object



76
77
78
79
80
# File 'lib/jetel/modules/tiger/tiger.rb', line 76

# Downloads every source reported by the class-level sources method.
#
# Each entry is handed to download_source together with global_options
# merged with options (options wins on key conflicts). Entries are processed
# through pmap(16) (presumably a parallel map with 16 workers -- confirm).
#
# @param global_options [Hash] options shared by all commands
# @param options [Hash] options specific to this command
# @param args [Array] not used by this method
# @return [Array] the download_source result for each source
def download(global_options, options, args)
  catalog = self.class.sources
  # The merge stays inside the block so every call receives its own hash.
  catalog.pmap(16) { |entry| download_source(entry, global_options.merge(options)) }
end

#extract(global_options, options, args) ⇒ Object



82
83
84
85
86
# File 'lib/jetel/modules/tiger/tiger.rb', line 82

# Unzips every source reported by the class-level sources method.
#
# Each entry is handed to unzip together with global_options merged with
# options (options wins on key conflicts), one entry after another.
#
# @param global_options [Hash] options shared by all commands
# @param options [Hash] options specific to this command
# @param args [Array] not used by this method
# @return [Array] the unzip result for each source
def extract(global_options, options, args)
  catalog = self.class.sources
  catalog.map { |entry| unzip(entry, global_options.merge(options)) }
end

#transform(global_options, options, args) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/jetel/modules/tiger/tiger.rb', line 88

# Converts the extracted shapefiles of every source to TopoJSON by running
# the external `topojson` command once per `*.shp` file.
#
# For each source the transform directory is created if missing, and every
# `<extract_dir>/NAME.shp` is written to `<transform_dir>/NAME.topo.json`.
# The command's output is echoed to stdout. Sources are processed through
# pmap(8) (presumably a parallel map with 8 workers -- confirm).
#
# @param global_options [Hash] options shared by all commands
# @param options [Hash] options specific to this command
# @param args [Array] not used by this method
# @return [Array] one element per source (the Dir.glob block result)
def transform(global_options, options, args)
  self.class.sources.pmap(8) do |source|
    opts = global_options.merge(options)
    dest_dir = transform_dir(source, opts)
    FileUtils.mkdir_p(dest_dir)

    extracted_dir = extract_dir(source, opts)
    Dir.glob("#{extracted_dir}/*.shp") do |shapefile|
      puts "Transforming #{shapefile}"

      # Built from the basename so only the extension is replaced, not every
      # '.shp' or directory-name occurrence elsewhere in the path.
      target = File.join(dest_dir, "#{File.basename(shapefile, '.shp')}.topo.json")

      # e.g. "topojson data/Gadm/AFG/extracted/AFG_adm0.shp -o data/Gadm/AFG/transformed/AFG_adm0.topo.json"
      puts "topojson #{shapefile} -o #{target}"

      # Arguments are passed separately so paths containing spaces or shell
      # metacharacters reach topojson intact.
      PTY.spawn('topojson', shapefile, '-o', target) do |stdout, _stdin, _pid|
        begin
          stdout.each { |line| print line }
        rescue Errno::EIO
          # Reading the PTY raises EIO on Linux once the child has exited;
          # here it simply marks the end of the output.
        end
      end
    end
  end
end