Google Street View Downloader
Description
Crawl and save the Google Street View network (metadata + images) of a given geographical area. The metadata are saved into a Redis database and the images into a directory. Speed: the metadata of about 200 panoramic images are processed per second (i.e. the complete metadata of all the images of Paris are crawled in about 10 minutes).
Requirements
You need to install:
- [http://redis.io redis] as a lightweight database
- [http://www.imagemagick.org/ imagemagick]
brew install redis
brew install imagemagick
Example
require 'rubygems'
require 'gsv_downloader'
# First we define a function that delimits the geographical area to crawl.
# The crawl stops following further links when the validator returns false.
#
# @param json_response [Hash] the metadata received for a panoID (parsed JSON)
# @return [Boolean] true when the panorama's region mentions "Paris",
#   false otherwise (including when the location/region data is missing)
area_validator = lambda do |json_response|
  location = json_response["Location"]
  # A response without location data is treated as outside the area
  # instead of raising NoMethodError on nil.
  region = location && location["region"]
  region.to_s.include?("Paris")
end
# Basic/required crawling options.
basic_options = {
  # area name
  area_name: "paris",
  # area validator to delimit the crawling within a given area
  area_validator: area_validator
}

# More advanced options.
advanced_options = {
  # area name
  area_name: "paris",
  # area validator to delimit the crawling within a given area
  area_validator: area_validator,
  # zoom level of the panoramic images saved
  image_zoom: 3, # default value = 3
  # main directory where the images will be downloaded
  dest_dir: "./paris", # default value = "./{area_name}"
  # number of images for each subdirectory
  # (since lots of images can be downloaded,
  # subdirectories are generated automatically)
  sub_dir_size: 100 # default value = 1000
}

# NOTE(review): the hash variable names and the constructor argument were
# missing from the original text. Passing the options hash to GSVManager.new
# is presumed — confirm against the GSVManager API.
# Pass basic_options instead to rely on the default values.
paris_area = GSVManager.new(advanced_options)
# Crawl and save the metadata of all GSV images
# within the area delimited by the area_validator.
# The first crawl needs the panoID of a location, e.g. the one described by
# https://cbks0.google.com/cbk?output=json&dm=1&pm=1&v=4&cb_client=maps_sv&fover=2&onerr=3&panoid=Np2alC97cgynvV_ZpJQZNA
# Speed: about 200 metadata records processed per second (= Paris is crawled in about 10 minutes)
start_pano_id = "Np2alC97cgynvV_ZpJQZNA"
# NOTE(review): the method name was missing from the original text, which read
# `paris_area.(start_panoID)` (Ruby parses that as `paris_area.call(...)`).
# `crawl_metadata` is presumed — confirm against the GSVManager API.
paris_area.crawl_metadata(start_pano_id)
# Number of saved panoramas (metadata) within this area.
paris_area.nb_panoramas
# Get the raw JSON metadata related to each saved panoID.
pano_ids = paris_area.panoramas
pano_ids.each do |pano_id|
  # NOTE(review): the getter name and the local variable were missing from the
  # original text; `get_metadata` is presumed — confirm against the GSVManager API.
  # The original also passed `panoID`, which is undefined here; the block
  # parameter is `pano_id`.
  meta_data = paris_area.get_metadata(pano_id)
  p JSON.parse(meta_data)
end
# Download each related GSV image manually.
# (Or, with specific options zoom_level = 4, dest_dir = "./foo":
#  paris_area.download_image(pano_id, zoom_level, dest_dir))
img_downloader = ImageDownloader.new
pano_ids.each { |pano_id| img_downloader.download(pano_id) }
# Parallel (multi-thread pool) version, much faster:
# the GSV images are downloaded in parallel.
# NOTE(review): presumably `download` only registers the panoID and the actual
# work happens in `start` — confirm against ImageDownloaderParallel.
img_downloader = ImageDownloaderParallel.new
pano_ids.each { |pano_id| img_downloader.download(pano_id) }
img_downloader.start
# Alternatively, let the manager find all the missing images to download
# and organise the download directory itself.
paris_area.download_images