Module: Elasticrawl

Defined in:
lib/elasticrawl.rb,
lib/elasticrawl/job.rb,
lib/elasticrawl/crawl.rb,
lib/elasticrawl/error.rb,
lib/elasticrawl/config.rb,
lib/elasticrawl/cluster.rb,
lib/elasticrawl/version.rb,
lib/elasticrawl/job_step.rb,
lib/elasticrawl/parse_job.rb,
lib/elasticrawl/combine_job.rb,
lib/elasticrawl/crawl_segment.rb

Defined Under Namespace

Classes: AWSCredentialsInvalidError, Cluster, CombineJob, Config, ConfigDirMissingError, Crawl, CrawlSegment, DatabaseAccessError, ElasticMapReduceAccessError, Error, FileAccessError, Job, JobStep, ParseJob, S3AccessError

Constant Summary

S3 locations

COMMON_CRAWL_BUCKET =
'commoncrawl'
COMMON_CRAWL_PATH =
'crawl-data'
SEGMENTS_PATH =
'segments'
WARC_PATHS =
'warc.paths.gz'
MAX_SEGMENTS =
256
VERSION =
'1.1.8'