Module: Elasticrawl
- Defined in:
lib/elasticrawl.rb,
lib/elasticrawl/job.rb,
lib/elasticrawl/crawl.rb,
lib/elasticrawl/error.rb,
lib/elasticrawl/config.rb,
lib/elasticrawl/cluster.rb,
lib/elasticrawl/version.rb,
lib/elasticrawl/job_step.rb,
lib/elasticrawl/parse_job.rb,
lib/elasticrawl/combine_job.rb,
lib/elasticrawl/crawl_segment.rb
Defined Under Namespace
Classes: AWSCredentialsInvalidError, Cluster, CombineJob, Config, ConfigDirMissingError, Crawl, CrawlSegment, DatabaseAccessError, ElasticMapReduceAccessError, Error, FileAccessError, Job, JobStep, ParseJob, S3AccessError
Constant Summary
- COMMON_CRAWL_BUCKET =
# S3 locations
'commoncrawl'
- COMMON_CRAWL_PATH =
'crawl-data'
- SEGMENTS_PATH =
'segments'
- WARC_PATHS =
'warc.paths.gz'
- MAX_SEGMENTS =
256
- VERSION =
'1.1.8'