Module: Stevedore

Defined in:
lib/split_archive.rb,
lib/stevedore-uploader.rb,
lib/parsers/stevedore_blob.rb,
lib/parsers/stevedore_html.rb,
lib/parsers/stevedore_email.rb,
lib/parsers/stevedore_csv_row.rb

Overview

Splits PST and Mbox mail archive formats.

Defined Under Namespace

Classes: ArchiveSplitter, ESUploader, StevedoreBlob, StevedoreCsvRow, StevedoreEmail, StevedoreHTML

Constant Summary

# Default field mapping for Stevedore documents.
# NOTE(review): the option names used here (not_analyzed, analyzer,
# term_vector, ...) look like Elasticsearch mapping settings, presumably
# consumed by ESUploader -- confirm against the uploader code.
DEFAULT_MAPPING = begin
  # Options shared by the body field and its snowball sub-field.
  offset_options = { index_options: :offsets, term_vector: :with_positions_offsets }

  # Builds the mapping for one address header: a string field carrying an
  # `email` sub-field (using "email_analyzer") and a plain string sub-field
  # named after the header itself.
  address_header = lambda do |header|
    {
      type: "string",
      fields: {
        email: { type: "string", analyzer: "email_analyzer" },
        header => { type: "string" }
      }
    }
  end

  # Sub-field of body that goes through 'snowball_analyzer'.
  snowball_field = { type: :string, index: "analyzed", analyzer: 'snowball_analyzer' }.merge(offset_options)

  # Stored body text; merge order keeps the keys in the sequence
  # type, index_options, term_vector, store, fields.
  body_field = { type: :string }.merge(offset_options).merge(
    store: true,
    fields: { snowball: snowball_field }
  )

  # Deliberately left disabled by the original author ("might break stuff"):
  #   "attachments" => {type: :string, index: :not_analyzed}
  # It was intended to keep the index name (which often contains relevant
  # search terms) from being indexed, e.g. if a user wants to search for
  # 'bernie' in the bernie-burlington-emails.
  metadata_fields = {
    "Message-From" => address_header.call("Message-From"),
    "Message-To"   => address_header.call("Message-To")
  }

  {
    sha1:         { type: :string, index: :not_analyzed },
    title:        { type: :string, analyzer: :keyword },
    source_url:   { type: :string, index: :not_analyzed },
    modifiedDate: { type: :date, format: "dateOptionalTime" },
    _updated_at:  { type: :date },
    analyzed: {
      properties: {
        body:     body_field,
        metadata: { properties: metadata_fields }
      }
    }
  }
end