Class: DorIndexing::Indexers::DescriptiveMetadataIndexer

Inherits:
Object
  • Object
show all
Defined in:
lib/dor_indexing/indexers/descriptive_metadata_indexer.rb

Overview

Indexes the descriptive metadata. (In the source file, RuboCop's Metrics/ClassLength cop is disabled for this class.)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(cocina:) ⇒ DescriptiveMetadataIndexer

Returns a new instance of DescriptiveMetadataIndexer.



12
13
14
15
16
# File 'lib/dor_indexing/indexers/descriptive_metadata_indexer.rb', line 12

# Stores the Cocina object and builds the Stanford::Mods record derived from
# its description.
#
# @param cocina [Object] must respond to `description` and `externalIdentifier`
# Any additional keyword arguments are accepted and ignored.
def initialize(cocina:, **)
  @cocina = cocina

  description = cocina.description
  identifier = cocina.externalIdentifier
  mods_document = Cocina::Models::Mapping::ToMods::Description.transform(description, identifier)

  # Only the root node of the generated MODS document is handed to the record.
  @stanford_mods_record = Stanford::Mods::Record.new.from_nk_node(mods_document.root)
end

Instance Attribute Details

#cocina ⇒ Object (readonly)

Returns the value of attribute cocina.



10
11
12
# File 'lib/dor_indexing/indexers/descriptive_metadata_indexer.rb', line 10

# @return [Object] the value given as `cocina:` when the indexer was constructed
def cocina = @cocina

#stanford_mods_record ⇒ Object (readonly)

Returns the value of attribute stanford_mods_record.



10
11
12
# File 'lib/dor_indexing/indexers/descriptive_metadata_indexer.rb', line 10

# @return [Object] the value assigned in #initialize, i.e. the result of
#   Stanford::Mods::Record#from_nk_node for the generated MODS root node
def stanford_mods_record = @stanford_mods_record

Instance Method Details

#to_solr ⇒ Hash

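Builds the partial Solr document for the descriptive metadata; fields whose value is blank are omitted. (In the source file, RuboCop's Metrics/MethodLength and Metrics/AbcSize cops are disabled for this method.)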

Returns:

  • (Hash)

    the partial solr document for descriptive metadata



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/dor_indexing/indexers/descriptive_metadata_indexer.rb', line 21

# Assembles the partial Solr document for the descriptive metadata.
#
# Fields are grouped by theme, merged in a fixed order, and every field whose
# value is not `present?` is dropped from the result.
#
# @return [Hash] the partial solr document for descriptive metadata
def to_solr
  mods = stanford_mods_record

  titles = {
    'main_title_tenim' => main_title, # searchable; solr schema.xml copyFields add 2 more field types
    'full_title_tenim' => full_title, # searchable; solr schema.xml copyField adds 1 more field type
    'additional_titles_tenim' => additional_titles, # searchable; solr schema.xml copyField adds 1 more field type
    'display_title_ss' => display_title # shown in Argo
  }

  contributors = {
    'author_text_nostem_im' => author_primary, # primary author: tokenized, not stemmed
    'author_display_ss' => author_primary, # author display in Argo
    'contributor_text_nostem_im' => author_all, # author names: tokenized, not stemmed
    'contributor_orcids_ssim' => orcids
  }

  topics = {
    'topic_ssim' => mods.topic_facet&.uniq,
    'topic_tesim' => stemmable_topics
  }

  publication = {
    'originInfo_date_created_tesim' => creation_date,
    'originInfo_publisher_tesim' => publisher_name,
    'originInfo_place_placeTerm_tesim' => event_place, # open question: do we want this?
    'sw_pub_date_facet_ssi' => mods.pub_year_int.to_s # SW Date facet
  }

  # SW facets plus a friend facet
  facets = {
    'sw_format_ssim' => sw_format, # SW Resource Type facet
    'mods_typeOfResource_ssim' => resource_type, # MODS Resource Type facet
    'sw_genre_ssim' => mods.sw_genre, # SW Genre facet
    'sw_language_ssim' => mods.sw_language_facet, # SW Language facet
    'sw_subject_temporal_ssim' => mods.era_facet, # SW Era facet
    'sw_subject_geographic_ssim' => subject_geographic # SW Region facet
  }

  # Same searchable descriptive text in several analyzer flavors, for better recall and precision.
  search_text = {
    'descriptive_tiv' => all_search_text, # ICU tokenized, ICU folded
    'descriptive_text_nostem_i' => all_search_text, # whitespace tokenized, ICU folded, word delimited
    'descriptive_teiv' => all_search_text # ICU tokenized, ICU folded, minimal stemming
  }

  # Keys are unique across groups, so merging only concatenates them in order.
  titles
    .merge(contributors, topics, publication, facets, search_text)
    .select { |_field, value| value.present? }
end