Class: MarcIndexer
- Inherits:
-
Blacklight::Marc::Indexer
- Object
- Traject::Indexer::MarcIndexer
- Blacklight::Marc::Indexer
- MarcIndexer
- Includes:
- Blacklight::Marc::Indexer::Formats
- Defined in:
- lib/generators/blacklight/marc/templates/app/models/marc_indexer.rb
Constant Summary
Constants inherited from Blacklight::Marc::Indexer
Blacklight::Marc::Indexer::ATOU, Blacklight::Marc::Indexer::ATOZ
Instance Method Summary collapse
-
#initialize ⇒ MarcIndexer
constructor
A new instance of MarcIndexer.
Methods included from Blacklight::Marc::Indexer::Formats
Methods inherited from Blacklight::Marc::Indexer
Constructor Details
#initialize ⇒ MarcIndexer
Returns a new instance of MarcIndexer.
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/generators/blacklight/marc/templates/app/models/marc_indexer.rb', line 6

# Builds the indexer: calls the parent constructor, supplies default
# reader/writer settings, and then registers every output field with
# +to_field+. All extraction macros used here (+extract_marc+, +trim+,
# +marc_sortable_title+, ...) are expected to come from the superclass or
# its included modules.
def initialize
  super

  settings do
    # type may be 'binary', 'xml', or 'json'
    provide "marc_source.type", "binary"
    # set this to be non-negative if threshold should be enforced
    provide 'solr_writer.max_skipped', -1
  end

  to_field "id", extract_marc("001"), trim, first_only
  to_field 'marc_ss', get_xml
  to_field "all_text_timv", extract_all_marc_values do |_rec, acc|
    acc.replace [acc.join(' ')] # turn it into a single string
  end

  to_field "language_ssim", marc_languages("008[35-37]:041a:041d:")
  to_field "format", get_format
  to_field "isbn_tsim", extract_marc('020a', separator: nil) do |_rec, acc|
    # Index every normalized form of each ISBN alongside the raw values.
    orig = acc.dup
    acc.map! { |x| StdNum::ISBN.allNormalizedValues(x) }
    acc << orig
    acc.flatten!
    acc.uniq!
  end

  to_field 'material_type_ssm', extract_marc('300a'), trim_punctuation

  # Title fields
  #    primary title
  to_field 'title_tsim', extract_marc('245a')
  to_field 'title_ssm', extract_marc('245a', alternate_script: false), trim_punctuation
  to_field 'title_vern_ssm', extract_marc('245a', alternate_script: :only), trim_punctuation

  #    subtitle
  to_field 'subtitle_tsim', extract_marc('245b')
  to_field 'subtitle_ssm', extract_marc('245b', alternate_script: false), trim_punctuation
  to_field 'subtitle_vern_ssm', extract_marc('245b', alternate_script: :only), trim_punctuation

  #    additional title fields
  to_field 'title_addl_tsim',
    extract_marc(%W{
      245abnps
      130#{ATOZ}
      240abcdefgklmnopqrs
      210ab
      222ab
      242abnp
      243abcdefgklmnopqrs
      246abcdefgnp
      247abcdefgnp
    }.join(':'))

  to_field 'title_added_entry_tsim', extract_marc(%W{
    700gklmnoprst
    710fgklmnopqrst
    711fgklnpst
    730abcdefgklmnopqrst
    740anp
  }.join(':'))

  to_field 'title_series_tsim', extract_marc("440anpv:490av")

  to_field 'title_si', marc_sortable_title

  # Author fields
  to_field 'author_tsim', extract_marc("100abcegqu:110abcdegnu:111acdegjnqu")
  to_field 'author_addl_tsim', extract_marc("700abcegqu:710abcdegnu:711acdegjnqu")
  to_field 'author_ssm', extract_marc("100abcdq:110#{ATOZ}:111#{ATOZ}", alternate_script: false)
  to_field 'author_vern_ssm', extract_marc("100abcdq:110#{ATOZ}:111#{ATOZ}", alternate_script: :only)

  # JSTOR isn't an author. Try to not use it as one
  # NOTE(review): this mapping previously had no value macro, so the call ran
  # into the following to_field. marc_sortable_author is assumed to be
  # available alongside marc_sortable_title -- confirm against the superclass.
  to_field 'author_si', marc_sortable_author

  # Subject fields
  to_field 'subject_tsim', extract_marc(%W(
    600#{ATOU}
    610#{ATOU}
    611#{ATOU}
    630#{ATOU}
    650abcde
    651ae
    653a:654abcde:655abc
  ).join(':'))
  to_field 'subject_addl_tsim', extract_marc("600vwxyz:610vwxyz:611vwxyz:630vwxyz:650vwxyz:651vwxyz:654vwxyz:655vwxyz")
  to_field 'subject_ssim', extract_marc("600abcdq:610ab:611ab:630aa:650aa:653aa:654ab:655ab"), trim_punctuation
  to_field 'subject_era_ssim', extract_marc("650y:651y:654y:655y"), trim_punctuation
  to_field 'subject_geo_ssim', extract_marc("651a:650z"), trim_punctuation

  # Publication fields
  to_field 'published_ssm', extract_marc('260a', alternate_script: false), trim_punctuation
  to_field 'published_vern_ssm', extract_marc('260a', alternate_script: :only), trim_punctuation
  to_field 'pub_date_si', marc_publication_date
  to_field 'pub_date_ssim', marc_publication_date

  # Call Number fields
  to_field 'lc_callnum_ssm', extract_marc('050ab'), first_only

  # Reduces each accumulated value to its first character.
  first_letter = lambda { |_rec, acc| acc.map! { |x| x[0] } }
  to_field 'lc_1letter_ssim', extract_marc('050ab'), first_only, first_letter, translation_map('callnumber_map')

  # Keeps only the leading 1-3 capital letters of values that are followed by
  # a digit; values that do not fit that shape are dropped.
  alpha_pat = /\A([A-Z]{1,3})\d.*\Z/
  alpha_only = lambda do |_rec, acc|
    acc.map! do |x|
      (m = alpha_pat.match(x)) ? m[1] : nil
    end
    acc.compact! # eliminate nils
  end
  to_field 'lc_alpha_ssim', extract_marc('050a'), alpha_only, first_only

  to_field 'lc_b4cutter_ssim', extract_marc('050a'), first_only

  # URL Fields

  # Labels (856 $z / $3) that mark a link as supplementary rather than full
  # text. The pattern must not end in "|": an empty trailing alternative
  # matches every string, which would classify every link as supplementary.
  notfulltext = /abstract|description|sample text|table of contents/i

  # Builds a to_field step collecting 856 $u values.
  #   own_ind2      - indicator2 value whose every $u is taken unconditionally
  #   other_ind2    - indicator2 value that is always ignored by this field
  #   supplementary - for any other indicator2, take the first $u only when
  #                   "label matches notfulltext" equals this flag
  url_collector = lambda do |own_ind2, other_ind2, supplementary|
    lambda do |rec, acc|
      rec.fields('856').each do |f|
        case f.indicator2
        when own_ind2
          f.find_all { |sf| sf.code == 'u' }.each do |url|
            acc << url.value
          end
        when other_ind2
          # do nothing
        else
          z3 = [f['z'], f['3']].join(' ')
          labelled_supplementary = !notfulltext.match(z3).nil?
          if labelled_supplementary == supplementary
            acc << f['u'] unless f['u'].nil?
          end
        end
      end
    end
  end

  to_field 'url_fulltext_ssm', url_collector.call('0', '2', false)
  to_field 'url_suppl_ssm', url_collector.call('2', '0', true)
end