Module: IiifPrint

Extended by:
ActiveSupport::Autoload
Defined in:
lib/iiif_print.rb,
lib/iiif_print/data.rb,
lib/iiif_print/engine.rb,
lib/iiif_print/errors.rb,
lib/iiif_print/version.rb,
lib/iiif_print/metadata.rb,
lib/iiif_print/image_tool.rb,
lib/iiif_print/configuration.rb,
lib/iiif_print/data/work_file.rb,
lib/iiif_print/data/work_files.rb,
lib/iiif_print/lineage_service.rb,
lib/iiif_print/text_extraction.rb,
lib/iiif_print/data/path_helper.rb,
lib/iiif_print/persistence_layer.rb,
app/listeners/iiif_print/listener.rb,
lib/iiif_print/jp2_image_metadata.rb,
lib/iiif_print/data/fileset_helper.rb,
lib/iiif_print/data/work_derivatives.rb,
lib/iiif_print/catalog_search_builder.rb,
lib/iiif_print/jp2_derivative_service.rb,
lib/iiif_print/pdf_derivative_service.rb,
lib/iiif_print/base_derivative_service.rb,
lib/iiif_print/homepage_search_builder.rb,
lib/iiif_print/tiff_derivative_service.rb,
lib/iiif_print/split_pdfs/base_splitter.rb,
lib/iiif_print/text_extraction/page_ocr.rb,
app/jobs/iiif_print/jobs/application_job.rb,
app/models/iiif_print/application_record.rb,
app/helpers/iiif_print/application_helper.rb,
app/mailers/iiif_print/application_mailer.rb,
app/models/iiif_print/ingest_file_relation.rb,
app/models/iiif_print/pending_relationship.rb,
lib/generators/iiif_print/assets_generator.rb,
lib/iiif_print/text_extraction/alto_reader.rb,
lib/iiif_print/text_extraction/hocr_reader.rb,
lib/iiif_print/text_extraction/render_alto.rb,
app/models/iiif_print/derivative_attachment.rb,
app/models/iiif_print/iiif_search_decorator.rb,
lib/generators/iiif_print/install_generator.rb,
app/helpers/iiif_print/iiif_helper_decorator.rb,
app/models/concerns/iiif_print/set_child_flag.rb,
lib/iiif_print/text_formats_from_alto_service.rb,
app/jobs/iiif_print/jobs/request_split_pdf_job.rb,
app/controllers/iiif_print/split_pdfs_controller.rb,
app/services/iiif_print/derivative_rodeo_service.rb,
lib/iiif_print/split_pdfs/pages_to_jpgs_splitter.rb,
lib/iiif_print/split_pdfs/pages_to_pngs_splitter.rb,
app/jobs/iiif_print/jobs/child_works_from_pdf_job.rb,
app/jobs/iiif_print/jobs/create_relationships_job.rb,
lib/iiif_print/persistence_layer/valkyrie_adapter.rb,
lib/iiif_print/split_pdfs/pages_to_tiffs_splitter.rb,
lib/iiif_print/text_extraction_derivative_service.rb,
lib/iiif_print/text_extraction/word_coords_builder.rb,
lib/iiif_print/works_controller_behavior_decorator.rb,
lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb,
app/models/iiif_print/iiif_search_response_decorator.rb,
app/actors/iiif_print/actors/file_set_actor_decorator.rb,
app/models/concerns/iiif_print/solr_document_decorator.rb,
app/presenters/iiif_print/file_set_presenter_decorator.rb,
app/search_builders/concerns/iiif_print/exclude_models.rb,
app/services/iiif_print/simple_schema_loader_decorator.rb,
lib/generators/iiif_print/catalog_controller_generator.rb,
lib/iiif_print/persistence_layer/active_fedora_adapter.rb,
lib/iiif_print/split_pdfs/pdf_image_extraction_service.rb,
app/presenters/iiif_print/work_show_presenter_decorator.rb,
lib/iiif_print/split_pdfs/destroy_pdf_child_works_service.rb,
app/services/iiif_print/manifest_builder_service_decorator.rb,
lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb,
app/indexers/concerns/iiif_print/file_set_indexer_decorator.rb,
app/presenters/iiif_print/iiif_manifest_presenter_decorator.rb,
app/search_builders/concerns/iiif_print/allinson_flex_fields.rb,
app/indexers/concerns/iiif_print/child_work_indexer_decorator.rb,
app/actors/iiif_print/actors/cleanup_file_sets_actor_decorator.rb,
lib/iiif_print/split_pdfs/child_work_creation_from_pdf_service.rb,
app/search_builders/concerns/iiif_print/highlight_search_params.rb,
app/presenters/iiif_print/iiif_manifest_presenter_factory_decorator.rb,
app/presenters/iiif_print/iiif_manifest_presenter/display_image_presenter_decorator.rb

Overview

mixin to provide URL for IIIF Content Search service

Defined Under Namespace

Modules: Actors, AllinsonFlexFields, ApplicationHelper, BlacklightIiifSearch, ChildWorkIndexerDecorator, Data, ExcludeModels, FileSetIndexerDecorator, FileSetPresenterDecorator, HighlightSearchParams, IiifHelperDecorator, IiifManifestPresenter, IiifManifestPresenterDecorator, IiifManifestPresenterFactoryDecorator, IiifPrintHelperBehavior, IiifSearchDecorator, IiifSearchResponseDecorator, Jobs, LineageService, ManifestBuilderServiceDecorator, PersistenceLayer, SetChildFlag, SimpleSchemaLoaderDecorator, SolrDocumentDecorator, SplitPdfs, TextExtraction, WorkShowPresenterDecorator, WorksControllerBehaviorDecorator Classes: ApplicationMailer, ApplicationRecord, AssetsGenerator, BaseDerivativeService, CatalogControllerGenerator, CatalogSearchBuilder, CollectionFieldShim, Configuration, DataError, DerivativeAttachment, DerivativeRodeoService, Engine, Field, HomepageSearchBuilder, IiifPrintError, ImageTool, IngestFileRelation, InstallGenerator, JP2DerivativeService, JP2ImageMetadata, Listener, Metadata, MissingFileError, PDFDerivativeService, PendingRelationship, PluggableDerivativeService, SplitPdfsController, TIFFDerivativeService, TextExtractionDerivativeService, TextFormatsFromALTOService, UnexpectedMimeTypeError, WorkNotConfiguredToSplitFileSetError

Constant Summary collapse

DEFAULT_MODEL_CONFIGURATION =

NOTE: We use lambdas so we can have default values but also provide a lazy configuration. There are certainly better ways but this is the least intrusive refactor from prior state.

{
  # Every value is a zero-argument lambda; `.model_configuration` calls it to obtain the
  # default only for keys the caller did not supply.
  #
  # Split a PDF into individual page images and create a new child work for each image.
  # The job class that receives `perform_later` when a PDF is submitted for splitting.
  pdf_splitter_job: -> { IiifPrint::Jobs::ChildWorksFromPdfJob },
  # The default splitter service class.
  pdf_splitter_service: -> { IiifPrint::SplitPdfs::PagesToJpgsSplitter },
  # The default list of derivative service plugin classes.
  derivative_service_plugins: lambda {
                                [
                                  IiifPrint::TextExtractionDerivativeService
                                ]
                              }
}.freeze
GEM_PATH =

module constants:

Gem::Specification.find_by_name("iiif_print").gem_dir
VERSION =
'3.0.8'.freeze

Class Method Summary collapse

Class Method Details

.allinson_flex_fields ⇒ Array<IiifPrint::Field>

Returns:



254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/iiif_print.rb', line 254

# Memoized list of Allinson Flex profile properties that carry a 'display_label' text.
#
# When the relation has no property named 'collection', a CollectionFieldShim standing in
# for it is appended so that a collection entry is always present.
#
# @return [Array] the selected records (name, value, indexing), plus the collection shim
#   when it was needed
def self.allinson_flex_fields
  # `defined?` (rather than `||=`) so the cached value is reused whatever it is.
  return @allinson_flex_fields if defined?(@allinson_flex_fields)

  display_label_properties = AllinsonFlex::ProfileProperty
                             .joins(:texts)
                             .where(allinson_flex_profile_texts: { name: 'display_label' })
                             .distinct
                             .select(:name, :value, :indexing)
  loaded_fields = display_label_properties.to_a
  has_collection = display_label_properties.exists?(name: 'collection')
  loaded_fields << CollectionFieldShim.new(name: :collection, value: 'Collection', indexing: []) unless has_collection
  @allinson_flex_fields = loaded_fields
end

.conditionally_submit_split_for(work:, file_set:, locations:, user:, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts) ⇒ Symbol

Returns `:no_pdfs_for_splitting` when none of the locations are to be split.

Parameters:

  • work (ActiveFedora::Base)
  • file_set (FileSet)
  • locations (Array<String>)
  • user (User)

Returns:

  • (Symbol)

    `:no_pdfs_for_splitting` when none of the locations are to be split.



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/iiif_print.rb', line 291

# Enqueue the work's configured PDF splitter job for those locations that should be split.
#
# @param work [Object] the work; its `iiif_print_config` (when it responds to it) supplies
#   the `pdf_splitter_job`
# @param file_set [Object] the file set whose id is handed to the job
# @param locations [Array<String>] candidate file paths
# @param user [Object] passed through to the job
# @param skip_these_endings [Array<#downcase>] endings that exclude a path from splitting
# @return [Symbol] :no_pdfs_for_splitting when no location qualifies for splitting
# @return [Object, nil] otherwise whatever `perform_later` returns, or nil when the work has
#   no iiif_print_config or no pdf_splitter_job
def self.conditionally_submit_split_for(work:, file_set:, locations:, user:, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts)
  splittable_locations = locations.select do |candidate|
    split_for_path_suffix?(candidate, skip_these_endings: skip_these_endings)
  end
  return :no_pdfs_for_splitting if splittable_locations.empty?

  # Hyrax::FileSet ids are Valkyrie::ID's which can't be passed to a job, so unwrap them
  # to their string id when they respond to `id`.
  file_set_id = file_set.id.try(:id) || file_set.id
  work_admin_set_id = work.admin_set_id.try(:id) || work.admin_set_id

  splitter_job = work.try(:iiif_print_config)&.pdf_splitter_job
  return nil if splitter_job.nil?

  # The trailing 0 is a no longer used parameter; it is kept to preserve the job's
  # method signature (for now).
  splitter_job.perform_later(file_set_id, splittable_locations, user, work_admin_set_id, 0)
end

.config {|config| ... } ⇒ IiifPrint::Configuration

Exposes the IiifPrint configuration.

Yield Parameters:

Returns:

See Also:



38
39
40
41
42
# File 'lib/iiif_print.rb', line 38

# Exposes the memoized IiifPrint configuration, building it on first access.
#
# @yieldparam config [IiifPrint::Configuration] the configuration, when a block is given
# @return [IiifPrint::Configuration]
def self.config(&block)
  configuration = (@config ||= IiifPrint::Configuration.new)
  block&.call(configuration)
  configuration
end

.default_fields(fields: config.metadata_fields) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

TODO:

Figure out a way to use a custom label; right now it gets rendered from the title.



215
216
217
218
219
220
221
222
223
# File 'lib/iiif_print.rb', line 215

# Build a Field for every configured metadata field.
#
# Each entry of +fields+ is read as a pair: its first element is the field name and its
# last element is the field's options. The label comes from
# Hyrax::Renderers::AttributeRenderer#label for that name.
#
# @api private
# @param fields [#map] pairs of name and options (defaults to config.metadata_fields)
# @return [Array<Field>]
def self.default_fields(fields: config.metadata_fields)
  fields.map do |field|
    Field.new(
      name: field.first,
      label: Hyrax::Renderers::AttributeRenderer.new(field.first, nil).label,
      options: field.last
    )
  end
end

.fields_for_allinson_flex(fields: allinson_flex_fields, sort_order: IiifPrint.config.iiif_metadata_field_presentation_order) ⇒ Object

Parameters:



227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/iiif_print.rb', line 227

# Convert Allinson Flex field records into Field objects, ordered by +sort_order+.
#
# Records whose indexing includes 'admin_only' are dropped, and only the first record for
# any given name is kept.
#
# @param fields [Array<#name, #value, #indexing>] (defaults to allinson_flex_fields)
# @param sort_order [Array<Symbol>, nil] the desired presentation order (defaults to
#   IiifPrint.config.iiif_metadata_field_presentation_order)
# @return [Array<Field>]
def self.fields_for_allinson_flex(fields: allinson_flex_fields, sort_order: IiifPrint.config.iiif_metadata_field_presentation_order)
  fields = sort_af_fields!(fields, sort_order: sort_order)
  fields.each_with_object({}) do |field, hash|
    # filters out admin_only fields
    next if field.indexing&.include?('admin_only')

    # WARNING: This is assuming A LOT
    # This is taking the Allinson Flex fields that have the same name and only
    # using the first one while discarding the rest.  There is currently no way to
    # control which one(s) are discarded but this fits for the moment.
    next if hash.key?(field.name)

    # currently only supports the faceted option
    # Why the `render_as:`? This was originally derived from Hyku default attributes
    # @see https://github.com/samvera/hyku/blob/c702844de4c003eaa88eb5a7514c7a1eae1b289e/app/views/hyrax/base/_attribute_rows.html.erb#L3
    hash[field.name] = Field.new(
      name: field.name,
      label: field.value,
      options: field.indexing&.include?('facetable') ? { render_as: :faceted } : nil
    )
  end.values
end

.manifest_metadata_for(work:, version: config.default_iiif_manifest_version, fields: defined?(AllinsonFlex) ? fields_for_allinson_flex : default_fields, current_ability:, base_url:) ⇒ Array<Hash>

Map the given work's metadata to the given IIIF version spec's metadata structure. This is intended to be a drop-in replacement for `Hyrax::IiifManifestPresenter#manifest_metadata`.

Parameters:

  • work (Object)
  • version (Integer) (defaults to: config.default_iiif_manifest_version)
  • fields (Array<IiifPrint::Metadata::Field>, Array<#name, #label>) (defaults to: defined?(AllinsonFlex) ? fields_for_allinson_flex : default_fields)

Returns:

  • (Array<Hash>)

See Also:

  • for expected output
  • Hyrax::IiifManifestPresenter#manifest_metadata


191
192
193
194
195
196
197
198
199
200
201
# File 'lib/iiif_print.rb', line 191

# Map the given work's metadata to the given IIIF version spec's metadata structure by
# delegating to Metadata.
#
# @param work [Object]
# @param version [Integer] (defaults to config.default_iiif_manifest_version)
# @param fields [Array<#name, #label>] (defaults to fields_for_allinson_flex when
#   AllinsonFlex is defined, otherwise default_fields)
# @param current_ability [Object] passed through unchanged
# @param base_url [Object] passed through unchanged
# @return [Array<Hash>]
def self.manifest_metadata_for(work:,
                               version: config.default_iiif_manifest_version,
                               fields: defined?(AllinsonFlex) ? fields_for_allinson_flex : default_fields,
                               current_ability:,
                               base_url:)
  # NOTE(review): the delegate is assumed to be Metadata.build_metadata_for; confirm
  # against IiifPrint::Metadata.
  Metadata.build_metadata_for(work: work,
                              version: version,
                              fields: fields,
                              current_ability: current_ability,
                              base_url: base_url)
end

.manifest_metadata_from(work:, presenter:) ⇒ Object



203
204
205
206
207
# File 'lib/iiif_print.rb', line 203

# Build manifest metadata for a work, reading the ability and base URL from a presenter.
#
# The ability is read from `presenter.ability`, falling back to `presenter.current_ability`.
# The base URL is read from `presenter.base_url`, falling back to `presenter.request.base_url`.
#
# @param work [Object]
# @param presenter [Object] anything answering the readers above; missing ones yield nil
# @return [Object] the result of IiifPrint.manifest_metadata_for
def self.manifest_metadata_from(work:, presenter:)
  current_ability = presenter.try(:ability) || presenter.try(:current_ability)
  base_url = presenter.try(:base_url) || presenter.try(:request)&.base_url
  IiifPrint.manifest_metadata_for(work: work, current_ability: current_ability, base_url: base_url)
end

.model_configuration(**kwargs) ⇒ Module

TODO:

Not every job will split PDFs and write to a child model, so we may want to introduce an alternative splitting method that creates new filesets on the existing work instead of new child works.

This method is responsible for configuring a model for additional derivative generation.

rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity

Examples:

class Book < ActiveFedora::Base
  include IiifPrint.model_configuration(
    pdf_split_child_model: Page,
    derivative_service_plugins: [
      IiifPrint::JP2DerivativeService,
      IiifPrint::PDFDerivativeService,
      IiifPrint::TextExtractionDerivativeService,
      IiifPrint::TIFFDerivativeService
    ]
  )
end

Parameters:

  • kwargs (Hash<Symbol,Object>)

    the configuration values that override the DEFAULT_MODEL_CONFIGURATION.

Options Hash (**kwargs):

Returns:

  • (Module)

See Also:



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/iiif_print.rb', line 129

# Build an anonymous module that, when included in a work type, decorates that work type
# (and its form and indexer) via the matching persistence adapter and exposes the
# per-model configuration through `#iiif_print_config`.
#
# @param kwargs [Hash<Symbol,Object>] configuration values; any key of
#   DEFAULT_MODEL_CONFIGURATION that is missing (or nil/false) is filled from its default
# @return [Module] the module to include in the work type
# @raise [RuntimeError] at include time, when the including class descends from neither
#   Valkyrie::Resource nor ActiveFedora::Base
def self.model_configuration(**kwargs)
  Module.new do
    extend ActiveSupport::Concern

    included do
      work_type = self # In this case self is the class we're mixing the new module into.

      # Ensure that the work_type and corresponding indexer are properly decorated for IiifPrint.
      # The adapter's return value is used as the indexer below.
      indexer = if defined?(Valkyrie::Resource) && work_type < Valkyrie::Resource
                  IiifPrint::PersistenceLayer::ValkyrieAdapter.decorate_with_adapter_logic(work_type: work_type)
                elsif work_type < ActiveFedora::Base
                  IiifPrint::PersistenceLayer::ActiveFedoraAdapter.decorate_with_adapter_logic(work_type: work_type)
                else
                  raise "Unable to mix '.model_configuration' into #{work_type}"
                end

      # Apply the same adapter's form decoration for the work_type.
      if defined?(Valkyrie::Resource) && work_type < Valkyrie::Resource
        IiifPrint::PersistenceLayer::ValkyrieAdapter.decorate_form_with_adapter_logic(work_type: work_type)
      elsif work_type < ActiveFedora::Base
        IiifPrint::PersistenceLayer::ActiveFedoraAdapter.decorate_form_with_adapter_logic(work_type: work_type)
      else
        raise "Unable to mix '.model_configuration' into #{work_type}"
      end

      # Deriving lineage of objects is a potentially complicated thing.  We provide a default
      # service but each work_type's indexer can be configured by amending its
      # {.iiif_print_lineage_service}.
      indexer.class_attribute(:iiif_print_lineage_service, default: IiifPrint::LineageService) unless indexer.respond_to?(:iiif_print_lineage_service)
      # Only work types that define a GeneratedResourceSchema constant get SetChildFlag mixed into it.
      work_type::GeneratedResourceSchema.send(:include, IiifPrint::SetChildFlag) if work_type.const_defined?(:GeneratedResourceSchema)
    end

    # We don't know what you may want in your configuration, but from this gem's implementation,
    # we're going to provide the defaults to ensure that it works.
    # Each default is a lambda and is only called when the caller's value is missing, nil or false.
    DEFAULT_MODEL_CONFIGURATION.each_pair do |key, default_value|
      kwargs[key] ||= default_value.call
    end

    # `define_method` (rather than `def`) so the block can close over `kwargs`.
    # The ModelConfig is memoized per instance of the including class.
    define_method(:iiif_print_config) do
      @iiif_print_config ||= ModelConfig.new(**kwargs)
    end

    # Instances of any class that includes this module answer true.
    def iiif_print_config?
      true
    end
  end
end

.sort_af_fields!(fields, sort_order:) ⇒ Object

Parameters:



273
274
275
276
277
278
279
280
# File 'lib/iiif_print.rb', line 273

# Order fields by the position of their name within +sort_order+.
#
# Fields whose name is absent from +sort_order+ are ranked after every listed name.
# Despite the bang, the receiver is not mutated: a new array is returned unless
# +sort_order+ is blank, in which case +fields+ itself is returned.
#
# @param fields [Array<#name>]
# @param sort_order [Array<Symbol>, nil]
# @return [Array<#name>]
def self.sort_af_fields!(fields, sort_order:)
  return fields if sort_order.blank?

  unlisted_rank = sort_order.length
  fields.sort_by { |field| sort_order.index(field.name.to_sym) || unlisted_rank }
end

.split_for_path_suffix?(path, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts) ⇒ TrueClass, FalseClass

Parameters:

  • path (String)

    the path, hopefully with an extension, to the file we’re considering splitting.

  • skip_these_endings (Array<#downcase>) (defaults to: skip_splitting_pdf_files_that_end_with_these_texts)

    the endings that we should skip for splitting purposes.

Returns:

  • (TrueClass)

    when the path is one we should split

  • (FalseClass)

    when the path is one we should not split

See Also:

  • skip_splitting_pdf_files_that_end_with_these_texts


319
320
321
322
323
# File 'lib/iiif_print.rb', line 319

# Decide whether the file at +path+ should be split, comparing case-insensitively.
#
# @param path [String] the path, hopefully with an extension, of the candidate file
# @param skip_these_endings [Array<#downcase>] endings that exclude a PDF from splitting
# @return [Boolean] true when the path ends with '.pdf' and with none of the skipped
#   endings; false otherwise
def self.split_for_path_suffix?(path, skip_these_endings: skip_splitting_pdf_files_that_end_with_these_texts)
  normalized_path = path.downcase
  return false unless normalized_path.end_with?('.pdf')

  skipped_suffixes = skip_these_endings.map(&:downcase)
  skipped_suffixes.none? { |suffix| normalized_path.end_with?(suffix) }
end