Module: Macros::Tei

Defined in:
lib/macros/tei.rb

Overview

Macros for extracting TEI values from Nokogiri documents

Instance Method Summary

Instance Method Details

#generate_data_provider(xpath) ⇒ Object


6
7
8
9
10
11
12
# File 'lib/macros/tei.rb', line 6

# Builds a macro that reports the repository and institution found beneath
# +xpath+ as one comma-separated string.
#
# @param xpath [String] XPath of the element whose +tei:repository+ and
#   +tei:institution+ children are read
# @return [Proc] lambda taking +(record, accumulator)+; it appends at most one
#   string to +accumulator+ and appends nothing when no names are found
def generate_data_provider(xpath)
  lambda do |record, accumulator|
    names = %w[repository institution].flat_map do |element|
      record.xpath("#{xpath}/tei:#{element}", TrajectPlus::Macros::Tei::NS).map(&:text)
    end
    # Joining the two node lists directly emitted a dangling ", " whenever one
    # of them was empty, so blank values are dropped before joining.
    names = names.reject(&:empty?)
    accumulator << names.join(', ') unless names.empty?
  end
end

#main_language ⇒ Object


14
15
16
17
18
19
20
21
22
# File 'lib/macros/tei.rb', line 14

# Combines two TEI extractions for the manuscript's main language through
# +first+: the +mainLang+ attribute of +tei:textLang+, then the +tei:textLang+
# element itself. Both are mapped through the 'marc_languages' translation map
# with a '__passthrough__' default.
#
# @return [Object] whatever +first+ returns for the two extraction macros
#   (presumably a macro using the first one that yields a value; confirm
#   against the definition of +first+)
def main_language
  text_lang_xpath = '/*/tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:msDesc/tei:msContents/tei:textLang'
  main_lang_xpath = "#{text_lang_xpath}/@mainLang"

  from_attribute = extract_tei(
    main_lang_xpath,
    translation_map: ['marc_languages', { default: '__passthrough__' }]
  )
  # Built separately from the attribute extraction so that fuzzy matching
  # parameters can eventually be passed to this one alone.
  from_element_text = extract_tei(
    text_lang_xpath,
    translation_map: ['marc_languages', { default: '__passthrough__' }]
  )

  first(from_attribute, from_element_text)
end

#other_languages ⇒ Object


24
25
26
27
28
29
30
31
32
# File 'lib/macros/tei.rb', line 24

# Builds a macro that reads the +otherLangs+ attribute of +tei:textLang+,
# splits its value on spaces, runs the pieces through a transform pipeline
# configured with the 'marc_languages' translation map, and appends the
# results to the accumulator.
#
# @return [Proc] lambda taking +(record, accumulator)+; it leaves
#   +accumulator+ untouched when the attribute is absent
def other_languages
  steps = %w[* tei:teiHeader tei:fileDesc tei:sourceDesc tei:msDesc tei:msContents tei:textLang @otherLangs]
  other_langs_xpath = "/#{steps.join('/')}"
  language_pipeline = TrajectPlus::Extraction::TransformPipeline.new(translation_map: 'marc_languages')

  lambda do |record, accumulator|
    attribute = record.xpath(other_langs_xpath, TrajectPlus::Macros::Tei::NS).first
    next unless attribute

    codes = attribute.value.split(' ')
    accumulator.concat(language_pipeline.transform(codes))
  end
end

#penn_image_query(idx) ⇒ Object


40
41
42
# File 'lib/macros/tei.rb', line 40

# Builds the XPath selecting the +url+ attribute of one +tei:graphic+ inside
# the first +tei:surface+ of the facsimile.
#
# @param idx [#to_s] value placed inside the +tei:graphic[...]+ predicate
# @return [String] the XPath expression
def penn_image_query(idx)
  format('/*/tei:facsimile/tei:surface[1]/tei:graphic[%<position>s]/@url', position: idx)
end

#penn_image_uri(query) ⇒ Object


52
53
54
55
56
57
58
59
# File 'lib/macros/tei.rb', line 52

# Builds a macro that turns the image path selected by +query+ into a full
# image URI (via +penn_uri+) and appends it to the accumulator.
#
# @param query [String] XPath handed to +extract_tei+ to locate the image path
# @return [Proc] lambda taking +(record, accumulator, context)+; it appends
#   nothing when the query yields no path for the record
def penn_image_uri(query)
  # The extractor depends only on +query+, so build it once instead of once
  # per record.
  extractor = extract_tei(query)
  lambda do |record, accumulator, context|
    # Identifier without the prefix (everything up to the first underscore)
    id = context.output_hash['id'].first.sub(/\A[^_]*_/, '')
    path = extractor.call(record, [], context).first
    # A record without a matching graphic previously produced a URI ending in
    # "/data/"; skip it rather than emit a link that points at no image.
    next if path.to_s.empty?

    accumulator << penn_uri(id, path)
  end
end

#penn_thumbnail_image_query ⇒ Object


48
49
50
# File 'lib/macros/tei.rb', line 48

# XPath for the thumbnail image: the query +penn_image_query+ builds for
# graphic number 2.
#
# @return [Object] the result of +penn_image_query(2)+
def penn_thumbnail_image_query
  thumbnail_graphic = 2
  penn_image_query(thumbnail_graphic)
end

#penn_uri(id, path) ⇒ Object


61
62
63
# File 'lib/macros/tei.rb', line 61

# Assembles the OPenn URI for a file in an item's data directory.
#
# @param id [#to_s] item identifier placed after +/Data/0001/+
# @param path [#to_s] path placed after the item's +/data/+ segment
# @return [String] the assembled URI
def penn_uri(id, path)
  format('http://openn.library.upenn.edu/Data/0001/%<id>s/data/%<path>s', id: id, path: path)
end

#penn_web_image_query ⇒ Object


44
45
46
# File 'lib/macros/tei.rb', line 44

# XPath for the web-sized image: the query +penn_image_query+ builds for
# graphic number 3.
#
# @return [Object] the result of +penn_image_query(3)+
def penn_web_image_query
  web_graphic = 3
  penn_image_query(web_graphic)
end

#public_domain ⇒ Object


34
35
36
37
38
# File 'lib/macros/tei.rb', line 34

# Builds a macro that appends the Creative Commons Public Domain Mark 1.0 URI
# to the accumulator, ignoring the record.
#
# @return [Proc] lambda taking +(record, accumulator)+
def public_domain
  ->(_record, accumulator) { accumulator.push('http://creativecommons.org/publicdomain/mark/1.0/') }
end