Module: EnjuNdl::NdlSearch::ClassMethods

Defined in:
lib/enju_ndl/ndl_search.rb

Instance Method Summary

Instance Method Details

#create_additional_attributes(doc, manifestation) ⇒ Object



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/enju_ndl/ndl_search.rb', line 197

# Attaches creators -- and, when the EnjuSubject extension is loaded, NDLSH
# subjects and NDC classifications -- read from an NDL Search RDF document
# to +manifestation+.
#
# @param doc [#xpath] parsed RDF record (presumably a Nokogiri::XML::Document;
#   it is also handed to the get_creators / get_subjects helpers)
# @param manifestation [Manifestation] record receiving the associations
# @return [Object] value of the Agent.transaction block; not meaningful
def create_additional_attributes(doc, manifestation)
  creators = get_creators(doc).uniq
  subjects = get_subjects(doc).uniq
  classification_urls = doc.xpath('//dcterms:subject[@rdf:resource]').map do |node|
    node.attributes['resource'].value
  end

  Agent.transaction do
    manifestation.creators << Agent.import_agents(creators)

    if defined?(EnjuSubject)
      # Finds or creates the named classification type, builds a
      # Classification for +category+ and attaches it only when it validates.
      attach_classification = lambda do |type_name, category|
        classification_type = ClassificationType.where(name: type_name).first ||
                              ClassificationType.create!(name: type_name)
        classification = Classification.new(category: category)
        classification.classification_type = classification_type
        manifestation.classifications << classification if classification.valid?
      end

      subject_heading_type = SubjectHeadingType.where(name: 'ndlsh').first ||
                             SubjectHeadingType.create!(name: 'ndlsh')
      concept_type = nil
      subjects.each do |term|
        subject = Subject.where(term: term[:term]).first
        unless subject
          # Looked up lazily so the 'concept' type is only created when a new
          # subject actually needs it, and only once per call.
          concept_type ||= SubjectType.where(name: 'concept').first || SubjectType.create!(name: 'concept')
          subject = Subject.new(term)
          subject.subject_heading_type = subject_heading_type
          subject.subject_type = concept_type
        end
        manifestation.subjects << subject
      end

      classification_urls.each do |url|
        ndc_url = begin
          URI.parse(url)
        rescue URI::InvalidURIError
          nil
        end
        next unless ndc_url

        # Opaque URIs (urn:, mailto:, ...) have a nil path; treat as no match.
        segments = ndc_url.path.to_s.split('/')
        ndc_type = segments[-2]
        next unless %w[ndc9 ndc10].include?(ndc_type)

        attach_classification.call(ndc_type, segments.last)
      end

      ndc8 = doc.xpath('//dc:subject[@rdf:datatype="http://ndl.go.jp/dcndl/terms/NDC8"]').first
      attach_classification.call('ndc8', ndc8.content) if ndc8
    end
  end
end

#import_from_ndl_search(options) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/enju_ndl/ndl_search.rb', line 21

# Imports the record for options[:isbn] from NDL Search unless a
# Manifestation with that ISBN is already stored.
#
# @param options [Hash] only the :isbn key is read
# @return [Object] the first stored match from Manifestation.find_by_isbn,
#   or otherwise whatever import_record returns for the fetched document
# @raise [EnjuNdl::InvalidIsbn] when Lisbn rejects the given ISBN
# @raise [EnjuNdl::RecordNotFound] when return_xml yields no document
def import_from_ndl_search(options)
  parsed = Lisbn.new(options[:isbn])
  raise EnjuNdl::InvalidIsbn unless parsed.valid?

  normalized = parsed.isbn
  existing = Manifestation.find_by_isbn(normalized)

  if existing.present?
    existing.first
  else
    record = return_xml(normalized)
    raise EnjuNdl::RecordNotFound unless record

    import_record(record)
  end
end

#import_isbn(isbn) ⇒ Object



9
10
11
12
# File 'lib/enju_ndl/ndl_search.rb', line 9

# Convenience wrapper: forwards +isbn+ to
# Manifestation.import_from_ndl_search as the :isbn option.
#
# @param isbn [String] ISBN to import
# @return [Object] whatever Manifestation.import_from_ndl_search returns
def import_isbn(isbn)
  Manifestation.import_from_ndl_search(isbn: isbn)
end

#import_ndl_bib_id(ndl_bib_id) ⇒ Object



15
16
17
18
19
# File 'lib/enju_ndl/ndl_search.rb', line 15

# Downloads the RDF for the given NDL bibliographic id from iss.ndl.go.jp,
# parses the response body as XML and hands it to import_record.
#
# @param ndl_bib_id [String, Integer] id interpolated into the record URL as-is
# @return [Object] whatever import_record returns
def import_ndl_bib_id(ndl_bib_id)
  response = Faraday.get("https://iss.ndl.go.jp/books/R100000002-I#{ndl_bib_id}-00.rdf")
  import_record(Nokogiri::XML(response.body))
end

#import_record(doc) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/enju_ndl/ndl_search.rb', line 36

# Imports a single NDL Search RDF record as a Manifestation.
#
# Returns the already-stored Manifestation when one is found through the
# record's iss_itemno identifier or its manifestation_identifier. Otherwise a
# new Manifestation is built inside Agent.transaction and, if it saves, its
# identifiers, publishers, creators/subjects (create_additional_attributes)
# and series statement are attached.
#
# @param doc [#at, #xpath] parsed RDF record (presumably Nokogiri::XML::Document)
# @return [Manifestation] the existing or newly built record; a new record is
#   returned even when #save fails, in which case nothing was attached to it
# @raise [EnjuNdl::RecordNotFound] when doc has no dcndl:BibAdminResource node
def import_record(doc)
  admin_resource = doc.at('//dcndl:BibAdminResource[@rdf:about]')
  # Without this node the document is not a bibliographic record (e.g. an
  # error page was fetched), so fail the same way a failed lookup does.
  raise EnjuNdl::RecordNotFound unless admin_resource

  iss_itemno = URI.parse(admin_resource.values.first).path.split('/').last
  identifier_type = IdentifierType.where(name: 'iss_itemno').first
  identifier_type ||= IdentifierType.where(name: 'iss_itemno').create!
  identifier = Identifier.where(body: iss_itemno, identifier_type_id: identifier_type.id).first
  return identifier.manifestation if identifier

  jpno = doc.at('//dcterms:identifier[@rdf:datatype="http://ndl.go.jp/dcndl/terms/JPNO"]').try(:content)

  publishers = get_publishers(doc)

  # title
  title = get_title(doc)

  # date of publication: "YYYY", "YYYY.M" or "YYYY.M.D" (dots or hyphens)
  date = nil
  pub_date = doc.at('//dcterms:issued').try(:content).to_s.strip.tr('.', '-')
  pub_date = nil unless pub_date =~ /\A\d+(-\d{0,2}){0,2}\z/
  if pub_date
    year, month = pub_date.split('-')
    date = if month.to_s.empty?
             pub_date
           else
             # to_i parses base 10. Passing the strings straight to %d would
             # parse them like Integer(), so "08"/"09" (invalid octal) raise.
             format('%04d-%02d', year.to_i, month.to_i)
           end
  end

  language = Language.where(iso_639_2: get_language(doc)).first
  language_id = language ? language.id : 1

  isbn = Lisbn.new(doc.at('//dcterms:identifier[@rdf:datatype="http://ndl.go.jp/dcndl/terms/ISBN"]').try(:content).to_s).try(:isbn)
  issn = StdNum::ISSN.normalize(doc.at('//dcterms:identifier[@rdf:datatype="http://ndl.go.jp/dcndl/terms/ISSN"]').try(:content))
  issn_l = StdNum::ISSN.normalize(doc.at('//dcterms:identifier[@rdf:datatype="http://ndl.go.jp/dcndl/terms/ISSNL"]').try(:content))

  carrier_type = content_type = nil
  is_serial = nil
  # A record may list several material types; later entries override earlier ones.
  doc.xpath('//dcndl:materialType[@rdf:resource]').each do |d|
    case d.attributes['resource'].try(:content)
    when 'http://ndl.go.jp/ndltype/Book'
      carrier_type = CarrierType.where(name: 'print').first
      content_type = ContentType.where(name: 'text').first
    when 'http://ndl.go.jp/ndltype/Braille'
      content_type = ContentType.where(name: 'tactile_text').first
    # when 'http://ndl.go.jp/ndltype/ComputerProgram'
    #  content_type = ContentType.where(name: 'computer_program').first
    when 'http://ndl.go.jp/ndltype/ElectronicResource'
      carrier_type = CarrierType.where(name: 'file').first
    when 'http://ndl.go.jp/ndltype/Journal'
      is_serial = true
    when 'http://ndl.go.jp/ndltype/Map'
      content_type = ContentType.where(name: 'cartographic_image').first
    when 'http://ndl.go.jp/ndltype/Music'
      content_type = ContentType.where(name: 'performed_music').first
    when 'http://ndl.go.jp/ndltype/MusicScore'
      content_type = ContentType.where(name: 'notated_music').first
    when 'http://ndl.go.jp/ndltype/Painting',
         'http://ndl.go.jp/ndltype/Photograph',
         'http://ndl.go.jp/ndltype/PicturePostcard',
         'http://purl.org/dc/dcmitype/StillImage'
      content_type = ContentType.where(name: 'still_image').first
    when 'http://purl.org/dc/dcmitype/MovingImage'
      content_type = ContentType.where(name: 'two_dimensional_moving_image').first
    when 'http://purl.org/dc/dcmitype/Sound'
      content_type = ContentType.where(name: 'sounds').first
    end
  end

  admin_identifier = admin_resource.attributes['about'].value
  description = doc.at('//dcterms:abstract').try(:content)
  price = doc.at('//dcndl:price').try(:content)
  volume_number_string = doc.at('//dcndl:volume/rdf:Description/rdf:value').try(:content)
  extent = get_extent(doc)
  statement_of_responsibility = doc.xpath('//dcndl:BibResource/dc:creator').map(&:content).join('; ')
  publication_place = doc.at('//dcterms:publisher/foaf:Agent/dcndl:location').try(:content)
  edition_string = doc.at('//dcndl:edition').try(:content)

  manifestation = Manifestation.where(manifestation_identifier: admin_identifier).first
  return manifestation if manifestation

  Agent.transaction do
    publisher_agents = Agent.import_agents(publishers)

    manifestation = Manifestation.new(
      manifestation_identifier: admin_identifier,
      original_title: title[:manifestation],
      title_transcription: title[:transcription],
      title_alternative: title[:alternative],
      title_alternative_transcription: title[:alternative_transcription],
      # TODO: investigate the non-book materials held in NDL Search
      #:carrier_type_id => CarrierType.where(name: 'print').first.id,
      language_id: language_id,
      pub_date: date,
      description: description,
      volume_number_string: volume_number_string,
      price: price,
      statement_of_responsibility: statement_of_responsibility,
      start_page: extent[:start_page],
      end_page: extent[:end_page],
      height: extent[:height],
      extent: extent[:extent],
      dimensions: extent[:dimensions],
      publication_place: publication_place,
      edition_string: edition_string
    )
    manifestation.serial = true if is_serial

    # One unsaved Identifier per value present in the record; each
    # identifier type is created on first use.
    identifiers = []
    { isbn: isbn, iss_itemno: iss_itemno, jpno: jpno, issn: issn, issn_l: issn_l }.each do |type_name, body|
      next unless body

      new_identifier = Identifier.new(body: body)
      new_identifier.identifier_type = IdentifierType.find_by(name: type_name.to_s) ||
                                       IdentifierType.create!(name: type_name.to_s)
      identifiers << new_identifier
    end

    manifestation.carrier_type = carrier_type if carrier_type
    manifestation.manifestation_content_type = content_type if content_type
    if manifestation.save
      identifiers.each do |new_identifier|
        manifestation.identifiers << new_identifier if new_identifier.valid?
      end
      manifestation.publishers << publisher_agents
      create_additional_attributes(doc, manifestation)
      if is_serial
        # A journal record becomes its own series master.
        series_statement = SeriesStatement.new(
          original_title: title[:manifestation],
          title_alternative: title[:alternative],
          title_transcription: title[:transcription],
          series_master: true
        )
        manifestation.series_statements << series_statement if series_statement.valid?
      else
        create_series_statement(doc, manifestation)
      end
    end
  end

  # manifestation.send_later(:create_frbr_instance, doc.to_s)
  manifestation
end

#normalize_isbn(isbn) ⇒ Object



270
271
272
273
274
275
276
# File 'lib/enju_ndl/ndl_search.rb', line 270

# Converts an ISBN to its *other* form: a 10-character input is turned into
# its ISBN-13 via Lisbn#isbn13, anything else into Lisbn#isbn10.
#
# @param isbn [String] ISBN; its raw length decides the direction
# @return [Object] whatever Lisbn#isbn13 / Lisbn#isbn10 returns
def normalize_isbn(isbn)
  target_form = isbn.length == 10 ? :isbn13 : :isbn10
  Lisbn.new(isbn).public_send(target_form)
end

#return_xml(isbn) ⇒ Object



278
279
280
281
282
283
284
285
286
287
# File 'lib/enju_ndl/ndl_search.rb', line 278

# Looks +isbn+ up through search_ndl and returns the parsed RDF of the first
# hit. When the first search reports zero results, the search is retried once
# with the alternate ISBN form produced by normalize_isbn.
#
# @param isbn [String] ISBN to search for
# @return [Object, nil] result of Nokogiri::XML for the first hit's ".rdf"
#   URL, or nil when neither search returned an item
def return_xml(isbn)
  rss = search_ndl(isbn, dpid: 'iss-ndl-opac', item: 'isbn')
  if rss.channel.totalResults.to_i.zero?
    alternate = normalize_isbn(isbn)
    # No alternate form available (nil/empty): retrying would only issue a
    # search with an empty query, so keep the empty first result.
    rss = search_ndl(alternate, dpid: 'iss-ndl-opac', item: 'isbn') unless alternate.to_s.empty?
  end

  first_item = rss.items.first
  return nil unless first_item

  Nokogiri::XML(Faraday.get("#{first_item.link}.rdf").body)
end

#search_ndl(query, options = {}) ⇒ Object



254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/enju_ndl/ndl_search.rb', line 254

# Queries the NDL Search OpenSearch API.
#
# @param query [Object] search term; passed through format_query before being
#   placed in the URL
# @param options [Hash] overrides for the defaults below
# @option options [String] :dpid ('iss-ndl-opac') value of the dpid parameter
# @option options [String] :item ('any') name of the query parameter that carries the term
# @option options [Integer] :idx (1) start record; values below 1 are treated as 1
# @option options [Integer] :per_page (10) value of the cnt parameter
# @option options [Boolean] :raw (false) when exactly +true+, return the response body
# @option options [Integer] :mediatype (1) value of the mediatype parameter
# @return [String, Object] response body when raw, otherwise the result of
#   RSS::Parser.parse for the request URL
def search_ndl(query, options = {})
  options = { dpid: 'iss-ndl-opac', item: 'any', idx: 1, per_page: 10, raw: false, mediatype: 1 }.merge(options)
  startrecord = options[:idx].to_i
  # idx is 1-based; nil, non-numeric, zero and negative values fall back to the first record.
  startrecord = 1 if startrecord < 1
  url = "https://iss.ndl.go.jp/api/opensearch?dpid=#{options[:dpid]}&#{options[:item]}=#{format_query(query)}&cnt=#{options[:per_page]}&idx=#{startrecord}&mediatype=#{options[:mediatype]}"

  # Strict comparison kept on purpose: only a literal true selects raw mode.
  return Faraday.get(url).body if options[:raw] == true

  # Teach the RSS parser about the openSearch:totalResults channel element.
  RSS::Rss::Channel.install_text_element('openSearch:totalResults', 'http://a9.com/-/spec/opensearchrss/1.0/', '?', 'totalResults', :text, 'openSearch:totalResults')
  RSS::BaseListener.install_get_text_element 'http://a9.com/-/spec/opensearchrss/1.0/', 'totalResults', 'totalResults='
  RSS::Parser.parse(url, false)
end