Module: TaxonWorks::Vendor::Serrano

Defined in:
lib/vendor/serrano.rb

Overview

A middle-layer wrapper between Serrano and TaxonWorks

Defined Under Namespace

Classes: CrossRefLaTeX

Constant Summary collapse

CUTOFF =
50.0

Class Method Summary collapse

Class Method Details

.bibtex_from_citproc(c, b) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/vendor/serrano.rb', line 93

# Copies selected fields from a citeproc-JSON record onto a BibTeX source.
#
# @param c [String] citeproc record serialized as JSON
# @param b [Object] target written through `b[:field] = value`; presumably a
#   Source::Bibtex — confirm against callers
# @return [Object, nil] `b` once the fields are copied, or nil when either
#   argument is blank
def self.bibtex_from_citproc(c, b)
  return nil unless c.present? && b.present?

  record = JSON.parse(c)

  # [target attribute, citeproc key, re-encode as UTF-8?], applied in order.
  # author, editor, month, number, title, volume, doi, bibtex_type,
  # translator and url are not copied here.
  [
    [:address, 'address', true],
    [:booktitle, 'booktitle', true],
    [:chapter, 'chapter', false],
    [:edition, 'edition', false],
    [:howpublished, 'how-published', true],
    [:institution, 'institution', true],
    [:journal, 'container-title', true],
    [:note, 'note', true],
    [:organization, 'organization', true],
    [:pages, 'page', false],
    [:publisher, 'publisher', true],
    [:school, 'school', true],
    [:series, 'series', false]
  ].each do |attribute, key, encode|
    value = record[key]
    next if value.blank?

    b[attribute] = encode ? ::Utilities::Strings.encode_with_utf8(value) : value
  end

  abstract = record['abstract']
  unless abstract.blank?
    # Drop JATS paragraph/section wrappers and map the remaining JATS markup
    # onto plain HTML tags, one literal replacement after another.
    b[:abstract] = [
      ['</jats:p>', ''],
      ['<jats:p>', ''],
      ['</jats:sec>', ''],
      ['<jats:sec>', ''],
      ['</jats:title>', '</b>'],
      ['<jats:title>', '<b>'],
      ['</jats:italic>', '</i>'],
      ['<jats:italic>', '<i>'],
      ['</jats:bold>', '</b>'],
      ['<jats:bold>', '<b>']
    ].reduce(::Utilities::Strings.encode_with_utf8(abstract)) do |text, (jats, html)|
      text.gsub(jats, html)
    end
  end

  b[:copyright] = record['copyright'] unless record['copyright'].blank?

  # The first entry of issued/date-parts is read as [year, <unused>, day].
  issued = record['issued']
  date_parts = issued && issued['date-parts'] && issued['date-parts'][0]
  if date_parts
    b[:day]  = date_parts[2] if date_parts[2]
    b[:year] = date_parts[0] if date_parts[0]
  end

  # Only the first ISBN / ISSN listed is kept.
  %w[ISBN ISSN].each do |key|
    ids = record[key]
    b[key.downcase.to_sym] = ids.first unless ids.blank?
  end

  # Link to a Serial whose name equals the journal, when there is a journal.
  journal = b[:journal]
  b[:serial_id] = Serial.where(name: journal).first.try(:id) unless journal.blank?
  b
end

.citation_is_valid_doi?(citation) ⇒ Boolean

Returns whether the citation is a DOI, as determined by our global identifier class (Identifier::Global::Doi). This isn’t super robust, but maybe OK.

Returns:

  • (Boolean)

    Whether the value is a DOI, as determined by our global identifier class; this isn’t super robust, but maybe OK



157
158
159
160
161
# File 'lib/vendor/serrano.rb', line 157

# Checks whether +citation+ passes validation as a DOI identifier.
#
# @param citation [String] candidate DOI
# @return [Boolean] true when validating an Identifier::Global::Doi built
#   from +citation+ records no error on :identifier
def self.citation_is_valid_doi?(citation)
  # Validation is run only to populate the errors; errors recorded on any
  # attribute other than :identifier do not affect the result.
  candidate = Identifier::Global::Doi.new(identifier: citation).tap(&:valid?)
  !candidate.errors.has_key?(:identifier)
end

.cutoff ⇒ Float

Returns:

  • (Float)


9
10
11
# File 'lib/vendor/serrano.rb', line 9

# Exposes the CUTOFF constant (50.0) through a class-level reader.
#
# @return [Float] the value of CUTOFF
def self.cutoff
  CUTOFF
end

.get_bibtex_string(citation, format = 'bibtex') ⇒ String?

Returns the record Serrano retrieves for the citation in the requested format, or nil; format == ‘bibtex’ or ‘citeproc’.

Returns:

  • (String, nil)

    the retrieved record, or nil; format == ‘bibtex’ or ‘citeproc’



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/vendor/serrano.rb', line 67

# Fetches a record for +citation+ through Serrano content negotiation.
#
# When +citation+ is not already a valid DOI it is first resolved with a
# Serrano works query; the top hit is used only if its score reaches CUTOFF.
#
# @param citation [String] a DOI or a free-text citation
# @param format [String] 'bibtex' or 'citeproc'
# @return [String, nil] the response; nil when no DOI could be resolved, the
#   format is not recognised, a 'bibtex' response has no line that starts
#   with "@" (after optional whitespace), or any StandardError is raised
def self.get_bibtex_string(citation, format = 'bibtex')
  unless citation_is_valid_doi?(citation)
    # The first item should be the one with the highest score/relevance:
    # https://github.com/CrossRef/rest-api-doc#sort-order
    top_hit = ::Serrano.works(query: citation, limit: 1)&.dig("message", "items")&.first
    relevance = top_hit&.dig("score") || -1.0
    citation = relevance >= CUTOFF ? top_hit&.dig("DOI") : nil
  end

  # Without a DOI there is nothing to negotiate for, whatever the format.
  return nil if citation.nil?

  case format
  when 'bibtex'
    reply = ::Serrano.content_negotiation(ids: unurize_doi(citation), format: "bibtex")
    # Anything that does not look like a BibTeX entry is discarded.
    reply if reply =~ /^\s*@/
  when 'citeproc'
    ::Serrano.content_negotiation(ids: unurize_doi(citation), format: "citeproc-json")
  end
rescue StandardError
  # Best effort: lookup failures are reported as "nothing found".
  nil
end

.new_from_citation(citation: nil) ⇒ Source::Bibtex.new, ...

TODO: attempt to extract DOI from full string

Four possible paths: 1) a citation; 2) a citation which includes a DOI; 3) a naked DOI, e.g., ‘10.3897/zookeys.20.205’; 4) a DOI with preamble, e.g., ‘dx.doi.org/10.3897/zookeys.20.205’ or ‘https://doi.org/10.3897/zookeys.20.205’.

Returns:

  • (Source::Bibtex.new)

    a new instance

  • (Source::Verbatim.new)

    a new instance

  • (false)


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/vendor/serrano.rb', line 50

# Builds a new, unsaved source from a free-text citation or a DOI.
#
# The BibTeX record is looked up with get_bibtex_string; when one is found it
# is parsed into a Source::Bibtex and enriched from the citeproc record.
# Otherwise the text is wrapped in a Source::Verbatim.
#
# @param citation [String, nil] citation text or DOI; surrounding whitespace
#   is ignored and the caller's string is not modified
# @return [Source::Bibtex, Source::Verbatim, false] false when the stripped
#   citation is missing or shorter than 6 characters
def self.new_from_citation(citation: nil)
  # Non-destructive strip: tolerates nil (the default) and frozen strings.
  citation = citation.to_s.strip
  return false if citation.length < 6

  # check string encoding, if not UTF-8, check if compatible with UTF-8,
  # if so convert to UTF-8 and parse with latex, else use type verbatim
  raw = get_bibtex_string(citation, 'bibtex')
  encoded = ::Utilities::Strings.encode_with_utf8(raw) if raw

  if encoded
    entry = BibTeX::Bibliography.parse(encoded, filter: CrossRefLaTeX.instance).first
    source = Source::Bibtex.new_from_bibtex(entry)
    citeproc = get_bibtex_string(citation, 'citeproc')
    # bibtex_from_citproc returns nil when the citeproc record is
    # unavailable; keep the source parsed from BibTeX rather than losing it.
    bibtex_from_citproc(citeproc, source) || source
  else
    Source::Verbatim.new(verbatim: raw || citation)
  end
end

.unurize_doi(doi) ⇒ String

Returns:

  • (String)


144
145
146
147
148
149
150
151
152
# File 'lib/vendor/serrano.rb', line 144

# Reduces a DOI given with a preamble to the bare DOI.
#
# Handles an optional leading "doi:" label followed by either a URL
# ("https://doi.org/10.x/y", "http://dx.doi.org/10.x/y") or a scheme-less
# resolver host ("dx.doi.org/10.x/y", "doi.org/10.x/y"). Anything else is
# returned stripped but otherwise unchanged.
#
# @param doi [String, nil] DOI, possibly with a preamble; nil yields ""
# @return [String] the part after the preamble, or the stripped input
def self.unurize_doi(doi)
  doi = doi.to_s.strip.sub(/\Adoi:\s*/i, '')

  # Anchored at the start so that a DOI whose suffix itself contains a URL
  # is not truncated; scheme and host are matched case-insensitively.
  if (matches = doi.match(%r{\A(?:https?://[^/]+|(?:dx\.)?doi\.org)/(.*)}i))
    matches[1]
  else
    doi
  end
end