Module: Bolognese::Utils

Extended by: Utils

Included in: CLI, Metadata, MetadataUtils, Utils

Defined in: lib/bolognese/utils.rb

Constant Summary

# Maps Creative Commons license URLs (without a trailing path segment) to the
# URL form this module treats as canonical: the "/legalcode" page for versioned
# licenses and CC0, and a trailing slash for the legacy public-domain entry.
# Note that "by/3.0/us" is folded into the generic "by/3.0" legal code.
NORMALIZED_LICENSES =

{
  "https://creativecommons.org/licenses/by/1.0" => "https://creativecommons.org/licenses/by/1.0/legalcode",
  "https://creativecommons.org/licenses/by/2.0" => "https://creativecommons.org/licenses/by/2.0/legalcode",
  "https://creativecommons.org/licenses/by/2.5" => "https://creativecommons.org/licenses/by/2.5/legalcode",
  "https://creativecommons.org/licenses/by/3.0" => "https://creativecommons.org/licenses/by/3.0/legalcode",
  "https://creativecommons.org/licenses/by/3.0/us" => "https://creativecommons.org/licenses/by/3.0/legalcode",
  "https://creativecommons.org/licenses/by/4.0" => "https://creativecommons.org/licenses/by/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/1.0" => "https://creativecommons.org/licenses/by-nc/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/2.0" => "https://creativecommons.org/licenses/by-nc/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/2.5" => "https://creativecommons.org/licenses/by-nc/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nc/3.0" => "https://creativecommons.org/licenses/by-nc/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nc/4.0" => "https://creativecommons.org/licenses/by-nc/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/1.0" => "https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/2.0" => "https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/2.5" => "https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/3.0" => "https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nd-nc/4.0" => "https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/1.0" => "https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/2.0" => "https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/2.5" => "https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/3.0" => "https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-sa/4.0" => "https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/1.0" => "https://creativecommons.org/licenses/by-nd/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/2.0" => "https://creativecommons.org/licenses/by-nd/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/2.5" => "https://creativecommons.org/licenses/by-nd/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nd/3.0" => "https://creativecommons.org/licenses/by-nd/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nd/4.0" => "https://creativecommons.org/licenses/by-nd/4.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/1.0" => "https://creativecommons.org/licenses/by-sa/1.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/2.0" => "https://creativecommons.org/licenses/by-sa/2.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/2.5" => "https://creativecommons.org/licenses/by-sa/2.5/legalcode",
  "https://creativecommons.org/licenses/by-sa/3.0" => "https://creativecommons.org/licenses/by-sa/3.0/legalcode",
  "https://creativecommons.org/licenses/by-sa/4.0" => "https://creativecommons.org/licenses/by-sa/4.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/1.0" => "https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/2.0" => "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/2.5" => "https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/3.0" => "https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode",
  "https://creativecommons.org/licenses/by-nc-nd/4.0" => "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode",
  "https://creativecommons.org/licenses/publicdomain" => "https://creativecommons.org/licenses/publicdomain/",
  "https://creativecommons.org/publicdomain/zero/1.0" => "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
}

# Lookup table from DataCite resource type names (keys) to schema.org-style
# type names (values). A nil value means no counterpart is defined for that
# resource type. The last four entries are internal-only types.
DC_TO_SO_TRANSLATIONS =

{
  "Audiovisual" => "MediaObject",
  "Book" => "Book",
  "BookChapter" => "Chapter",
  "Collection" => "Collection",
  "ComputationalNotebook" => "SoftwareSourceCode",
  "ConferencePaper" => "Article",
  "ConferenceProceeding" => "Periodical",
  "DataPaper" => "Article",
  "Dataset" => "Dataset",
  "Dissertation" => "Thesis",
  "Event" => "Event",
  "Image" => "ImageObject",
  "InteractiveResource" => nil,
  "Journal" => "Periodical",
  "JournalArticle" => "ScholarlyArticle",
  "Model" => nil,
  "OutputManagementPlan" => nil,
  "PeerReview" => "Review",
  "PhysicalObject" => nil,
  "Preprint" => nil,
  "Report" => "Report",
  "Service" => "Service",
  "Software" => "SoftwareSourceCode",
  "Sound" => "AudioObject",
  "Standard" => nil,
  "Text" => "ScholarlyArticle",
  "Workflow" => nil,
  "Other" => "CreativeWork",
  # not part of DataCite schema, but used internally
  "Periodical" => "Periodical",
  "DataCatalog" => "DataCatalog",
  "Award" => "Grant",
  "Project" => "Project"
}

# Lookup table from DataCite resource type names (keys) to citeproc/CSL item
# types (values). A nil value means no CSL type is defined for that resource
# type.
#
# CSL spells the conference paper type with a hyphen ("paper-conference"),
# which is also the spelling CR_TO_CP_TRANSLATIONS uses; only a few CSL types
# such as "motion_picture" use an underscore.
DC_TO_CP_TRANSLATIONS =

{
  "Audiovisual" => "motion_picture",
  "Book" => "book",
  "BookChapter" => "chapter",
  "Collection" => nil,
  "ComputationalNotebook" => nil,
  "ConferencePaper" => "paper-conference",
  "ConferenceProceeding" => nil,
  "DataPaper" => "report",
  "Dataset" => "dataset",
  "Dissertation" => nil,
  "Event" => nil,
  "Image" => "graphic",
  "InteractiveResource" => nil,
  "Journal" => nil,
  "JournalArticle" => "article-journal",
  "Model" => nil,
  "OutputManagementPlan" => nil,
  "PeerReview" => "review",
  "PhysicalObject" => nil,
  "Preprint" => nil,
  "Report" => "report",
  "Service" => nil,
  "Sound" => "song",
  "Standard" => nil,
  "Text" => "report",
  "Workflow" => nil,
  "Other" => nil
}

# Lookup table keyed by "CR" type names, giving the matching "CP" type.
# NOTE(review): CR presumably stands for Crossref and CP for citeproc/CSL,
# judging by the values - confirm against callers.
# A nil value means no target type is defined for that key.
CR_TO_CP_TRANSLATIONS =

{
  "Proceedings" => nil,
  "ReferenceBook" => nil,
  "JournalIssue" => nil,
  "ProceedingsArticle" => "paper-conference",
  "Other" => nil,
  "Dissertation" => "thesis",
  "Dataset" => "dataset",
  "EditedBook" => "book",
  "JournalArticle" => "article-journal",
  "Journal" => nil,
  "Report" => "report",
  "BookSeries" => nil,
  "ReportSeries" => nil,
  "BookTrack" => nil,
  "Standard" => nil,
  "BookSection" => "chapter",
  "BookPart" => nil,
  "Book" => "book",
  "BookChapter" => "chapter",
  "StandardSeries" => nil,
  "Monograph" => "book",
  "Component" => nil,
  "ReferenceEntry" => "entry-dictionary",
  "JournalVolume" => nil,
  "BookSet" => nil
}

# Lookup table keyed by "CR" type names, giving the matching schema.org-style
# type name. NOTE(review): CR presumably stands for Crossref - confirm.
# A nil value means no target type is defined for that key.
CR_TO_SO_TRANSLATIONS =

{
  "Proceedings" => nil,
  "ReferenceBook" => "Book",
  "JournalIssue" => "PublicationIssue",
  "ProceedingsArticle" => nil,
  "Other" => "CreativeWork",
  "Dissertation" => "Thesis",
  "Dataset" => "Dataset",
  "EditedBook" => "Book",
  "JournalArticle" => "ScholarlyArticle",
  "Journal" => nil,
  "Report" => "Report",
  "BookSeries" => nil,
  "ReportSeries" => nil,
  "BookTrack" => nil,
  "Standard" => nil,
  "BookSection" => nil,
  "BookPart" => nil,
  "Book" => "Book",
  "BookChapter" => "Chapter",
  "StandardSeries" => nil,
  "Monograph" => "Book",
  "Component" => "CreativeWork",
  "ReferenceEntry" => nil,
  "JournalVolume" => "PublicationVolume",
  "BookSet" => nil,
  "PostedContent" => "ScholarlyArticle",
  "PeerReview" => "Review"
}

# Lookup table keyed by "CR" type names, giving the matching BibTeX entry
# type. NOTE(review): CR presumably stands for Crossref - confirm.
# A nil value means no BibTeX entry type is defined for that key.
CR_TO_BIB_TRANSLATIONS =

{
  "Proceedings" => "proceedings",
  "ReferenceBook" => "book",
  "JournalIssue" => nil,
  "ProceedingsArticle" => nil,
  "Other" => nil,
  "Dissertation" => "phdthesis",
  "Dataset" => nil,
  "EditedBook" => "book",
  "JournalArticle" => "article",
  "Journal" => nil,
  "Report" => "techreport",
  "BookSeries" => nil,
  "ReportSeries" => nil,
  "BookTrack" => nil,
  "Standard" => nil,
  "BookSection" => "inbook",
  "BookPart" => nil,
  "Book" => "book",
  "BookChapter" => "inbook",
  "StandardSeries" => nil,
  "Monograph" => "book",
  "Component" => nil,
  "ReferenceEntry" => nil,
  "JournalVolume" => nil,
  "BookSet" => nil,
  "PostedContent" => "article"
}

# Lookup table from BibTeX entry types (keys) to "CR" type names (values).
# Only the five entry types listed here are covered.
BIB_TO_CR_TRANSLATIONS =

{
  "proceedings" => "Proceedings",
  "phdthesis" => "Dissertation",
  "article" => "JournalArticle",
  "book" => "Book",
  "inbook" => "BookChapter"
}

# Lookup table keyed by "CR" type names, giving the matching "JATS" type.
# NOTE(review): the values look like JATS publication-type strings - confirm.
# A nil value means no target type is defined for that key.
CR_TO_JATS_TRANSLATIONS =

{
  "Proceedings" => "working-paper",
  "ReferenceBook" => "book",
  "JournalIssue" => "journal",
  "ProceedingsArticle" => "working-paper",
  "Other" => nil,
  "Dissertation" => nil,
  "Dataset" => "data",
  "EditedBook" => "book",
  "JournalArticle" => "journal",
  "Journal" => "journal",
  "Report" => "report",
  "BookSeries" => "book",
  "ReportSeries" => "report",
  "BookTrack" => "book",
  "Standard" => "standard",
  "BookSection" => "chapter",
  "BookPart" => "chapter",
  "Book" => "book",
  "BookChapter" => "chapter",
  "StandardSeries" => "standard",
  "Monograph" => "book",
  "Component" => nil,
  "ReferenceEntry" => nil,
  "JournalVolume" => "journal",
  "BookSet" => "book"
}

# Lookup table keyed by "CR" type names, giving the matching DataCite resource
# type name. NOTE(review): CR presumably stands for Crossref - confirm.
# A nil value means no DataCite type is defined for that key.
#
# Every value uses the capitalised DataCite spelling; "Monograph" maps to
# "Book" like the other book-like keys (EditedBook, Book).
CR_TO_DC_TRANSLATIONS =

{
  "Proceedings" => nil,
  "ReferenceBook" => nil,
  "JournalIssue" => "Text",
  "ProceedingsArticle" => "ConferencePaper",
  "Other" => "Other",
  "Dissertation" => "Dissertation",
  "Dataset" => "Dataset",
  "EditedBook" => "Book",
  "JournalArticle" => "JournalArticle",
  "Journal" => "Journal",
  "Report" => "Report",
  "BookSeries" => nil,
  "ReportSeries" => nil,
  "BookTrack" => nil,
  "Standard" => "Standard",
  "BookSection" => "BookChapter",
  "BookPart" => nil,
  "Book" => "Book",
  "BookChapter" => "BookChapter",
  "SaComponent" => "Text",
  "StandardSeries" => "Standard",
  "Monograph" => "Book",
  "Component" => nil,
  "ReferenceEntry" => nil,
  "JournalVolume" => nil,
  "BookSet" => nil,
  "PostedContent" => "JournalArticle",
  "PeerReview" => "PeerReview"
}

# Lookup table from schema.org-style type names (keys) to DataCite resource
# type names (values).
SO_TO_DC_TRANSLATIONS =

{
  "Article" => "Text",
  "AudioObject" => "Sound",
  "Blog" => "Text",
  "BlogPosting" => "Text",
  "Book" => "Book",
  "Chapter" => "BookChapter",
  "Collection" => "Collection",
  "DataCatalog" => "Dataset",
  "Dataset" => "Dataset",
  "Event" => "Event",
  "ImageObject" => "Image",
  "Movie" => "Audiovisual",
  "PublicationIssue" => "Text",
  "Report" => "Report",
  "ScholarlyArticle" => "Text",
  "Thesis" => "Text",
  "Service" => "Service",
  "Review" => "PeerReview",
  "SoftwareSourceCode" => "Software",
  "VideoObject" => "Audiovisual",
  "WebPage" => "Text",
  "WebSite" => "Text"
}

# Lookup table from schema.org-style type names (keys) to "JATS" type strings
# (values). A nil value means no target type is defined for that key.
SO_TO_JATS_TRANSLATIONS =

{
  "Article" => "journal",
  "AudioObject" => nil,
  "Blog" => nil,
  "BlogPosting" => nil,
  "Book" => "book",
  "Collection" => nil,
  "CreativeWork" => nil,
  "DataCatalog" => "data",
  "Dataset" => "data",
  "Event" => nil,
  "ImageObject" => nil,
  "Movie" => nil,
  "PublicationIssue" => "journal",
  "ScholarlyArticle" => "journal",
  "Service" => nil,
  "SoftwareSourceCode" => "software",
  "VideoObject" => nil,
  "WebPage" => nil,
  "WebSite" => "website"
}

# Lookup table from schema.org-style type names (keys) to citeproc/CSL item
# types (values). A nil value means no CSL type is defined for that key.
#
# "Article" must not map to an empty string: "" is truthy in Ruby, so it would
# defeat any `TABLE[type] || default` fallback and emit a blank CSL type.
# NOTE(review): "article-newspaper" is the CSL type chosen for a generic
# schema.org Article - confirm against the upstream table.
SO_TO_CP_TRANSLATIONS =

{
  "Article" => "article-newspaper",
  "AudioObject" => "song",
  "Blog" => "report",
  "BlogPosting" => "post-weblog",
  "Collection" => nil,
  "CreativeWork" => nil,
  "DataCatalog" => "dataset",
  "Dataset" => "dataset",
  "Event" => nil,
  "ImageObject" => "graphic",
  "Movie" => "motion_picture",
  "PublicationIssue" => nil,
  "Report" => "report",
  "ScholarlyArticle" => "article-journal",
  "Service" => nil,
  "Thesis" => "thesis",
  "VideoObject" => "broadcast",
  "WebPage" => "webpage",
  "WebSite" => "webpage"
}

# Lookup table from schema.org-style type names (keys) to RIS reference type
# tags (values). A nil value means no RIS tag is defined for that key.
SO_TO_RIS_TRANSLATIONS =

{
  "Article" => nil,
  "AudioObject" => nil,
  "Blog" => nil,
  "BlogPosting" => "BLOG",
  "Collection" => nil,
  "CreativeWork" => "GEN",
  "DataCatalog" => "CTLG",
  "Dataset" => "DATA",
  "Event" => nil,
  "ImageObject" => "FIGURE",
  "Movie" => "MPCT",
  "Report" => "RPRT",
  "PublicationIssue" => nil,
  "ScholarlyArticle" => "JOUR",
  "Service" => nil,
  "SoftwareSourceCode" => "COMP",
  "VideoObject" => "VIDEO",
  "WebPage" => "ELEC",
  "WebSite" => nil
}

# Lookup table keyed by "CR" type names, giving the matching RIS reference
# type tag. NOTE(review): CR presumably stands for Crossref - confirm.
# A nil value means no RIS tag is defined for that key.
CR_TO_RIS_TRANSLATIONS =

{
  "Proceedings" => "CONF",
  "ReferenceBook" => "BOOK",
  "JournalIssue" => nil,
  "ProceedingsArticle" => "CPAPER",
  "Other" => "GEN",
  "Dissertation" => "THES",
  "Dataset" => "DATA",
  "EditedBook" => "BOOK",
  "JournalArticle" => "JOUR",
  "Journal" => nil,
  "Report" => "RPRT",
  "BookSeries" => nil,
  "ReportSeries" => nil,
  "BookTrack" => nil,
  "Standard" => "STAND",
  "BookSection" => "CHAP",
  "BookPart" => "CHAP",
  "Book" => "BOOK",
  "BookChapter" => "CHAP",
  "StandardSeries" => nil,
  "Monograph" => "BOOK",
  "Component" => nil,
  "ReferenceEntry" => "DICT",
  "JournalVolume" => nil,
  "BookSet" => nil
}

# Lookup table from DataCite resource type names (keys) to RIS reference type
# tags (values). A nil value means no RIS tag is defined for that key.
#
# The RIS tag for a report is "RPRT", the same spelling the other *_TO_RIS
# tables and the "Text" entry below use.
DC_TO_RIS_TRANSLATIONS =

{
  "Audiovisual" => "MPCT",
  "Book" => "BOOK",
  "BookChapter" => "CHAP",
  "Collection" => nil,
  "ComputationalNotebook" => "COMP",
  "ConferencePaper" => "CPAPER",
  "ConferenceProceeding" => "CONF",
  "DataPaper" => nil,
  "Dataset" => "DATA",
  "Dissertation" => "THES",
  "Event" => nil,
  "Image" => "FIGURE",
  "InteractiveResource" => nil,
  "Journal" => nil,
  "JournalArticle" => "JOUR",
  "Model" => nil,
  "OutputManagementPlan" => nil,
  "PeerReview" => nil,
  "PhysicalObject" => nil,
  "Preprint" => nil,
  "Report" => "RPRT",
  "Service" => nil,
  "Software" => "COMP",
  "Sound" => "SOUND",
  "Standard" => nil,
  "Text" => "RPRT",
  "Workflow" => nil,
  "Other" => nil
}

# Lookup table from RIS reference type tags (keys) to DataCite resource type
# names (values). Only the tags listed here are covered.
RIS_TO_DC_TRANSLATIONS =

{
  "BLOG" => "Text",
  "GEN" => "Text",
  "CTLG" => "Collection",
  "DATA" => "Dataset",
  "FIGURE" => "Image",
  "THES" => "Dissertation",
  "MPCT" => "Audiovisual",
  "JOUR" => "JournalArticle",
  "COMP" => "Software",
  "VIDEO" => "Audiovisual",
  "ELEC" => "Text"
}

# Lookup table from BibTeX entry types (keys) to DataCite resource type names
# (values). A nil value means no DataCite type is defined for that entry type.
BIB_TO_DC_TRANSLATIONS =

{
  "article" => "JournalArticle",
  "book" => "Book",
  "inbook" => "BookChapter",
  "inproceedings" => nil,
  "manual" => nil,
  "misc" => "Other",
  "phdthesis" => "Dissertation",
  "proceedings" => "ConferenceProceeding",
  "techreport" => "Report",
  "unpublished" => nil
}

# Lookup table from citeproc/CSL item types (keys) to DataCite resource type
# names (values). Only the CSL types listed here are covered.
# NOTE(review): DC_TO_CP_TRANSLATIONS maps "Sound" => "song", but the reverse
# mapping here sends "song" to "Audiovisual" - confirm this asymmetry is
# intended.
CP_TO_DC_TRANSLATIONS =

{
  "song" => "Audiovisual",
  "post-weblog" => "Text",
  "dataset" => "Dataset",
  "graphic" => "Image",
  "motion_picture" => "Audiovisual",
  "article-journal" => "JournalArticle",
  "broadcast" => "Audiovisual",
  "webpage" => "Text"
}

# Lookup table from schema.org-style type names (keys) to BibTeX entry types
# (values). Everything that is not an article or thesis falls back to "misc".
SO_TO_BIB_TRANSLATIONS =

{
  "Article" => "article",
  "AudioObject" => "misc",
  "Thesis" => "phdthesis",
  "Blog" => "misc",
  "BlogPosting" => "article",
  "Collection" => "misc",
  "CreativeWork" => "misc",
  "DataCatalog" => "misc",
  "Dataset" => "misc",
  "Event" => "misc",
  "ImageObject" => "misc",
  "Movie" => "misc",
  "PublicationIssue" => "misc",
  "ScholarlyArticle" => "article",
  "Service" => "misc",
  "SoftwareSourceCode" => "misc",
  "VideoObject" => "misc",
  "WebPage" => "misc",
  "WebSite" => "misc"
}

# Placeholder codes (keys, each starting with a colon) that stand in for a
# missing metadata value, with a human-readable explanation of each (values).
UNKNOWN_INFORMATION =

{
  ":unac" => "temporarily inaccessible",
  ":unal" => "unallowed, suppressed intentionally",
  ":unap" => "not applicable, makes no sense",
  ":unas" => "value unassigned (e.g., Untitled)",
  ":unav" => "value unavailable, possibly unknown",
  ":unkn" => "known to be unknown (e.g., Anonymous, Inconnue)",
  ":none" => "never had a value, never will",
  ":null" => "explicitly and meaningfully empty",
  ":tba" => "to be assigned or announced later",
  ":etal" => "too numerous to list (et alia)"
}

# Relative paths of bundled JSON resource files, keyed by a short symbol.
# The :fos and :dfg keys are the ones passed to resource_json by the methods
# in this module. NOTE(review): the base directory these paths are resolved
# against is not visible here - confirm in resource_json.
RESOURCE_PATHS =

{
  spdx: 'spdx/licenses.json',
  fos: 'oecd/fos-mappings.json',
  for: 'oecd/for-mappings.json',
  dfg: 'oecd/dfg-mappings.json'
}

Instance Method Summary

Instance Method Details

#abstract_description ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1433

# Returns the first entry of `descriptions` whose "descriptionType" is
# "Abstract", or nil when there is no such entry.
def abstract_description
  Array.wrap(descriptions).find do |description|
    description["descriptionType"] == "Abstract"
  end
end

#dfg_ids_to_fos(dfg_ids) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1418

# Translates one or more DFG ids into FOS subject hashes.
#
# Looks up every entry under "dfgFields" in the :dfg resource whose "dfgId"
# is among +dfg_ids+ (a single id or an array of ids) and returns one subject
# hash per match, carrying the entry's "fosId" and "fosLabel".
def dfg_ids_to_fos(dfg_ids)
  dfg_fields = resource_json(:dfg).fetch("dfgFields")
  wanted_ids = Array.wrap(dfg_ids)

  dfg_fields.each_with_object([]) do |field, fos_subjects|
    next unless wanted_ids.include?(field["dfgId"])

    fos_subjects << {
      "classificationCode" => field["fosId"],
      "subject" => field["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }
  end
end

#find_from_format(id: nil, string: nil, ext: nil, filename: nil) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 515

# Picks the name of the input format, trying the available hints in order:
# an identifier, then a file extension (with the filename checked first),
# then the raw string. Falls back to "datacite" when no hint is present.
def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
  return find_from_format_by_id(id) if id.present?

  if ext.present?
    return find_from_format_by_filename(filename) || find_from_format_by_ext(string, ext: ext)
  end

  return find_from_format_by_string(string) if string.present?

  "datacite"
end

#find_from_format_by_ext(string, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 550

# Detects the metadata format of +string+ from a file extension.
#
# +options[:ext]+ is the extension including the leading dot. ".bib" and
# ".ris" are decided by the extension alone; ".xml" and ".json" content is
# inspected to tell the individual formats apart. Returns the format name as
# a String, or nil when nothing matches.
#
# The JSON is parsed once per call. The DataCite JSON check accepts both the
# "schemaVersion" and the "schema-version" key, so that this method and
# find_from_format_by_string agree on what counts as DataCite JSON.
def find_from_format_by_ext(string, options={})
  case options[:ext]
  when ".bib" then "bibtex"
  when ".ris" then "ris"
  when ".xml"
    if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref")
      "crossref"
    elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |_k, v| v.start_with?("http://datacite.org/schema/kernel") }
      "datacite"
    end
  when ".json"
    json = Maremma.from_json(string).to_h
    context = json["@context"]
    schema_version = json["schemaVersion"] || json["schema-version"]

    if context.to_s.start_with?("http://schema.org", "https://schema.org")
      "schema_org"
    elsif context == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
      "codemeta"
    elsif schema_version.to_s.start_with?("http://datacite.org/schema/kernel")
      "datacite_json"
    elsif json["types"] && json["publication_year"].present?
      "crosscite"
    elsif json.dig("issued", "date-parts").present?
      "citeproc"
    end
  end
end

#find_from_format_by_filename(filename) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 544

# Returns "npm" when +filename+ is exactly "package.json", otherwise nil.
def find_from_format_by_filename(filename)
  "npm" if filename == "package.json"
end

#find_from_format_by_id(id) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 527

# Detects the metadata format from an identifier.
#
# The id is first passed through normalize_id. A DOI is resolved to its
# registration agency via get_doi_ra and the downcased agency name is returned
# (nil for agencies outside the supported list); ORCID ids give "orcid";
# GitHub URLs give "npm" when they end in /package.json and "codemeta"
# otherwise; anything else is treated as "schema_org".
#
# Literal dots in host and file names are escaped so that e.g. "doiXorg" or
# "packageXjson" no longer match.
def find_from_format_by_id(id)
  id = normalize_id(id)

  doi_pattern = %r{\A(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org)/)?(doi:)?(10\.\d{4,5}/.+)\z}
  orcid_pattern = %r{\A(?:(http|https):/(/)?orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z}
  npm_pattern = %r{\A(http|https):/(/)?github\.com/(.+)/package\.json\z}
  github_pattern = %r{\A(http|https):/(/)?github\.com/(.+)\z}

  if doi_pattern.match?(id)
    ra = get_doi_ra(id)
    %w(DataCite Crossref mEDRA KISTI JaLC OP).include?(ra) ? ra.downcase : nil
  elsif orcid_pattern.match?(id)
    "orcid"
  elsif npm_pattern.match?(id)
    "npm"
  elsif github_pattern.match?(id)
    "codemeta"
  else
    "schema_org"
  end
end

#find_from_format_by_string(string) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 572

# Detects the metadata format by inspecting the content of +string+.
#
# XML formats are tried first, then JSON formats, then RIS (recognised by its
# leading "TY  - " tag) and finally BibTeX. Returns the format name as a
# String, or nil when nothing matches or BibTeX parsing fails.
#
# The JSON is parsed once, and only after the XML checks have failed. The
# DataCite JSON check accepts both the "schemaVersion" and the
# "schema-version" key, so that this method and find_from_format_by_ext agree
# on what counts as DataCite JSON.
def find_from_format_by_string(string)
  if Maremma.from_xml(string).to_h.dig("crossref_result", "query_result", "body", "query", "doi_record", "crossref").present?
    return "crossref"
  elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |_k, v| v.start_with?("http://datacite.org/schema/kernel") }
    return "datacite"
  end

  json = Maremma.from_json(string).to_h
  context = json["@context"]
  schema_version = json["schemaVersion"] || json["schema-version"]

  if context.to_s.start_with?("http://schema.org", "https://schema.org")
    "schema_org"
  elsif context == "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld"
    "codemeta"
  elsif schema_version.to_s.start_with?("http://datacite.org/schema/kernel")
    "datacite_json"
  elsif json["types"].present? && json["publication_year"].present?
    "crosscite"
  elsif json.dig("issued", "date-parts").present?
    "citeproc"
  elsif string.start_with?("TY  - ")
    "ris"
  elsif BibTeX.parse(string).first
    "bibtex"
  end
rescue BibTeX::ParseError
  nil
end

#from_citeproc(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1033

# Converts citeproc-style name hashes into hashes with "@type", "name",
# "givenName" and "familyName" keys.
#
# An entry with a "literal" value becomes an "Organization" named after the
# literal; every other entry becomes a "Person" whose name is "given family".
# Both kinds store the display name under "name" (organizations used to get
# "creatorName", which left them without a "name" key). The citeproc keys
# "given", "family" and "literal" are dropped, as are nil values.
#
# +element+ may be a single hash or an array of hashes. The input hashes are
# not modified. The result is passed through Array#unwrap.
def from_citeproc(element)
  Array.wrap(element).map do |a|
    if a["literal"].present?
      type = "Organization"
      name = a["literal"]
    else
      type = "Person"
      name = [a["given"], a["family"]].compact.join(" ")
    end

    a.merge(
      "@type" => type,
      "name" => name,
      "givenName" => a["given"],
      "familyName" => a["family"]
    ).except("given", "family", "literal").compact
  end.unwrap
end

#from_datacite_json(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 793

# Returns an array with one hash per entry of +element+ (a single hash or an
# array of hashes), in which every key has been passed through `underscore`.
# Values are kept as they are.
def from_datacite_json(element)
  Array.wrap(element).map do |entry|
    entry.each_with_object({}) do |(key, value), underscored|
      underscored[key.underscore] = value
    end
  end
end

#from_schema_org(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 957

# Renames the "@type" and "@id" keys of +element+ to "type" and "id" by
# delegating to map_hash_keys.
def from_schema_org(element)
  map_hash_keys(element: element, mapping: { "@type" => "type", "@id" => "id" })
end

#from_schema_org_contributors(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 988

# Converts schema.org-style contributor hashes into DataCite-style hashes with
# "contributorName", "nameIdentifier" and "affiliation" keys.
#
# +element+ may be a single hash or an array of hashes; each hash is modified
# in place and an array of the converted hashes (without "@id", "@type",
# "name" and nil values) is returned.
#
# - A String affiliation is treated as the affiliation name.
# - An affiliation "@id" on ror.org or isni.org sets the matching identifier
#   scheme and scheme URI.
# - "nameIdentifier" is only set when normalize_orcid accepts the "@id".
# - When "@type" is an Array, the first "Person" or "Organization" entry is
#   used, matching from_schema_org_creators. Without this an Array "@type"
#   raised NoMethodError on `titleize`.
def from_schema_org_contributors(element)
  element = Array.wrap(element).map do |c|
    if c["affiliation"].is_a?(String)
      c["affiliation"] = { "name" => c["affiliation"] }
      affiliation_identifier_scheme = nil
      scheme_uri = nil
    elsif c.dig("affiliation", "@id").to_s.starts_with?("https://ror.org")
      affiliation_identifier_scheme = "ROR"
      scheme_uri = "https://ror.org/"
    elsif c.dig("affiliation", "@id").to_s.starts_with?("https://isni.org")
      affiliation_identifier_scheme = "ISNI"
      scheme_uri = "https://isni.org/isni/"
    else
      affiliation_identifier_scheme = nil
      scheme_uri = nil
    end

    c["nameIdentifier"] = [{ "__content__" => c["@id"], "nameIdentifierScheme" => "ORCID", "schemeUri" => "https://orcid.org" }] if normalize_orcid(c["@id"])
    # Reduce a multi-valued "@type" to the one that determines the name type.
    c["@type"] = c["@type"].find { |t| %w(Person Organization).include?(t) } if c["@type"].is_a?(Array)
    # "Person" -> "Personal", "Organization" -> "Organizational"
    c["contributorName"] = { "nameType" => c["@type"].present? ? c["@type"].titleize + "al" : nil, "__content__" => c["name"] }.compact
    c["affiliation"] = { "__content__" => c.dig("affiliation", "name"), "affiliationIdentifier" => c.dig("affiliation", "@id"), "affiliationIdentifierScheme" => affiliation_identifier_scheme, "schemeUri" => scheme_uri }.compact.presence
    c.except("@id", "@type", "name").compact
  end
end

#from_schema_org_creators(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 963

# Converts schema.org-style creator hashes into DataCite-style hashes with
# "creatorName", "nameIdentifier" and "affiliation" keys.
#
# +element+ may be a single hash or an array of hashes; each hash is modified
# in place and an array of the converted hashes (without "@id", "@type",
# "name" and nil values) is returned.
def from_schema_org_creators(element)
  Array.wrap(element).map do |creator|
    # A bare String affiliation is treated as the affiliation name.
    creator["affiliation"] = { "name" => creator["affiliation"] } if creator["affiliation"].is_a?(String)

    affiliation_id = creator.dig("affiliation", "@id")
    scheme, scheme_uri =
      if affiliation_id.to_s.starts_with?("https://ror.org")
        ["ROR", "https://ror.org/"]
      elsif affiliation_id.to_s.starts_with?("https://isni.org")
        ["ISNI", "https://isni.org/isni/"]
      end

    if normalize_orcid(creator["@id"])
      creator["nameIdentifier"] = [
        { "__content__" => creator["@id"], "nameIdentifierScheme" => "ORCID", "schemeUri" => "https://orcid.org" }
      ]
    end

    # Reduce a multi-valued "@type" to the one that determines the name type.
    if creator["@type"].is_a?(Array)
      creator["@type"] = creator["@type"].find { |t| %w(Person Organization).include?(t) }
    end

    # "Person" -> "Personal", "Organization" -> "Organizational"
    name_type = creator["@type"].present? ? creator["@type"].titleize + "al" : nil
    creator["creatorName"] = { "nameType" => name_type, "__content__" => creator["name"] }.compact

    creator["affiliation"] = {
      "__content__" => creator.dig("affiliation", "name"),
      "affiliationIdentifier" => affiliation_id,
      "affiliationIdentifierScheme" => scheme,
      "schemeUri" => scheme_uri
    }.compact.presence

    creator.except("@id", "@type", "name").compact
  end
end

#generate_container(types, related_items, related_identifiers, descriptions) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1438

# Builds the "container" hash that describes where a resource was published.
#
# The container type is "DataRepository" when +types+ has
# "resourceTypeGeneral" == "Dataset" and "Series" otherwise. The first related
# item with relationType "IsPublishedIn" is preferred as the source. Without
# one, the legacy combination of a "SeriesInformation" description and an
# "IsPartOf" related identifier is used, provided the series information
# yields a title. Returns nil when neither source applies; nil values are
# removed from the returned hash.
def generate_container(types, related_items, related_identifiers, descriptions)
  is_dataset = types.respond_to?(:dig) && types.dig("resourceTypeGeneral") == "Dataset"
  container_type = is_dataset ? "DataRepository" : "Series"

  # Preferred source: a relatedItem the resource is published in.
  published_in = Array.wrap(related_items).find { |item| item["relationType"] == "IsPublishedIn" }.to_h

  if published_in.present?
    first_title = published_in.dig("titles", 0)
    title = first_title ? parse_attributes(first_title, content: "title", first: true) : nil
    chapter_number = published_in["numberType"] == "Chapter" ? published_in["number"] : nil

    return {
      "type" => container_type,
      "identifier" => published_in.dig("relatedItemIdentifier", "relatedItemIdentifier"),
      "identifierType" => published_in.dig("relatedItemIdentifier", "relatedItemIdentifierType"),
      "title" => title,
      "volume" => published_in["volume"],
      "issue" => published_in["issue"],
      "edition" => published_in["edition"],
      "number" => published_in["number"],
      "chapterNumber" => chapter_number,
      "firstPage" => published_in["firstPage"],
      "lastPage" => published_in["lastPage"]
    }.compact
  end

  # Legacy fallback: SeriesInformation description plus IsPartOf identifier.
  series_description = Array.wrap(descriptions).find { |d| d["descriptionType"] == "SeriesInformation" }.to_h
  series = get_series_information(series_description.fetch("description", nil))
  part_of = Array.wrap(related_identifiers).find { |ri| ri["relationType"] == "IsPartOf" }.to_h

  return unless series["title"].present?

  {
    "type" => container_type,
    "identifier" => part_of["relatedIdentifier"],
    "identifierType" => part_of["relatedIdentifierType"],
    "title" => series["title"],
    "volume" => series["volume"],
    "issue" => series["issue"],
    "firstPage" => series["firstPage"],
    "lastPage" => series["lastPage"]
  }.compact
end

#get_contributor(contributor, contributor_type) ⇒ `Object`



# File 'lib/bolognese/utils.rb', line 1209

# Returns the contributors whose "contributorType" equals +contributor_type+.
#
# +contributor+ may be an array of contributor hashes, a single hash, or nil.
# It is wrapped into an array first, like get_date and get_identifier do, so
# nil yields [] instead of raising NoMethodError.
def get_contributor(contributor, contributor_type)
  Array.wrap(contributor).select { |c| c["contributorType"] == contributor_type }
end

#get_date(dates, date_type) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1204

# Returns the "date" of the first entry in +dates+ whose "dateType" equals
# +date_type+, or nil when there is no such entry or it has no "date".
def get_date(dates, date_type)
  match = Array.wrap(dates).find { |entry| entry["dateType"] == date_type }
  match ? match.fetch("date", nil) : nil
end

#get_date_from_date_parts(date_as_parts) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1148

# Builds a date string from a citeproc-style hash such as
# { "date-parts" => [[2015, 4, 1]] }, using only the first date-parts entry.
#
# Returns nil when +date_as_parts+ is nil or has no (or a nil) "date-parts"
# list; previously those inputs raised NoMethodError. An entry that is present
# is handed to get_date_from_parts as before.
def get_date_from_date_parts(date_as_parts)
  date_parts = Array(date_as_parts.to_h["date-parts"]).first
  return nil if date_parts.nil?

  year, month, day = date_parts[0], date_parts[1], date_parts[2]
  get_date_from_parts(year, month, day)
end

#get_date_from_parts(year, month = nil, day = nil) ⇒ `Object`



# File 'lib/bolognese/utils.rb', line 1154

# Joins year, month and day into a "YYYY-MM-DD" style string.
#
# Each part is converted to a string and zero-padded (year to four characters,
# month and day to two); parts that end up as "00" are left out, so a missing
# month or day simply shortens the result.
def get_date_from_parts(year, month = nil, day = nil)
  padded_parts = [[year, 4], [month, 2], [day, 2]].map do |value, width|
    value.to_s.rjust(width, '0')
  end

  padded_parts.reject { |part| part == "00" }.join("-")
end

#get_date_parts(iso8601_time) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1137

# Converts an ISO 8601 date string into a citeproc-style
# { 'date-parts' => [[year, month, day]] } hash.
#
# Year, month and day are read from fixed character positions (0..3, 5..6 and
# 8..9); parts that are missing or zero are left out. nil input gives an empty
# date-parts entry, and a TypeError while slicing gives nil.
def get_date_parts(iso8601_time)
  return { 'date-parts' => [[]] } if iso8601_time.nil?

  numbers = [0..3, 5..6, 8..9].map { |position| iso8601_time[position].to_i }
  { 'date-parts' => [numbers.reject(&:zero?)] }
rescue TypeError
  nil
end

#get_date_parts_from_parts(year, month = nil, day = nil) ⇒ `Object`



# File 'lib/bolognese/utils.rb', line 1158

# Builds a citeproc-style { 'date-parts' => [[year, month, day]] } hash from
# separate parts. Each part is converted with to_i; parts that come out as 0
# (including nil) are left out.
def get_date_parts_from_parts(year, month = nil, day = nil)
  numbers = [year, month, day].map(&:to_i)
  { 'date-parts' => [numbers.reject(&:zero?)] }
end

#get_datetime_from_iso8601(iso8601_time) ⇒ `Object`

Parsing of incomplete ISO 8601 timestamps such as 2015-04 is broken in the standard library, so this method uses ISO8601::DateTime instead. Returns nil if the timestamp is not valid ISO 8601.

# File 'lib/bolognese/utils.rb', line 1190

# Parses +iso8601_time+ with ISO8601::DateTime and returns it as a UTC Time.
# Any StandardError raised while parsing or converting results in nil.
def get_datetime_from_iso8601(iso8601_time)
  begin
    parsed = ISO8601::DateTime.new(iso8601_time)
    parsed.to_time.utc
  rescue StandardError
    nil
  end
end

#get_datetime_from_time(time) ⇒ `Object`

Converts an ISO 8601 datetime written without hyphens and colons (the form used by Crossref) into a formatted timestamp string. Returns nil if the input is invalid.

# File 'lib/bolognese/utils.rb', line 1198

# Reformats a compact "YYYYMMDDHHMMSS" timestamp as "YYYY-MM-DDTHH:MM:SSZ".
# +time+ is converted with to_s first; input that does not match the compact
# format (an ArgumentError from strptime) gives nil.
def get_datetime_from_time(time)
  compact_format = "%Y%m%d%H%M%S"
  parsed = DateTime.strptime(time.to_s, compact_format)
  parsed.strftime('%Y-%m-%dT%H:%M:%SZ')
rescue ArgumentError
  nil
end

#get_identifier(identifiers, identifier_type) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1213

# Returns the "identifier" of the first entry in +identifiers+ whose
# "identifierType" equals +identifier_type+, or nil when there is no such
# entry or it has no "identifier".
def get_identifier(identifiers, identifier_type)
  match = Array.wrap(identifiers).find { |entry| entry["identifierType"] == identifier_type }
  match ? match.fetch("identifier", nil) : nil
end

#get_identifier_type(identifier_type) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1218

# Normalises the spelling of an identifier type name.
#
# The lookup is case-insensitive: "doi" becomes "DOI", "ARXIV" becomes
# "arXiv", and so on for every known type. Unknown types are returned
# unchanged, and a blank +identifier_type+ gives nil.
def get_identifier_type(identifier_type)
  return nil unless identifier_type.present?

  # Canonical spellings; each one is matched by its downcased form.
  canonical_spellings = %w(
    ARK arXiv bibcode DOI EAN13 EISSN Handle IGSN ISBN ISSN ISTC LISSN
    LSID PMID PURL UPC URL URN md5 minid dataguid CSTR RRID
  )

  wanted = identifier_type.downcase
  canonical_spellings.find { |spelling| spelling.downcase == wanted } || identifier_type
end

#get_iso8601_date(iso8601_time) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1162

# Returns the first ten characters of +iso8601_time+ (the "YYYY-MM-DD" part of
# a full timestamp), or nil when the input is nil.
def get_iso8601_date(iso8601_time)
  iso8601_time[0..9] unless iso8601_time.nil?
end

#get_series_information(str) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1250

# Splits a comma-separated series information string such as
# "Journal Name, 12(3), 45-67" into its parts.
#
# The first segment is the title. When there are more than two segments, the
# second one is read as "volume(issue)". When there is more than one segment,
# the last one is read as "firstPage-lastPage". Returns a hash with the keys
# "title", "volume", "issue", "firstPage" and "lastPage", omitting the ones
# without a value; blank input gives {}.
def get_series_information(str)
  return {} unless str.present?

  segments = str.split(",").map(&:strip)
  info = { "title" => segments.first }

  if segments.length > 2
    # rpartition yields [text before, "(issue)", text after]; all three are
    # empty-or-whole-string when there is no parenthesised issue.
    before, parenthesized, after = segments[1].rpartition(/\(([^)]+)\)/)
    info["volume"] = before.presence || after.presence
    info["issue"] = parenthesized[1...-1].presence
  end

  page_range = segments.length > 1 ? segments.last : nil
  if page_range.present?
    first_page, last_page = page_range.split("-").map(&:strip)
    info["firstPage"] = first_page
    info["lastPage"] = last_page
  end

  info.compact
end

#get_year_month(iso8601_time) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1168

# Returns [year, month] as integers, read from character positions 0..3 and
# 5..6 of +iso8601_time+. Parts that are missing or zero are left out, and
# nil input gives [].
def get_year_month(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6].map { |position| iso8601_time[position].to_i }.reject(&:zero?)
end

#get_year_month_day(iso8601_time) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1177

# Returns [year, month, day] as integers, read from character positions 0..3,
# 5..6 and 8..9 of +iso8601_time+. Parts that are missing or zero are left
# out, and nil input gives [].
def get_year_month_day(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6, 8..9].map { |position| iso8601_time[position].to_i }.reject(&:zero?)
end

#github_as_codemeta_url(url) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1127

# Returns the raw.githubusercontent.com URL of the codemeta.json file for a
# GitHub URL.
#
# When the URL already points at a path ending in "codemeta.json", that file
# is used with its release; otherwise the repository's master/codemeta.json
# is assumed. Returns nil when no owner can be extracted from the URL.
def github_as_codemeta_url(url)
  owner, repo, release, path = github_from_url(url).values_at(:owner, :repo, :release, :path)

  if path.to_s.end_with?("codemeta.json")
    return "https://raw.githubusercontent.com/#{owner}/#{repo}/#{release}/#{path}"
  end

  "https://raw.githubusercontent.com/#{owner}/#{repo}/master/codemeta.json" if owner.present?
end

#github_as_owner_url(url) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1112

# Returns the https://github.com/<owner> URL for a GitHub URL, or nil when no
# owner can be extracted from it.
def github_as_owner_url(url)
  owner = github_from_url(url)[:owner]
  return unless owner.present?

  "https://github.com/#{owner}"
end

#github_as_release_url(url) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1122

# Returns the GitHub "tree" URL for the release parsed from the given URL.
#
# @param url [String] a GitHub URL
# @return [String, nil] nil when no release could be parsed
def github_as_release_url(url)
  parts = github_from_url(url)
  return unless parts[:release].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}/tree/#{parts[:release]}"
end

#github_as_repo_url(url) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1117

# Returns the GitHub URL of the repository parsed from the given URL.
#
# @param url [String] a GitHub URL
# @return [String, nil] nil when no repository could be parsed
def github_as_repo_url(url)
  parts = github_from_url(url)
  return unless parts[:repo].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}"
end

#github_from_url(url) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1089

# Splits a https://github.com/... URL into its path components.
#
# The URL path is split on "/": segment 0 is the owner, segment 1 the repo,
# segment 3 the release, and segments 4 onward (joined with "/") the path.
# Segment 2 (e.g. "tree") is not returned. Keys whose value is nil are omitted.
#
# @param url [String, nil]
# @return [Hash{Symbol=>String}] {} when the URL is not a github.com https URL
def github_from_url(url)
  return {} unless /\Ahttps:\/\/github\.com\/(.+)(?:\/)?(.+)?(?:\/tree\/)?(.*)\z/.match(url)

  segments = URI.parse(url).path[1..-1].split('/')
  owner, repo, _kind, release = segments
  # with exactly four segments this is "" rather than nil, so the key is kept
  path = segments.length > 3 ? segments.drop(4).join("/") : nil

  { owner: owner, repo: repo, release: release, path: path }.compact
end

#github_owner_from_url(url) ⇒ `Object`



1108
1109
1110

# File 'lib/bolognese/utils.rb', line 1108

# Returns the :owner component that github_from_url parses from the URL.
#
# @param url [String]
# @return [String, nil] nil when no owner was parsed
def github_owner_from_url(url)
  parts = github_from_url(url)
  parts.fetch(:owner) { nil }
end

#github_release_from_url(url) ⇒ `Object`



1104
1105
1106

# File 'lib/bolognese/utils.rb', line 1104

# Returns the :release component that github_from_url parses from the URL.
#
# @param url [String]
# @return [String, nil] nil when no release was parsed
def github_release_from_url(url)
  parts = github_from_url(url)
  parts.fetch(:release) { nil }
end

#github_repo_from_url(url) ⇒ `Object`



1100
1101
1102

# File 'lib/bolognese/utils.rb', line 1100

# Returns the :repo component that github_from_url parses from the URL.
#
# @param url [String]
# @return [String, nil] nil when no repository was parsed
def github_repo_from_url(url)
  parts = github_from_url(url)
  parts.fetch(:repo) { nil }
end

#hsh_to_fos(hsh) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1357

# Converts a subject hash into a list of subject entries, appending a
# "FOS: ..." (Fields of Science) entry when the subject can be mapped.
#
# The subject text is read from hsh["__content__"] or hsh["subject"]. It is
# looked up first in the :fos resource, then in the :for resource (by id when
# subjectScheme is "FOR", otherwise by label).
#
# @param hsh [Hash] subject attributes ("__content__"/"subject",
#   "subjectScheme", "schemeURI"/"schemeUri", "valueURI"/"valueUri",
#   "classificationCode", "lang")
# @return [Array<Hash>] the original subject, followed by the FOS entry when
#   a mapping was found
def hsh_to_fos(hsh)
  content = hsh["__content__"]
  label = hsh["subject"]

  # first find subject in Fields of Science (OECD)
  fos = resource_json(:fos).fetch("fosFields")
  subject = fos.find { |l| l["fosLabel"] == content || "FOS: " + l["fosLabel"] == content || l["fosLabel"] == label }

  if subject
    return [{
      "subject" => sanitize(content || label),
      "subjectScheme" => hsh["subjectScheme"],
      "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
      "valueUri" => hsh["valueURI"] || hsh["valueUri"],
      "classificationCode" => hsh["classificationCode"],
      "lang" => hsh["lang"] }.compact,
    {
      "subject" => "FOS: " + subject["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf" }.compact]
  end

  # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
  # and map to Fields of Science. Add an extra entry for the latter
  fores = resource_json(:for)
  for_fields = fores.fetch("forFields")
  for_disciplines = fores.fetch("forDisciplines")

  if hsh["subjectScheme"] == "FOR"
    # try to extract forId: the first whitespace-separated token of the subject text
    for_id = content.to_s.split(" ").first || label.to_s.split(" ").first

    # Without any subject text there is no id to look up; leave subject unset
    # (falling through to the plain entry) instead of calling rjust on nil.
    if for_id
      for_id = for_id.rjust(6, "0")

      subject = for_fields.find { |l| l["forId"] == for_id } ||
                for_disciplines.find { |l| l["forId"] == for_id[0..3] }
    end
  else
    subject = for_fields.find { |l| l["forLabel"] == content || l["forLabel"] == label } ||
              for_disciplines.find { |l| l["forLabel"] == content || l["forLabel"] == label }
  end

  original = {
    "subject" => sanitize(content || label),
    "subjectScheme" => hsh["subjectScheme"],
    "classificationCode" => hsh["classificationCode"],
    "schemeUri" => hsh["schemeURI"] || hsh["schemeUri"],
    "valueUri" => hsh["valueURI"] || hsh["valueUri"],
    "lang" => hsh["lang"] }.compact

  return [original] unless subject

  [original,
   {
     "subject" => "FOS: " + subject["fosLabel"],
     "subjectScheme" => "Fields of Science and Technology (FOS)",
     "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
   }]
end

#hsh_to_spdx(hsh) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1296

# Converts a rights hash into a rights entry, normalized against the SPDX
# license list when a matching license is found.
#
# A license matches when its licenseId equals hsh["rightsIdentifier"]
# (case-insensitively), its first seeAlso URL equals the normalized
# hsh["rightsURI"] or hsh["rights"], or its name equals hsh["rights"].
#
# @param hsh [Hash] rights attributes
# @return [Hash] SPDX-based entry on a match, otherwise the input values
#   (with rightsIdentifier downcased); nil values are removed
def hsh_to_spdx(hsh)
  spdx = resource_json(:spdx).fetch("licenses")

  # These do not depend on the license being inspected, so compute them once
  # instead of once per entry of the SPDX list.
  identifier = hsh["rightsIdentifier"]
  rights = hsh["rights"]
  uri_from_attribute = normalize_cc_url(hsh["rightsURI"])
  uri_from_rights = normalize_cc_url(rights)

  license = spdx.find do |l|
    see_also = l["seeAlso"].first
    l["licenseId"].casecmp?(identifier) || see_also == uri_from_attribute || l["name"] == rights || see_also == uri_from_rights
  end

  if license
    {
      "rights" => license["name"],
      "rightsUri" => license["seeAlso"].first,
      "rightsIdentifier" => license["licenseId"].downcase,
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/",
      "lang" => hsh["lang"] }.compact
  else
    {
      "rights" => hsh["__content__"] || hsh["rights"],
      "rightsUri" => hsh["rightsURI"] || hsh["rightsUri"],
      "rightsIdentifier" => hsh["rightsIdentifier"].present? ? hsh["rightsIdentifier"].downcase : nil,
      "rightsIdentifierScheme" => hsh["rightsIdentifierScheme"],
      "schemeUri" => hsh["schemeUri"],
      "lang" => hsh["lang"] }.compact
  end
end

#jsonlint(json) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1271

# Runs JsonLint over the given JSON and returns the collected errors.
#
# @param json [String, nil]
# @return [Array] ["No JSON provided"] for blank input, otherwise the array
#   the linter filled in
def jsonlint(json)
  return ["No JSON provided"] unless json.present?

  # check_data is invoked through send; it appends its findings to the array
  [].tap do |errors|
    JsonLint::Linter.new.send(:check_data, json, errors)
  end
end

#map_hash_keys(element: nil, mapping: nil) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1012

# Renames the keys of one or more hashes according to a mapping.
#
# Keys missing from the mapping are kept as they are. Values that are hashes
# are passed through to_schema_org.
#
# @param element [Hash, Array<Hash>, nil]
# @param mapping [Hash] old key => new key
# @return [Hash, Array<Hash>, nil] unwrapped result (nil for no elements,
#   a single hash for one element)
def map_hash_keys(element: nil, mapping: nil)
  renamed = Array.wrap(element).map do |item|
    item.each_with_object({}) do |(key, value), out|
      out[mapping.fetch(key, key)] = value.is_a?(Hash) ? to_schema_org(value) : value
    end
  end

  renamed.unwrap
end

#name_to_fos(name) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1319

# Converts a subject name into a list of subject entries, appending a
# "FOS: ..." (Fields of Science) entry when the name can be mapped.
#
# The name is looked up in the :fos resource first (with or without the
# "FOS: " prefix), then by label in the :for resource.
#
# @param name [String]
# @return [Array<Hash>] the sanitized subject, followed by the FOS entry when
#   a mapping was found
def name_to_fos(name)
  # first find subject in Fields of Science (OECD)
  fos_fields = resource_json(:fos).fetch("fosFields")
  match = fos_fields.find { |field| field["fosLabel"] == name || "FOS: " + field["fosLabel"] == name }

  # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
  # and map to Fields of Science; that resource is only loaded in this case
  match ||= begin
    fores = resource_json(:for)
    for_fields = fores.fetch("forFields")
    for_disciplines = fores.fetch("forDisciplines")

    for_fields.find { |field| field["forLabel"] == name } ||
      for_disciplines.find { |field| field["forLabel"] == name }
  end

  entries = [{ "subject" => sanitize(name) }]
  return entries unless match

  entries << {
    "subject" => "FOS: " + match["fosLabel"],
    "subjectScheme" => "Fields of Science and Technology (FOS)",
    "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
  }
end

#name_to_spdx(name) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1280

# Converts a license name, SPDX identifier or license URL into a rights entry.
#
# A license from the :spdx resource matches when its name or licenseId equals
# the given value, or its first seeAlso URL equals the normalized value.
#
# @param name [String]
# @return [Hash] SPDX-based entry on a match, otherwise { "rights" => name }
def name_to_spdx(name)
  spdx = resource_json(:spdx).fetch("licenses")

  # Independent of the license being inspected, so normalize once rather
  # than once per entry of the SPDX list.
  cc_url = normalize_cc_url(name)
  license = spdx.find { |l| l["name"] == name || l["licenseId"] == name || l["seeAlso"].first == cc_url }

  if license
    {
      "rights" => license["name"],
      "rightsUri" => license["seeAlso"].first,
      "rightsIdentifier" => license["licenseId"].downcase,
      "rightsIdentifierScheme" => "SPDX",
      "schemeUri" => "https://spdx.org/licenses/" }.compact
  else
    { "rights" => name }
  end
end

#normalize_cc_url(id) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 681

# Normalizes a URL to https and maps it through NORMALIZED_LICENSES.
#
# @param id [String, nil]
# @return [String, nil] the mapped license URL, or the normalized URL itself
#   when NORMALIZED_LICENSES has no entry for it
def normalize_cc_url(id)
  normalized = normalize_url(id, https: true)
  NORMALIZED_LICENSES.fetch(normalized) { normalized }
end

#normalize_id(id, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 642

# Normalizes an identifier to a DOI or a cleaned-up HTTP(S) URL.
#
# @param id [String, nil]
# @param options [Hash] passed on to DoiUtils::normalize_doi
# @return [String, nil] the normalized DOI if there is one, else the cleaned
#   URL; nil for blank input, non-http(s) input, or an invalid URI
def normalize_id(id, options={})
  return nil if id.blank?

  # check for valid DOI
  doi = DoiUtils::normalize_doi(id, options)
  return doi unless doi.blank?

  # check for valid HTTP uri
  uri = Addressable::URI.parse(id)
  http_uri = uri && uri.host && %w(http https).include?(uri.scheme)

  # clean up URL
  http_uri ? PostRank::URI.clean(id) : nil
rescue Addressable::URI::InvalidURIError
  nil
end

#normalize_ids(ids: nil, relation_type: nil) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 702

# Converts schema.org-style references into related identifier hashes.
#
# Entries without an "@id" are skipped. Each remaining "@id" is normalized;
# when a DOI can be extracted from it the DOI is used with type "DOI",
# otherwise the normalized id is used with type "URL".
#
# @param ids [Hash, Array<Hash>, nil] entries with "@id" and optionally "@type"
# @param relation_type [String, nil] stored as "relationType"
# @return [Hash, Array<Hash>, nil] unwrapped list of related identifiers
def normalize_ids(ids: nil, relation_type: nil)
  Array.wrap(ids).select { |idx| idx["@id"].present? }.map do |idx|
    id = normalize_id(idx["@id"])
    # extract the DOI once and reuse it for both the type and the value
    doi = DoiUtils::doi_from_url(id)

    { "relatedIdentifier" => doi || id,
      "relationType" => relation_type,
      "relatedIdentifierType" => doi.present? ? "DOI" : "URL",
      "resourceTypeGeneral" => Metadata::SO_TO_DC_TRANSLATIONS[idx["@type"]] }.compact
  end.unwrap
end

#normalize_issn(input, options = {}) ⇒ `Object`

Pick the electronic ISSN if there are multiple ISSNs, and format the ISSN as xxxx-xxxx.

# File 'lib/bolognese/utils.rb', line 717

# Extracts an ISSN from the input and formats it as xxxx-xxxx.
#
# - String (only when options[:content] is not given): used directly.
# - Hash: the value stored under the content key.
# - Array of hashes: the entry whose "media_type" is "electronic", falling
#   back to the first entry; then the value under the content key.
#
# @param input [String, Hash, Array<Hash>, nil]
# @param options [Hash] :content names the key to read (default "__content__")
# @return [String, nil] a 9-character value unchanged, an 8-character value
#   with a hyphen inserted after the fourth character, nil otherwise
def normalize_issn(input, options={})
  content = options[:content] || "__content__"

  issn = if input.blank?
    nil
  elsif input.is_a?(String) && options[:content].nil?
    input
  elsif input.is_a?(Hash)
    input.fetch(content, nil)
  elsif input.is_a?(Array)
    entry = input.find { |candidate| candidate["media_type"] == "electronic" } || input.first
    entry.fetch(content, nil)
  end

  # Work on the string form so a non-String value (e.g. an Integer from
  # parsed JSON) can be sliced and joined like any other ISSN.
  issn = issn.to_s

  case issn.length
  when 9
    issn
  when 8
    "#{issn[0..3]}-#{issn[4..7]}"
  end
end

#normalize_licenses(licenses) ⇒ `Object`

Find a Creative Commons or OSI license in the licenses array and normalize its URL.

# File 'lib/bolognese/utils.rb', line 742

# Finds the first Creative Commons or OSI license among the given licenses
# and returns its URL in normalized form.
#
# Normalization forces https, strips any subdomain from the host, and then:
# - creativecommons.org: drops a trailing "legalcode" path segment and makes
#   the path end with "/";
# - opensource.org: removes "-license"/".php"/".html", adjusts the case of
#   known license names and rewrites a trailing version as "-<major>.<minor>".
#
# @param licenses [Hash, Array<Hash>, nil] entries with a "url" key
# @return [String, Object, nil] the normalized URL; the unchanged input when
#   no CC/OSI license is present; nil when a URL cannot be parsed
def normalize_licenses(licenses)
  # host must be exactly one of the two domains, or a subdomain of one
  standard_host = /(?:\A|\.)(creativecommons\.org|opensource\.org)\z/

  standard_licenses = Array.wrap(licenses).map { |l| URI.parse(l["url"]) }.select { |li| li.host && li.host[standard_host] }
  return licenses if standard_licenses.empty?

  # work on the first standard license found
  uri = standard_licenses.first

  # use HTTPS
  uri.scheme = "https"

  # use host name without subdomain
  uri.host = standard_host.match(uri.host)[1]

  # normalize URLs
  if uri.host == "creativecommons.org"
    uri.path = uri.path.split('/')[0..-2].join("/") if uri.path.split('/').last == "legalcode"
    uri.path = uri.path + '/' unless uri.path.end_with?('/')
  else
    uri.path = uri.path.gsub(/(-license|\.php|\.html)/, '')
    uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
    uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
    uri.path = uri.path.sub(/([^0-9\-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
      m = Regexp.last_match
      text = m[1]

      if m[3].present?
        # a missing minor version defaults to 0
        version = [m[3], m[5].presence || "0"].join(".")
        [text, version].join("-")
      else
        text
      end
    end
  end

  uri.to_s
rescue URI::InvalidURIError
  nil
end

#normalize_orcid(orcid) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 686

# Validates an ORCID iD and returns it as an https://orcid.org/ URL.
#
# @param orcid [String, nil]
# @return [String, nil] nil when validate_orcid rejects the input
def normalize_orcid(orcid)
  valid_id = validate_orcid(orcid)

  # turn ORCID ID into URL
  "https://orcid.org/" + Addressable::URI.encode(valid_id) if valid_id.present?
end

#normalize_publisher(publisher) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 778

# Normalizes a publisher into hash form.
#
# @param publisher [#to_hash, #to_str, Object]
# @return [Object, Hash, nil] hash-like input unchanged, string-like input
#   wrapped as { "name" => publisher }, nil for anything else
def normalize_publisher(publisher)
  return publisher if publisher.respond_to?(:to_hash)

  { "name" => publisher } if publisher.respond_to?(:to_str)
end

#normalize_ror(ror) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 694

# Validates a ROR id and returns it as an https://ror.org/ URL.
#
# @param ror [String, nil]
# @return [String, nil] nil when validate_ror rejects the input
def normalize_ror(ror)
  valid_id = validate_ror(ror)

  # turn ROR into URL
  "https://ror.org/" + Addressable::URI.encode(valid_id) if valid_id.present?
end

#normalize_url(id, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 659

# Normalizes an http, https or ftp URL.
#
# @param id [String, nil]
# @param options [Hash] :https — when truthy, the scheme is set to "https"
# @return [String, nil] info: URIs unchanged; the normalized URL otherwise;
#   nil for blank input, input without a host, other schemes, or an invalid URI
def normalize_url(id, options={})
  return nil unless id.present?

  # handle info URIs: only the "info:" scheme is passed through, not every
  # string that merely begins with the letters "info"
  return id if id.to_s.start_with?("info:")

  # check for valid HTTP uri
  uri = Addressable::URI.parse(id)

  return nil unless uri && uri.host && %w(http https ftp).include?(uri.scheme)

  # optionally turn into https URL
  uri.scheme = "https" if options[:https]

  # clean up URL
  uri.path = PostRank::URI.clean(uri.path)

  uri.to_s
rescue Addressable::URI::InvalidURIError
  nil
end

#orcid_as_url(orcid) ⇒ `Object`



600
601
602

# File 'lib/bolognese/utils.rb', line 600

# Prefixes an ORCID iD with https://orcid.org/.
#
# @param orcid [String, nil]
# @return [String, nil] nil for blank input
def orcid_as_url(orcid)
  return unless orcid.present?

  "https://orcid.org/#{orcid}"
end

#orcid_from_url(url) ⇒ `Object`



596
597
598

# File 'lib/bolognese/utils.rb', line 596

# Extracts the ORCID iD from an http(s)://orcid.org/ URL.
#
# @param url [String, nil]
# @return [String, nil] everything after "orcid.org/", or nil when the URL
#   does not start with http(s)://orcid.org/
def orcid_from_url(url)
  # the pattern must start at the scheme; a stray ":" after \A would require
  # the URL to begin with a colon and so never match
  Array(/\A(http|https):\/\/orcid\.org\/(.+)/.match(url)).last
end

#parse_attributes(element, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 627

# Reads the text content out of a string, hash, or array of either.
#
# - String: HTML-unescaped, but only when options[:content] is not given.
# - Hash: the value under the content key.
# - Array: hash items are replaced by their content value, duplicates are
#   removed, and the result is either its first item (options[:first]) or
#   unwrapped.
#
# @param element [String, Hash, Array, nil]
# @param options [Hash] :content key to read (default "__content__"), :first
# @return [Object, nil] nil for any other input
def parse_attributes(element, options={})
  content = options[:content] || "__content__"

  case element
  when String
    CGI.unescapeHTML(element) if options[:content].nil?
  when Hash
    element.fetch(CGI.unescapeHTML(content), nil)
  when Array
    values = element.map { |e| e.is_a?(Hash) ? e.fetch(CGI.unescapeHTML(content), nil) : e }.uniq
    options[:first] ? values.first : values.unwrap
  end
end

#resource_file(extra_path) ⇒ `Object`



504
505
506

# File 'lib/bolognese/utils.rb', line 504

# Reads a file from the resources directory.
#
# @param extra_path [String] path appended to resources_dir_path
# @return [String] the file contents
def resource_file( extra_path )
  full_path = resources_dir_path + extra_path
  File.read(full_path)
end

#resource_json(resource_symbol) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 508

# Loads and parses the JSON resource registered under the given symbol.
#
# @param resource_symbol [Symbol] a key of RESOURCE_PATHS
# @return [Object, nil] the parsed JSON, or nil when the symbol is not a key
#   of RESOURCE_PATHS
def resource_json( resource_symbol )
  return unless RESOURCE_PATHS.keys().include?(resource_symbol)

  raw = resource_file(RESOURCE_PATHS[resource_symbol])
  JSON.load(raw)
end

#resources_dir_path ⇒ `Object`



500
501
502

# File 'lib/bolognese/utils.rb', line 500

# Returns the absolute path of the "resources" directory, three levels up
# from this file's path, with a trailing slash.
#
# @return [String]
def resources_dir_path
  directory = File.expand_path('../../../resources', __FILE__)
  "#{directory}/"
end

#sanitize(text, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1067

# Scrubs HTML from text, keeping only a whitelist of tags.
#
# - String: scrubbed with Bolognese::WhitelistScrubber; whitespace is then
#   squished, or with options[:new_line] only runs of spaces and tabs are
#   collapsed.
# - Hash: the value under the content key is sanitized.
# - Array: each item (or its content value, for hashes) is sanitized,
#   duplicates are removed, and the result is either its first item
#   (options[:first]) or unwrapped.
#
# The caller's options hash is not modified.
#
# @param text [String, Hash, Array, nil]
# @param options [Hash] :tags (default: strong em b i code pre sub sup br),
#   :content (default "__content__"), :new_line, :first
# @return [String, Array, nil] nil for any other input
def sanitize(text, options={})
  # apply the default tag list on a copy so the caller's hash stays untouched
  options = options.merge(tags: options[:tags] || Set.new(%w(strong em b i code pre sub sup br)))
  content = options[:content] || "__content__"

  case text
  when String
    # the scrubber is only needed on this branch
    custom_scrubber = Bolognese::WhitelistScrubber.new(options)
    scrubbed = Loofah.scrub_fragment(text, custom_scrubber).to_s

    if options[:new_line]
      # Remove multiple spaces, tabs, and other whitespace characters while preserving single spaces and new lines
      scrubbed.gsub(/[ \t]+/, ' ').strip
    else
      scrubbed.squish
    end
  when Hash
    sanitize(text.fetch(content, nil), new_line: options[:new_line])
  when Array
    cleaned = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil), new_line: options[:new_line]) : sanitize(e, new_line: options[:new_line]) }.uniq
    options[:first] ? cleaned.first : cleaned.unwrap
  end
end

#to_citeproc(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1048

# Converts creator/contributor hashes into citeproc name hashes.
#
# "familyName" and "givenName" become "family" and "given"; without a
# familyName, "name" becomes "literal". DataCite-specific keys and nil
# values are removed. The input hashes are left unmodified.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Array<Hash>, nil] nil when there are no entries
def to_citeproc(element)
  Array.wrap(element).map do |a|
    # merge returns a new hash, so the caller's data is not altered
    name = a.merge("family" => a["familyName"], "given" => a["givenName"])
    name["literal"] = a["name"] unless a["familyName"].present?
    name.except("nameType", "type", "@type", "id", "@id", "name", "familyName", "givenName", "affiliation", "nameIdentifiers", "contributorType").compact
  end.presence
end

#to_datacite_json(element, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 786

# Returns copies of the given hashes with every key dasherized.
#
# @param element [Hash, Array<Hash>, nil]
# @param options [Hash] :first — when truthy the result is unwrapped
# @return [Hash, Array<Hash>, nil] with :first the unwrapped list; otherwise
#   the list itself, or nil when it is empty
def to_datacite_json(element, options={})
  dasherized = Array.wrap(element).map do |e|
    e.each_with_object({}) { |(k, v), h| h[k.dasherize] = v }
  end

  return dasherized.unwrap if options[:first]

  dasherized.presence
end

#to_identifier(identifier) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1026

# Converts a related identifier hash into a schema.org PropertyValue hash.
#
# @param identifier [Hash] with "relatedIdentifierType" and "relatedIdentifier"
# @return [Hash] with "@type", "propertyID" and "value"
def to_identifier(identifier)
  property_id = identifier["relatedIdentifierType"]
  value = identifier["relatedIdentifier"]

  { "@type" => "PropertyValue", "propertyID" => property_id, "value" => value }
end

#to_ris(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 1057

# Formats creator/contributor hashes as names for RIS output.
#
# Entries with a familyName become "familyName, givenName"; the others use
# their "name".
#
# @param element [Hash, Array<Hash>, nil]
# @return [String, Array<String>, nil] unwrapped list of names
def to_ris(element)
  names = Array.wrap(element).map do |a|
    next a["name"] unless a["familyName"].present?

    [a["familyName"], a["givenName"]].join(", ")
  end

  names.unwrap
end

#to_schema_org(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 799

# Renames "type", "id" and "title" keys to "@type", "@id" and "name".
#
# @param element [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] whatever map_hash_keys returns
def to_schema_org(element)
  map_hash_keys(element: element, mapping: { "type" => "@type", "id" => "@id", "title" => "name" })
end

#to_schema_org_container(element, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 852

# Builds a schema.org container (Periodical or DataCatalog) hash.
#
# @param element [Hash, nil] container with "identifier" and "title"
# @param options [Hash] :type ("Dataset" yields a DataCatalog),
#   :container_title (fallback name, also permits a nil element)
# @return [Hash, nil] nil unless element is a Hash, or element is nil and a
#   container title is given
def to_schema_org_container(element, options={})
  return nil unless (element.is_a?(Hash) || (element.nil? && options[:container_title].present?))

  # the guard lets a nil element through when a container title is supplied;
  # read from an empty hash in that case instead of indexing nil
  container = element || {}

  {
    "@id" => container["identifier"],
    "@type" => (options[:type] == "Dataset") ? "DataCatalog" : "Periodical",
    "name" => container["title"] || options[:container_title] }.compact
end

#to_schema_org_contributors(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 828

# Converts contributor hashes into schema.org Person/Organization hashes.
#
# For each contributor: affiliations become Organization hashes, "@type" is
# the nameType minus its last two characters, "@id" is the first
# nameIdentifier, and "name" is "givenName familyName" when a familyName
# exists. "nameIdentifiers", "nameType" and nil values are removed. The input
# hashes are not modified.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] unwrapped list of contributors
def to_schema_org_contributors(element)
  contributors = Array.wrap(element).map do |c|
    organizations = Array.wrap(c["affiliation"]).map do |a|
      # an affiliation is either a plain name or a hash with name and identifier
      name, affiliation_identifier = a.is_a?(String) ? [a, nil] : [a["name"], a["affiliationIdentifier"]]

      { "@type" => "Organization", "@id" => affiliation_identifier, "name" => name }.compact
    end

    # merge builds a new hash: existing keys keep their position, new ones are appended
    c.merge(
      "affiliation" => organizations.unwrap,
      "@type" => c["nameType"].present? ? c["nameType"][0..-3] : nil,
      "@id" => Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil),
      "name" => c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
    ).except("nameIdentifiers", "nameType").compact
  end

  contributors.unwrap
end

#to_schema_org_creators(element) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 805

# Converts creator hashes into schema.org Person/Organization hashes.
#
# For each creator: affiliations become Organization hashes, "@type" is the
# nameType minus its last two characters, "@id" is the first nameIdentifier,
# and "name" is "givenName familyName" when a familyName exists.
# "nameIdentifiers", "nameType" and nil values are removed.
#
# Like to_schema_org_contributors, this leaves the input hashes unmodified.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] unwrapped list of creators
def to_schema_org_creators(element)
  Array.wrap(element).map do |c|
    affiliation = Array.wrap(c["affiliation"]).map do |a|
      if a.is_a?(String)
        name = a
        affiliation_identifier = nil
      else
        name = a["name"]
        affiliation_identifier = a["affiliationIdentifier"]
      end

      {
        "@type" => "Organization",
        "@id" => affiliation_identifier,
        "name" => name }.compact
    end.unwrap

    # merge returns a new hash, so the caller's creator is not altered
    creator = c.merge(
      "affiliation" => affiliation,
      "@type" => c["nameType"].present? ? c["nameType"][0..-3] : nil,
      "@id" => Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil),
      "name" => c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"])

    creator.except("nameIdentifiers", "nameType").compact
  end.unwrap
end

#to_schema_org_funder(funding_references) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 888

# Converts funding references into schema.org Organization hashes.
#
# @param funding_references [Hash, Array<Hash>, nil] entries with
#   "funderIdentifier" and "funderName"
# @return [Hash, Array<Hash>, nil] unwrapped list of funders; nil for blank input
def to_schema_org_funder(funding_references)
  return nil unless funding_references.present?

  funders = Array.wrap(funding_references).map do |fr|
    { "@id" => fr["funderIdentifier"], "@type" => "Organization", "name" => fr["funderName"] }.compact
  end

  funders.unwrap
end

#to_schema_org_identifiers(element, options = {}) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 861

# Converts identifier hashes into schema.org PropertyValue hashes.
#
# @param element [Hash, Array<Hash>, nil] entries with "identifierType" and
#   "identifier"
# @param options [Hash] not read by this method
# @return [Hash, Array<Hash>, nil] unwrapped list of PropertyValue hashes
def to_schema_org_identifiers(element, options={})
  property_values = Array.wrap(element).map do |ai|
    { "@type" => "PropertyValue", "propertyID" => ai["identifierType"], "value" => ai["identifier"] }
  end

  property_values.unwrap
end

#to_schema_org_relation(related_identifiers: nil, relation_type: nil) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 870

# Converts the related identifiers of a given relation type into schema.org
# hashes.
#
# "References" also selects "Cites" and "Documents". An ISSN with relation
# "IsPartOf" becomes a Periodical; everything else becomes a hash with a
# normalized "@id" and a "@type" looked up in DC_TO_SO_TRANSLATIONS
# (default "CreativeWork").
#
# @param related_identifiers [Hash, Array<Hash>, nil]
# @param relation_type [String, nil]
# @return [Hash, Array<Hash>, nil] unwrapped list; nil when either argument
#   is blank
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
  return nil unless related_identifiers.present? && relation_type.present?

  accepted_types = relation_type == "References" ? ["References", "Cites", "Documents"] : [relation_type]
  selected = Array.wrap(related_identifiers).select { |ri| accepted_types.include?(ri["relationType"]) }

  relations = selected.map do |r|
    periodical = r["relatedIdentifierType"] == "ISSN" && r["relationType"] == "IsPartOf"

    if periodical
      { "@type" => "Periodical", "issn" => r["relatedIdentifier"] }.compact
    else
      { "@id" => normalize_id(r["relatedIdentifier"]),
        "@type" => DC_TO_SO_TRANSLATIONS[r["resourceTypeGeneral"]] || "CreativeWork" }.compact
    end
  end

  relations.unwrap
end

#to_schema_org_spatial_coverage(geo_location) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 899

# Converts geo locations into schema.org Place hashes.
#
# Each geo location yields one Place per shape it carries: a point
# (GeoCoordinates), a box (GeoShape with "box") and/or a polygon (GeoShape
# with "polygon"). A location that has only a place name yields a single
# GeoCoordinates Place with just the address.
#
# @param geo_location [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] unwrapped list of places; nil for blank input
def to_schema_org_spatial_coverage(geo_location)
  return nil unless geo_location.present?

  places = Array.wrap(geo_location).each_with_object([]) do |gl, acc|
    point = gl.fetch("geoLocationPoint", nil)
    box = gl.fetch("geoLocationBox", nil)
    polygon = gl.fetch("geoLocationPolygon", nil)
    place = gl.fetch("geoLocationPlace", nil)

    if point
      acc << {
        "@type" => "Place",
        "geo" => {
          "@type" => "GeoCoordinates",
          "address" => gl["geoLocationPlace"],
          "latitude" => gl.dig("geoLocationPoint", "pointLatitude"),
          "longitude" => gl.dig("geoLocationPoint", "pointLongitude") }
      }.compact
    end

    if box
      # south, west, north, east — joined with spaces; nil if none is present
      bounds = %w(southBoundLatitude westBoundLongitude northBoundLatitude eastBoundLongitude).map do |edge|
        gl.dig("geoLocationBox", edge)
      end

      acc << {
        "@type" => "Place",
        "geo" => {
          "@type" => "GeoShape",
          "address" => gl["geoLocationPlace"],
          "box" => bounds.compact.join(" ").presence }.compact
      }.compact
    end

    if polygon
      address = gl["geoLocationPlace"]
      # each polygon entry becomes a list of [longitude, latitude] pairs
      rings = Array.wrap(gl.dig("geoLocationPolygon")).map do |glp|
        pairs = Array.wrap(glp).map do |glpp|
          next unless glpp.dig("polygonPoint")

          [glpp.dig("polygonPoint", "pointLongitude"), glpp.dig("polygonPoint", "pointLatitude")].compact
        end
        pairs.compact.presence
      end

      acc << {
        "@type" => "Place",
        "geo" => {
          "@type" => "GeoShape",
          "address" => address,
          "polygon" => rings.compact.presence }
      }
    end

    # place name only: no point, box or polygon was given
    if place && !point && !box && !polygon
      acc << {
        "@type" => "Place",
        "geo" => {
          "@type" => "GeoCoordinates",
          "address" => gl["geoLocationPlace"] }
      }.compact
    end
  end

  places.unwrap
end

#validate_orcid(orcid) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 604

# Extracts and validates an ORCID iD, given bare or as an orcid.org URL.
#
# The four digit groups may be separated by hyphens or whitespace; whitespace
# separators are replaced with hyphens in the result.
#
# @param orcid [String, nil]
# @return [String, nil] the hyphenated ORCID iD, or nil when the input does
#   not match
def validate_orcid(orcid)
  match = /\A(?:(?:http|https):\/\/(?:(?:www|sandbox)?\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\/{0,1}\z/.match(orcid)
  # the pattern has a single capture group, so the last element is the iD
  id = match.to_a.last
  return unless id.present?

  id.gsub(/[[:space:]]/, "-")
end

#validate_orcid_scheme(orcid_scheme) ⇒ `Object`



613
614
615

# File 'lib/bolognese/utils.rb', line 613

# Checks whether a scheme URI starts with http(s)://orcid.org, optionally
# with a "www." prefix.
#
# @param orcid_scheme [String, nil]
# @return [String, nil] "orcid.org" on a match, nil otherwise
def validate_orcid_scheme(orcid_scheme)
  match = /\A(http|https):\/\/(www\.)?(orcid\.org)/.match(orcid_scheme)
  # the third capture group is the last one
  match ? match[3] : nil
end

#validate_ror(ror) ⇒ `Object`



609
610
611

# File 'lib/bolognese/utils.rb', line 609

# Extracts and validates a ROR id, given bare, as ror.org/<id>, or as an
# http(s)://ror.org/<id> URL (an optional trailing slash is allowed).
#
# The whole string must match: \A and \z anchor the pattern to the string,
# so multi-line input and trailing newlines are rejected.
#
# @param ror [String, nil]
# @return [String, nil] the ROR id, or nil when the input does not match
def validate_ror(ror)
  Array(/\A(?:(?:(?:http|https):\/\/)?ror\.org\/)?(0\w{6}\d{2})\/{0,1}\z/.match(ror)).last
end

#validate_url(str) ⇒ `Object`

# File 'lib/bolognese/utils.rb', line 617

# Classifies a string as a DOI, a URL or an ISSN.
#
# The DOI pattern is tried first, so a doi.org URL is reported as "DOI".
#
# @param str [String, nil]
# @return [String, nil] "DOI", "URL", "ISSN", or nil when nothing matches
def validate_url(str)
  case str
  when /\A(?:(http|https):\/\/(dx\.)?doi.org\/)?(doi:)?(10\.\d{4,5}\/.+)\z/ then "DOI"
  when /\A(http|https):\/\// then "URL"
  when /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/ then "ISSN"
  end
end

Module: Bolognese::Utils

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#abstract_description ⇒ Object

#dfg_ids_to_fos(dfg_ids) ⇒ Object

#find_from_format(id: nil, string: nil, ext: nil, filename: nil) ⇒ Object

#find_from_format_by_ext(string, options = {}) ⇒ Object

#find_from_format_by_filename(filename) ⇒ Object

#find_from_format_by_id(id) ⇒ Object

#find_from_format_by_string(string) ⇒ Object

#from_citeproc(element) ⇒ Object

#from_datacite_json(element) ⇒ Object

#from_schema_org(element) ⇒ Object

#from_schema_org_contributors(element) ⇒ Object

#from_schema_org_creators(element) ⇒ Object

#generate_container(types, related_items, related_identifiers, descriptions) ⇒ Object

#get_contributor(contributor, contributor_type) ⇒ Object

#get_date(dates, date_type) ⇒ Object

#get_date_from_date_parts(date_as_parts) ⇒ Object

#get_date_from_parts(year, month = nil, day = nil) ⇒ Object

#get_date_parts(iso8601_time) ⇒ Object

#get_date_parts_from_parts(year, month = nil, day = nil) ⇒ Object

#get_datetime_from_iso8601(iso8601_time) ⇒ Object

#get_datetime_from_time(time) ⇒ Object

#get_identifier(identifiers, identifier_type) ⇒ Object

#get_identifier_type(identifier_type) ⇒ Object

#get_iso8601_date(iso8601_time) ⇒ Object

#get_series_information(str) ⇒ Object

#get_year_month(iso8601_time) ⇒ Object

#get_year_month_day(iso8601_time) ⇒ Object

#github_as_codemeta_url(url) ⇒ Object

#github_as_owner_url(url) ⇒ Object

#github_as_release_url(url) ⇒ Object

#github_as_repo_url(url) ⇒ Object

#github_from_url(url) ⇒ Object

#github_owner_from_url(url) ⇒ Object

#github_release_from_url(url) ⇒ Object

#github_repo_from_url(url) ⇒ Object

#hsh_to_fos(hsh) ⇒ Object

#hsh_to_spdx(hsh) ⇒ Object

#jsonlint(json) ⇒ Object

#map_hash_keys(element: nil, mapping: nil) ⇒ Object

#name_to_fos(name) ⇒ Object

#name_to_spdx(name) ⇒ Object

#normalize_cc_url(id) ⇒ Object

#normalize_id(id, options = {}) ⇒ Object

#normalize_ids(ids: nil, relation_type: nil) ⇒ Object

#normalize_issn(input, options = {}) ⇒ Object

#normalize_licenses(licenses) ⇒ Object

#normalize_orcid(orcid) ⇒ Object

#normalize_publisher(publisher) ⇒ Object

#normalize_ror(ror) ⇒ Object

#normalize_url(id, options = {}) ⇒ Object

#orcid_as_url(orcid) ⇒ Object

#orcid_from_url(url) ⇒ Object

#parse_attributes(element, options = {}) ⇒ Object

#resource_file(extra_path) ⇒ Object

#resource_json(resource_symbol) ⇒ Object

#resources_dir_path ⇒ Object

#sanitize(text, options = {}) ⇒ Object

#to_citeproc(element) ⇒ Object

#to_datacite_json(element, options = {}) ⇒ Object

#to_identifier(identifier) ⇒ Object

#to_ris(element) ⇒ Object

#to_schema_org(element) ⇒ Object

#to_schema_org_container(element, options = {}) ⇒ Object

#to_schema_org_contributors(element) ⇒ Object

#to_schema_org_creators(element) ⇒ Object

#to_schema_org_funder(funding_references) ⇒ Object

#to_schema_org_identifiers(element, options = {}) ⇒ Object

#to_schema_org_relation(related_identifiers: nil, relation_type: nil) ⇒ Object

#to_schema_org_spatial_coverage(geo_location) ⇒ Object

#validate_orcid(orcid) ⇒ Object

#validate_orcid_scheme(orcid_scheme) ⇒ Object

#validate_ror(ror) ⇒ Object

#validate_url(str) ⇒ Object