Module: Briard::Utils

Included in:: CLI, MetadataUtils

Defined in:: lib/briard/utils.rb

Constant Summary collapse

# Maps a Creative Commons license URL (without trailing slash) to the URL
# used as its normalized form. Most entries simply append '/legalcode';
# the '3.0/us' and 'publicdomain' entries are the exceptions.
NORMALIZED_LICENSES =

{
  'https://creativecommons.org/licenses/by/1.0' => 'https://creativecommons.org/licenses/by/1.0/legalcode',
  'https://creativecommons.org/licenses/by/2.0' => 'https://creativecommons.org/licenses/by/2.0/legalcode',
  'https://creativecommons.org/licenses/by/2.5' => 'https://creativecommons.org/licenses/by/2.5/legalcode',
  'https://creativecommons.org/licenses/by/3.0' => 'https://creativecommons.org/licenses/by/3.0/legalcode',
  'https://creativecommons.org/licenses/by/3.0/us' => 'https://creativecommons.org/licenses/by/3.0/legalcode',
  'https://creativecommons.org/licenses/by/4.0' => 'https://creativecommons.org/licenses/by/4.0/legalcode',
  'https://creativecommons.org/licenses/by-nc/1.0' => 'https://creativecommons.org/licenses/by-nc/1.0/legalcode',
  'https://creativecommons.org/licenses/by-nc/2.0' => 'https://creativecommons.org/licenses/by-nc/2.0/legalcode',
  'https://creativecommons.org/licenses/by-nc/2.5' => 'https://creativecommons.org/licenses/by-nc/2.5/legalcode',
  'https://creativecommons.org/licenses/by-nc/3.0' => 'https://creativecommons.org/licenses/by-nc/3.0/legalcode',
  'https://creativecommons.org/licenses/by-nc/4.0' => 'https://creativecommons.org/licenses/by-nc/4.0/legalcode',
  'https://creativecommons.org/licenses/by-nd-nc/1.0' => 'https://creativecommons.org/licenses/by-nd-nc/1.0/legalcode',
  'https://creativecommons.org/licenses/by-nd-nc/2.0' => 'https://creativecommons.org/licenses/by-nd-nc/2.0/legalcode',
  'https://creativecommons.org/licenses/by-nd-nc/2.5' => 'https://creativecommons.org/licenses/by-nd-nc/2.5/legalcode',
  'https://creativecommons.org/licenses/by-nd-nc/3.0' => 'https://creativecommons.org/licenses/by-nd-nc/3.0/legalcode',
  'https://creativecommons.org/licenses/by-nd-nc/4.0' => 'https://creativecommons.org/licenses/by-nd-nc/4.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-sa/1.0' => 'https://creativecommons.org/licenses/by-nc-sa/1.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-sa/2.0' => 'https://creativecommons.org/licenses/by-nc-sa/2.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-sa/2.5' => 'https://creativecommons.org/licenses/by-nc-sa/2.5/legalcode',
  'https://creativecommons.org/licenses/by-nc-sa/3.0' => 'https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-sa/4.0' => 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode',
  'https://creativecommons.org/licenses/by-nd/1.0' => 'https://creativecommons.org/licenses/by-nd/1.0/legalcode',
  'https://creativecommons.org/licenses/by-nd/2.0' => 'https://creativecommons.org/licenses/by-nd/2.0/legalcode',
  'https://creativecommons.org/licenses/by-nd/2.5' => 'https://creativecommons.org/licenses/by-nd/2.5/legalcode',
  'https://creativecommons.org/licenses/by-nd/3.0' => 'https://creativecommons.org/licenses/by-nd/3.0/legalcode',
  # Previously pointed at the 2.0 legal code; each version maps to its own legal code.
  'https://creativecommons.org/licenses/by-nd/4.0' => 'https://creativecommons.org/licenses/by-nd/4.0/legalcode',
  'https://creativecommons.org/licenses/by-sa/1.0' => 'https://creativecommons.org/licenses/by-sa/1.0/legalcode',
  'https://creativecommons.org/licenses/by-sa/2.0' => 'https://creativecommons.org/licenses/by-sa/2.0/legalcode',
  'https://creativecommons.org/licenses/by-sa/2.5' => 'https://creativecommons.org/licenses/by-sa/2.5/legalcode',
  'https://creativecommons.org/licenses/by-sa/3.0' => 'https://creativecommons.org/licenses/by-sa/3.0/legalcode',
  'https://creativecommons.org/licenses/by-sa/4.0' => 'https://creativecommons.org/licenses/by-sa/4.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-nd/1.0' => 'https://creativecommons.org/licenses/by-nc-nd/1.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-nd/2.0' => 'https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-nd/2.5' => 'https://creativecommons.org/licenses/by-nc-nd/2.5/legalcode',
  'https://creativecommons.org/licenses/by-nc-nd/3.0' => 'https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode',
  'https://creativecommons.org/licenses/by-nc-nd/4.0' => 'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode',
  'https://creativecommons.org/licenses/publicdomain' => 'https://creativecommons.org/licenses/publicdomain/',
  'https://creativecommons.org/publicdomain/zero/1.0' => 'https://creativecommons.org/publicdomain/zero/1.0/legalcode'
}

# Lookup table from a resource type name (DataCite-style, per the inline
# note below) to the type name in the value; nil means no equivalent is
# mapped. Values are presumably schema.org types (the "SO" in the name) —
# confirm against callers.
DC_TO_SO_TRANSLATIONS =

{
  'Audiovisual' => 'MediaObject',
  'Book' => 'Book',
  'BookChapter' => 'Chapter',
  'Collection' => 'Collection',
  'ComputationalNotebook' => 'SoftwareSourceCode',
  'ConferencePaper' => 'Article',
  'ConferenceProceeding' => 'Periodical',
  'DataPaper' => 'Article',
  'Dataset' => 'Dataset',
  'Dissertation' => 'Thesis',
  'Event' => 'Event',
  'Image' => 'ImageObject',
  'InteractiveResource' => nil,
  'Journal' => 'Periodical',
  'JournalArticle' => 'ScholarlyArticle',
  'Model' => nil,
  'OutputManagementPlan' => nil,
  'PeerReview' => 'Review',
  'PhysicalObject' => nil,
  'Preprint' => nil,
  'Report' => 'Report',
  'Service' => 'Service',
  'Software' => 'SoftwareSourceCode',
  'Sound' => 'AudioObject',
  'Standard' => nil,
  'Text' => 'ScholarlyArticle',
  'Workflow' => nil,
  'Other' => 'CreativeWork',
  # not part of DataCite schema, but used internally
  'Periodical' => 'Periodical',
  'DataCatalog' => 'DataCatalog'
}

# Lookup table from a resource type name to a citation type name; nil
# means no equivalent is mapped. The keys presumably are DataCite
# resourceTypeGeneral values and the values CSL/citeproc item types
# ("DC" / "CP" in the name) — confirm against callers.
DC_TO_CP_TRANSLATIONS =

{
  'Audiovisual' => 'motion_picture',
  'Book' => 'book',
  'BookChapter' => 'chapter',
  'Collection' => nil,
  'ComputationalNotebook' => nil,
  # Hyphenated, matching the value used for 'ProceedingsArticle' in
  # CR_TO_CP_TRANSLATIONS (was 'paper_conference').
  'ConferencePaper' => 'paper-conference',
  'ConferenceProceeding' => nil,
  'DataPaper' => 'report',
  'Dataset' => 'dataset',
  'Dissertation' => nil,
  'Event' => nil,
  'Image' => 'graphic',
  'InteractiveResource' => nil,
  'Journal' => nil,
  'JournalArticle' => 'article-journal',
  'Model' => nil,
  'OutputManagementPlan' => nil,
  'PeerReview' => 'review',
  'PhysicalObject' => nil,
  'Preprint' => nil,
  'Report' => 'report',
  'Service' => nil,
  'Sound' => 'song',
  'Standard' => nil,
  'Text' => 'report',
  'Workflow' => nil,
  'Other' => nil
}

# Lookup table from a work type name to a citation type name; nil means
# no equivalent is mapped. Presumably Crossref types to CSL/citeproc
# item types ("CR" / "CP" in the name) — confirm against callers.
CR_TO_CP_TRANSLATIONS =

{
  'Proceedings' => nil,
  'ReferenceBook' => nil,
  'JournalIssue' => 'article-journal',
  'ProceedingsArticle' => 'paper-conference',
  'Other' => nil,
  'Dissertation' => 'thesis',
  'Dataset' => 'dataset',
  'EditedBook' => 'book',
  'PostedContent' => 'article-journal',
  'JournalArticle' => 'article-journal',
  'Journal' => nil,
  'Report' => 'report',
  'BookSeries' => nil,
  'ReportSeries' => nil,
  'BookTrack' => nil,
  'Standard' => nil,
  'BookSection' => 'chapter',
  'BookPart' => nil,
  'Book' => 'book',
  'BookChapter' => 'chapter',
  'StandardSeries' => nil,
  'Monograph' => 'book',
  'Component' => nil,
  'ReferenceEntry' => 'entry-dictionary',
  'JournalVolume' => nil,
  'BookSet' => nil
}

# Lookup table from a work type name to another type vocabulary; nil
# means no equivalent is mapped. Presumably Crossref types to schema.org
# types ("CR" / "SO" in the name) — confirm against callers.
CR_TO_SO_TRANSLATIONS =

{
  'Proceedings' => nil,
  'ReferenceBook' => 'Book',
  'JournalIssue' => 'PublicationIssue',
  'ProceedingsArticle' => nil,
  'Other' => 'CreativeWork',
  'Dissertation' => 'Thesis',
  'Dataset' => 'Dataset',
  'EditedBook' => 'Book',
  'JournalArticle' => 'ScholarlyArticle',
  'Journal' => nil,
  'Report' => 'Report',
  'BookSeries' => nil,
  'ReportSeries' => nil,
  'BookTrack' => nil,
  'Standard' => nil,
  'BookSection' => nil,
  'BookPart' => nil,
  'Book' => 'Book',
  'BookChapter' => 'Chapter',
  'StandardSeries' => nil,
  'Monograph' => 'Book',
  'Component' => 'CreativeWork',
  'ReferenceEntry' => nil,
  'JournalVolume' => 'PublicationVolume',
  'BookSet' => nil,
  'PostedContent' => 'ScholarlyArticle',
  'PeerReview' => 'Review'
}

# Lookup table from a work type name to a lower-case entry type name;
# nil means no equivalent is mapped. Presumably Crossref types to BibTeX
# entry types ("CR" / "BIB" in the name) — confirm against callers.
CR_TO_BIB_TRANSLATIONS =

{
  'Proceedings' => 'proceedings',
  'ReferenceBook' => 'book',
  'JournalIssue' => nil,
  'ProceedingsArticle' => nil,
  'Other' => nil,
  'Dissertation' => 'phdthesis',
  'Dataset' => nil,
  'EditedBook' => 'book',
  'JournalArticle' => 'article',
  'Journal' => nil,
  'Report' => 'techreport',
  'BookSeries' => nil,
  'ReportSeries' => nil,
  'BookTrack' => nil,
  'Standard' => nil,
  'BookSection' => 'inbook',
  'BookPart' => nil,
  'Book' => 'book',
  'BookChapter' => 'inbook',
  'StandardSeries' => nil,
  'Monograph' => 'book',
  'Component' => nil,
  'ReferenceEntry' => nil,
  'JournalVolume' => nil,
  'BookSet' => nil,
  'PostedContent' => 'article'
}

# Lookup table from a lower-case entry type name to a work type name.
# Presumably BibTeX entry types to Crossref types ("BIB" / "CR" in the
# name) — confirm against callers.
BIB_TO_CR_TRANSLATIONS =

{
  'proceedings' => 'Proceedings',
  'phdthesis' => 'Dissertation',
  'article' => 'JournalArticle',
  'book' => 'Book',
  'inbook' => 'BookChapter'
}

# Lookup table from a work type name to a lower-case publication type
# name; nil means no equivalent is mapped. Presumably Crossref types to
# JATS publication types ("CR" / "JATS" in the name) — confirm against
# callers.
CR_TO_JATS_TRANSLATIONS =

{
  'Proceedings' => 'working-paper',
  'ReferenceBook' => 'book',
  'JournalIssue' => 'journal',
  'ProceedingsArticle' => 'working-paper',
  'Other' => nil,
  'Dissertation' => nil,
  'Dataset' => 'data',
  'EditedBook' => 'book',
  'JournalArticle' => 'journal',
  'Journal' => 'journal',
  'Report' => 'report',
  'BookSeries' => 'book',
  'ReportSeries' => 'report',
  'BookTrack' => 'book',
  'Standard' => 'standard',
  'BookSection' => 'chapter',
  'BookPart' => 'chapter',
  'Book' => 'book',
  'BookChapter' => 'chapter',
  'StandardSeries' => 'standard',
  'Monograph' => 'book',
  'Component' => nil,
  'ReferenceEntry' => nil,
  'JournalVolume' => 'journal',
  'BookSet' => 'book'
}

# Lookup table from a work type name to a resource type name; nil means
# no equivalent is mapped. Presumably Crossref types to DataCite resource
# types ("CR" / "DC" in the name) — confirm against callers.
CR_TO_DC_TRANSLATIONS =

{
  'Proceedings' => nil,
  'ReferenceBook' => nil,
  'JournalIssue' => 'Text',
  'ProceedingsArticle' => 'ConferencePaper',
  'Other' => 'Other',
  'Dissertation' => 'Dissertation',
  'Dataset' => 'Dataset',
  'EditedBook' => 'Book',
  'JournalArticle' => 'JournalArticle',
  'Journal' => 'Journal',
  'Report' => 'Report',
  'BookSeries' => nil,
  'ReportSeries' => nil,
  'BookTrack' => nil,
  'Standard' => 'Standard',
  'BookSection' => 'BookChapter',
  'BookPart' => nil,
  'Book' => 'Book',
  'BookChapter' => 'BookChapter',
  'SaComponent' => 'Text',
  'StandardSeries' => 'Standard',
  'Monograph' => 'Book',
  'Component' => nil,
  'ReferenceEntry' => nil,
  'JournalVolume' => nil,
  'BookSet' => nil,
  'PostedContent' => 'Preprint',
  'PeerReview' => 'PeerReview'
}

# Lookup table from a type name to a resource type name. Presumably
# schema.org types to DataCite resource types ("SO" / "DC" in the name) —
# confirm against callers.
SO_TO_DC_TRANSLATIONS =

{
  'Article' => 'Preprint',
  'AudioObject' => 'Sound',
  'Blog' => 'Text',
  'BlogPosting' => 'Preprint',
  'Book' => 'Book',
  'Chapter' => 'BookChapter',
  'Collection' => 'Collection',
  'CreativeWork' => 'Text',
  'DataCatalog' => 'Dataset',
  'Dataset' => 'Dataset',
  'Event' => 'Event',
  'ImageObject' => 'Image',
  'Movie' => 'Audiovisual',
  'PublicationIssue' => 'Text',
  'Report' => 'Report',
  'ScholarlyArticle' => 'Text',
  'Thesis' => 'Text',
  'Service' => 'Service',
  'Review' => 'PeerReview',
  'SoftwareSourceCode' => 'Software',
  'VideoObject' => 'Audiovisual',
  'WebPage' => 'Text',
  'WebSite' => 'Text'
}

# Lookup table from a type name to a lower-case publication type name;
# nil means no equivalent is mapped. Presumably schema.org types to JATS
# publication types ("SO" / "JATS" in the name) — confirm against callers.
SO_TO_JATS_TRANSLATIONS =

{
  'Article' => 'journal',
  'AudioObject' => nil,
  'Blog' => nil,
  'BlogPosting' => nil,
  'Book' => 'book',
  'Collection' => nil,
  'CreativeWork' => nil,
  'DataCatalog' => 'data',
  'Dataset' => 'data',
  'Event' => nil,
  'ImageObject' => nil,
  'Movie' => nil,
  'PublicationIssue' => 'journal',
  'ScholarlyArticle' => 'journal',
  'Service' => nil,
  'SoftwareSourceCode' => 'software',
  'VideoObject' => nil,
  'WebPage' => nil,
  'WebSite' => 'website'
}

# Lookup table from a type name to a citation type name; nil means no
# equivalent is mapped. Presumably schema.org types to CSL/citeproc item
# types ("SO" / "CP" in the name) — confirm against callers.
SO_TO_CP_TRANSLATIONS =

{
  'Article' => 'article-newspaper',
  'AudioObject' => 'song',
  'Blog' => 'report',
  'BlogPosting' => 'post-weblog',
  'Collection' => nil,
  'CreativeWork' => nil,
  'DataCatalog' => 'dataset',
  'Dataset' => 'dataset',
  'Event' => nil,
  'ImageObject' => 'graphic',
  'Movie' => 'motion_picture',
  'PublicationIssue' => nil,
  'Report' => 'report',
  'ScholarlyArticle' => 'article-journal',
  'Service' => nil,
  'Thesis' => 'thesis',
  'VideoObject' => 'broadcast',
  'WebPage' => 'webpage',
  'WebSite' => 'webpage'
}

# Lookup table from a type name to an upper-case reference type tag; nil
# means no equivalent is mapped. Presumably schema.org types to RIS
# reference types ("SO" / "RIS" in the name) — confirm against callers.
SO_TO_RIS_TRANSLATIONS =

{
  'Article' => 'GEN',
  'AudioObject' => nil,
  'Blog' => nil,
  'BlogPosting' => 'BLOG',
  'Collection' => nil,
  'CreativeWork' => 'GEN',
  'DataCatalog' => 'CTLG',
  'Dataset' => 'DATA',
  'Event' => nil,
  'ImageObject' => 'FIGURE',
  'Movie' => 'MPCT',
  'Report' => 'RPRT',
  'PublicationIssue' => nil,
  'ScholarlyArticle' => 'JOUR',
  'Service' => nil,
  'SoftwareSourceCode' => 'COMP',
  'VideoObject' => 'VIDEO',
  'WebPage' => 'ELEC',
  'WebSite' => nil
}

# Lookup table from a work type name to an upper-case reference type
# tag; nil means no equivalent is mapped. Presumably Crossref types to
# RIS reference types ("CR" / "RIS" in the name) — confirm against callers.
CR_TO_RIS_TRANSLATIONS =

{
  'Proceedings' => 'CONF',
  'PostedContent' => 'JOUR',
  'ReferenceBook' => 'BOOK',
  'JournalIssue' => 'JOUR',
  'ProceedingsArticle' => 'CPAPER',
  'Other' => 'GEN',
  'Dissertation' => 'THES',
  'Dataset' => 'DATA',
  'EditedBook' => 'BOOK',
  'JournalArticle' => 'JOUR',
  'Journal' => nil,
  'Report' => 'RPRT',
  'BookSeries' => nil,
  'ReportSeries' => nil,
  'BookTrack' => nil,
  'Standard' => 'STAND',
  'BookSection' => 'CHAP',
  'BookPart' => 'CHAP',
  'Book' => 'BOOK',
  'BookChapter' => 'CHAP',
  'StandardSeries' => nil,
  'Monograph' => 'BOOK',
  'Component' => nil,
  'ReferenceEntry' => 'DICT',
  'JournalVolume' => nil,
  'BookSet' => nil
}

# Lookup table from a resource type name to an upper-case reference type
# tag; nil means no equivalent is mapped. Presumably DataCite resource
# types to RIS reference types ("DC" / "RIS" in the name) — confirm
# against callers.
DC_TO_RIS_TRANSLATIONS =

{
  'Audiovisual' => 'MPCT',
  'Book' => 'BOOK',
  'BookChapter' => 'CHAP',
  'Collection' => nil,
  'ComputationalNotebook' => 'COMP',
  'ConferencePaper' => 'CPAPER',
  'ConferenceProceeding' => 'CONF',
  'DataPaper' => nil,
  'Dataset' => 'DATA',
  'Dissertation' => 'THES',
  'Event' => nil,
  'Image' => 'FIGURE',
  'InteractiveResource' => nil,
  'Journal' => nil,
  'JournalArticle' => 'JOUR',
  'Model' => nil,
  'OutputManagementPlan' => nil,
  'PeerReview' => nil,
  'PhysicalObject' => nil,
  'Preprint' => 'RPRT',
  # Was 'RRPT' (transposed letters); the report tag used by the other
  # RIS tables in this module is 'RPRT'.
  'Report' => 'RPRT',
  'Service' => nil,
  'Software' => 'COMP',
  'Sound' => 'SOUND',
  'Standard' => nil,
  'Text' => 'RPRT',
  'Workflow' => nil,
  'Other' => nil
}

# Lookup table from an upper-case reference type tag to a resource type
# name. Presumably RIS reference types to DataCite resource types
# ("RIS" / "DC" in the name) — confirm against callers.
RIS_TO_DC_TRANSLATIONS =

{
  'BLOG' => 'Text',
  'GEN' => 'Text',
  'CTLG' => 'Collection',
  'DATA' => 'Dataset',
  'FIGURE' => 'Image',
  'THES' => 'Dissertation',
  'MPCT' => 'Audiovisual',
  'JOUR' => 'JournalArticle',
  'COMP' => 'Software',
  'VIDEO' => 'Audiovisual',
  'ELEC' => 'Text'
}

# Lookup table from a lower-case entry type name to a resource type
# name; nil means no equivalent is mapped. Presumably BibTeX entry types
# to DataCite resource types ("BIB" / "DC" in the name) — confirm
# against callers.
BIB_TO_DC_TRANSLATIONS =

{
  'article' => 'JournalArticle',
  'book' => 'Book',
  'inbook' => 'BookChapter',
  'inproceedings' => nil,
  'manual' => nil,
  'misc' => 'Other',
  'phdthesis' => 'Dissertation',
  'proceedings' => 'ConferenceProceeding',
  'techreport' => 'Report',
  'unpublished' => nil
}

# Lookup table from a citation type name to a resource type name.
# Presumably CSL/citeproc item types to DataCite resource types
# ("CP" / "DC" in the name) — confirm against callers.
CP_TO_DC_TRANSLATIONS =

{
  'song' => 'Audiovisual',
  'post-weblog' => 'Text',
  'dataset' => 'Dataset',
  'graphic' => 'Image',
  'motion_picture' => 'Audiovisual',
  'article-journal' => 'JournalArticle',
  'broadcast' => 'Audiovisual',
  'webpage' => 'Text'
}

# Lookup table from a type name to a lower-case entry type name; every
# key has a value, with 'misc' used for most types. Presumably schema.org
# types to BibTeX entry types ("SO" / "BIB" in the name) — confirm
# against callers.
SO_TO_BIB_TRANSLATIONS =

{
  'Article' => 'article',
  'AudioObject' => 'misc',
  'Thesis' => 'phdthesis',
  'Blog' => 'misc',
  'BlogPosting' => 'article',
  'Collection' => 'misc',
  'CreativeWork' => 'misc',
  'DataCatalog' => 'misc',
  'Dataset' => 'misc',
  'Event' => 'misc',
  'ImageObject' => 'misc',
  'Movie' => 'misc',
  'PublicationIssue' => 'misc',
  'ScholarlyArticle' => 'article',
  'Service' => 'misc',
  'SoftwareSourceCode' => 'misc',
  'VideoObject' => 'misc',
  'WebPage' => 'misc',
  'WebSite' => 'misc'
}

# Lookup table from a colon-prefixed placeholder code (e.g. ':unav') to a
# human-readable explanation of why a value is missing.
UNKNOWN_INFORMATION =

{
  ':unac' => 'temporarily inaccessible',
  ':unal' => 'unallowed, suppressed intentionally',
  ':unap' => 'not applicable, makes no sense',
  ':unas' => 'value unassigned (e.g., Untitled)',
  ':unav' => 'value unavailable, possibly unknown',
  ':unkn' => 'known to be unknown (e.g., Anonymous, Inconnue)',
  ':none' => 'never had a value, never will',
  ':null' => 'explicitly and meaningfully empty',
  ':tba' => 'to be assigned or announced later',
  ':etal' => 'too numerous to list (et alia)'
}

Instance Method Summary collapse

#decode_doi(doi) ⇒ Object
#encode_doi(prefix) ⇒ Object
#find_from_format(id: nil, string: nil, ext: nil, filename: nil) ⇒ Object
#find_from_format_by_ext(string, options = {}) ⇒ Object
#find_from_format_by_filename(filename) ⇒ Object
#find_from_format_by_id(id) ⇒ Object
#find_from_format_by_string(string) ⇒ Object
#from_citeproc(element) ⇒ Object
#from_datacite_json(element) ⇒ Object
#from_schema_org(element) ⇒ Object
#from_schema_org_contributors(element) ⇒ Object
#from_schema_org_creators(element) ⇒ Object
#get_contributor(contributor, contributor_type) ⇒ Object
#get_date(dates, date_type) ⇒ Object
#get_date_from_date_parts(date_as_parts) ⇒ Object
#get_date_from_parts(year, month = nil, day = nil) ⇒ Object
#get_date_parts(iso8601_time) ⇒ Object
#get_date_parts_from_parts(year, month = nil, day = nil) ⇒ Object
#get_datetime_from_iso8601(iso8601_time) ⇒ Object

Parsing of incomplete ISO 8601 timestamps such as 2015-04 is broken in the standard library. Returns nil if the timestamp is not valid ISO 8601.
#get_datetime_from_time(time) ⇒ Object

ISO 8601 datetime without hyphens and colons, as used by Crossref. Returns nil if invalid.
#get_identifier(identifiers, identifier_type) ⇒ Object
#get_identifier_type(identifier_type) ⇒ Object
#get_iso8601_date(iso8601_time) ⇒ Object
#get_series_information(str) ⇒ Object
#get_year_month(iso8601_time) ⇒ Object
#get_year_month_day(iso8601_time) ⇒ Object
#github_as_cff_url(url) ⇒ Object
#github_as_codemeta_url(url) ⇒ Object
#github_as_owner_url(url) ⇒ Object
#github_as_release_url(url) ⇒ Object
#github_as_repo_url(url) ⇒ Object
#github_from_url(url) ⇒ Object
#github_owner_from_url(url) ⇒ Object
#github_release_from_url(url) ⇒ Object
#github_repo_from_url(url) ⇒ Object
#hsh_to_fos(hsh) ⇒ Object
#hsh_to_spdx(hsh) ⇒ Object
#jsonlint(json) ⇒ Object
#map_hash_keys(element: nil, mapping: nil) ⇒ Object
#name_to_fos(name) ⇒ Object
#name_to_spdx(name) ⇒ Object
#normalize_cc_url(id) ⇒ Object
#normalize_id(id, options = {}) ⇒ Object
#normalize_ids(ids: nil, relation_type: nil) ⇒ Object
#normalize_issn(input, options = {}) ⇒ Object

Pick the electronic ISSN if there are multiple. Format the ISSN as xxxx-xxxx.
#normalize_licenses(licenses) ⇒ Object

Find a Creative Commons or OSI license in the licenses array, and normalize its URL and name.
#normalize_orcid(orcid) ⇒ Object
#normalize_url(id, options = {}) ⇒ Object
#orcid_as_url(orcid) ⇒ Object
#orcid_from_url(url) ⇒ Object
#parse_attributes(element, options = {}) ⇒ Object
#sanitize(text, options = {}) ⇒ Object
#strip_milliseconds(iso8601_time) ⇒ Object

Strip milliseconds if there is a time, as they interfere with EDTF parsing. Keep dates unchanged.
#to_citeproc(element) ⇒ Object
#to_datacite_json(element, options = {}) ⇒ Object
#to_identifier(identifier) ⇒ Object
#to_ris(element) ⇒ Object
#to_schema_org(element) ⇒ Object
#to_schema_org_container(element, options = {}) ⇒ Object
#to_schema_org_contributors(element) ⇒ Object
#to_schema_org_creators(element) ⇒ Object
#to_schema_org_funder(funding_references) ⇒ Object
#to_schema_org_identifiers(element, _options = {}) ⇒ Object
#to_schema_org_relation(related_identifiers: nil, relation_type: nil) ⇒ Object
#to_schema_org_spatial_coverage(geo_location) ⇒ Object
#validate_orcid(orcid) ⇒ Object
#validate_orcid_scheme(orcid_scheme) ⇒ Object
#validate_url(str) ⇒ Object

Instance Method Details

#decode_doi(doi) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1498

# Decodes the suffix of a DOI with Base32::URL.
#
# The DOI is split on '/' into at most five pieces and the final piece is
# taken as the suffix, so for a URL such as "https://doi.org/<prefix>/<suffix>"
# everything after the prefix (including any further slashes) is decoded.
#
# @param doi [String] the DOI to decode
# @return [Object] whatever Base32::URL.decode returns for the suffix
def decode_doi(doi)
  *_leading, encoded_suffix = doi.split('/', 5)
  Base32::URL.decode(encoded_suffix)
end

#encode_doi(prefix) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1491

# Builds a DOI URL with a randomly generated suffix for the given prefix.
#
# A random integer in 2**63..(2**64 - 1) is encoded with Base32::URL and
# two 7-character slices of the encoded string are joined with a hyphen.
#
# NOTE(review): the slices start at offsets 0 and 6, so the character at
# index 6 appears in both halves — confirm whether that overlap is intended.
#
# @param prefix [String] DOI prefix placed before the generated suffix
# @return [String] "https://doi.org/<prefix>/<suffix>"
def encode_doi(prefix)
  encoded = Base32::URL.encode(SecureRandom.random_number(2**63..(2**64) - 1))
  head = encoded[0, 7]
  tail = encoded[6, 7]
  "https://doi.org/#{prefix}/#{head}-#{tail}"
end

#find_from_format(id: nil, string: nil, ext: nil, filename: nil) ⇒ `Object`

# File 'lib/briard/utils.rb', line 490

# Picks the input format name by delegating to the first applicable
# detector, checked in this order: id, string together with ext, string
# alone, filename. Falls back to 'datacite' when none of them is present.
#
# @param id [String, nil] identifier, passed to find_from_format_by_id
# @param string [String, nil] content, passed to the ext/string detectors
# @param ext [String, nil] file extension, only used together with string
# @param filename [String, nil] file name, passed to find_from_format_by_filename
# @return [String, nil] the value returned by the chosen detector, or 'datacite'
def find_from_format(id: nil, string: nil, ext: nil, filename: nil)
  return find_from_format_by_id(id) if id.present?
  return find_from_format_by_ext(string, ext: ext) if string.present? && ext.present?
  return find_from_format_by_string(string) if string.present?
  return find_from_format_by_filename(filename) if filename.present?

  'datacite'
end

#find_from_format_by_ext(string, options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 533

# Determines the format name from the file extension in options[:ext],
# inspecting the content of `string` where the extension alone is ambiguous
# (.xml and .json). The branches are checked in order and the first match
# wins; nil is returned when no branch matches.
#
# NOTE(review): each .json branch parses `string` again via
# Maremma.from_json, so a JSON input may be parsed several times.
#
# @param string [String] file content, parsed for the .xml / .json branches
# @param options [Hash] only the :ext key (e.g. '.bib') is read
# @return [String, nil] one of 'bibtex', 'ris', 'crossref', 'datacite',
#   'cff', 'schema_org', 'crossref_json', 'codemeta', 'datacite_json',
#   'crosscite', 'citeproc', or nil
def find_from_format_by_ext(string, options = {})
  if options[:ext] == '.bib'
    'bibtex'
  elsif options[:ext] == '.ris'
    'ris'
  elsif options[:ext] == '.xml' && Maremma.from_xml(string).to_h.dig('crossref_result',
                                                                     'query_result', 'body', 'query', 'doi_record', 'crossref')
    'crossref'
  elsif options[:ext] == '.xml' && Nokogiri::XML(string, nil, 'UTF-8',
                                                 &:noblanks).collect_namespaces.find do |_k, v|
          v.start_with?('http://datacite.org/schema/kernel')
        end
    'datacite'
  elsif options[:ext] == '.cff'
    'cff'
  elsif options[:ext] == '.json' && URI(Maremma.from_json(string).to_h.fetch('@context',
                                                                             '')).host == 'schema.org'
    'schema_org'
  elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('source') == 'Crossref'
    'crossref_json'
  elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('@context') == ('https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld')
    'codemeta'
  elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('schemaVersion').to_s.start_with?('http://datacite.org/schema/kernel')
    'datacite_json'
  elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('types') && Maremma.from_json(string).to_h.dig('publication_year').present?
    'crosscite'
  elsif options[:ext] == '.json' && Maremma.from_json(string).to_h.dig('issued',
                                                                       'date-parts').present?
    'citeproc'
  end
end

#find_from_format_by_filename(filename) ⇒ `Object`

# File 'lib/briard/utils.rb', line 525

# Maps a well-known file name to its format name.
#
# @param filename [String] bare file name, compared exactly
# @return [String, nil] 'npm' for 'package.json', 'cff' for 'CITATION.cff',
#   nil for anything else
def find_from_format_by_filename(filename)
  case filename
  when 'package.json' then 'npm'
  when 'CITATION.cff' then 'cff'
  end
end

#find_from_format_by_id(id) ⇒ `Object`

# File 'lib/briard/utils.rb', line 504

# Determines the format name from an identifier.
#
# The id is first passed through normalize_id, then matched in order:
# DOI (the registration agency from get_doi_ra, downcased, if it is one of
# the listed agencies, otherwise nil), ORCID, GitHub URLs ending in
# package.json / codemeta.json / CITATION.cff, any other GitHub URL, and
# finally 'schema_org' as the fallback.
#
# The dots in the GitHub file names are escaped so that only the literal
# file names match; an unescaped '.' would match any character.
#
# @param id [String] DOI, ORCID, GitHub URL or other URL
# @return [String, nil] format name, or nil for a DOI whose registration
#   agency is not in the supported list
def find_from_format_by_id(id)
  id = normalize_id(id)

  if %r{\A(?:(http|https):/(/)?(dx\.)?(doi\.org|handle\.stage\.datacite\.org)/)?(doi:)?(10\.\d{4,5}/.+)\z}.match?(id)
    ra = get_doi_ra(id)
    %w[DataCite Crossref mEDRA KISTI JaLC OP].include?(ra) ? ra.downcase : nil
  elsif %r{\A(?:(http|https):/(/)?orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X]+)\z}.match?(id)
    'orcid'
  elsif %r{\A(http|https):/(/)?github\.com/(.+)/package\.json\z}.match?(id)
    'npm'
  elsif %r{\A(http|https):/(/)?github\.com/(.+)/codemeta\.json\z}.match?(id)
    'codemeta'
  elsif %r{\A(http|https):/(/)?github\.com/(.+)/CITATION\.cff\z}.match?(id)
    'cff'
  elsif %r{\A(http|https):/(/)?github\.com/(.+)\z}.match?(id)
    # any other GitHub URL is also treated as a CFF source
    'cff'
  else
    'schema_org'
  end
end

#find_from_format_by_string(string) ⇒ `Object`

# File 'lib/briard/utils.rb', line 565

# Determines the format name by inspecting the content of `string` alone.
#
# The content is tried, in order, as Crossref XML, DataCite XML, several
# JSON flavours (schema.org, codemeta, DataCite JSON, Crossref JSON,
# crosscite, citeproc), RIS (by its 'TY  - ' prefix), CFF (YAML with a
# 'cff-version' key) and finally BibTeX. The first match wins; nil is
# returned when nothing matches.
#
# A Psych::SyntaxError raised anywhere in the chain results in 'bibtex';
# a BibTeX::ParseError results in nil.
#
# NOTE(review): the DataCite JSON branch reads the 'schema-version' key,
# while find_from_format_by_ext reads 'schemaVersion' — confirm which key
# is intended.
# NOTE(review): YAML.load is applied to the raw input; if callers can pass
# untrusted content, consider whether YAML.safe_load is appropriate.
#
# @param string [String] raw metadata content
# @return [String, nil] detected format name, or nil
def find_from_format_by_string(string)
  if Maremma.from_xml(string).to_h.dig('crossref_result', 'query_result', 'body', 'query',
                                       'doi_record', 'crossref').present?
    'crossref'
  elsif Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find do |_k, v|
          v.start_with?('http://datacite.org/schema/kernel')
        end
    'datacite'
  elsif URI(Maremma.from_json(string).to_h.fetch('@context', '')).host == 'schema.org'
    'schema_org'
  elsif Maremma.from_json(string).to_h.dig('@context') == ('https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld')
    'codemeta'
  elsif Maremma.from_json(string).to_h.dig('schema-version').to_s.start_with?('http://datacite.org/schema/kernel')
    'datacite_json'
  elsif Maremma.from_json(string).to_h.dig('source') == ('Crossref')
    'crossref_json'
  elsif Maremma.from_json(string).to_h.dig('types').present? && Maremma.from_json(string).to_h.dig('publication_year').present?
    'crosscite'
  elsif Maremma.from_json(string).to_h.dig('issued', 'date-parts').present?
    'citeproc'
  elsif string.start_with?('TY  - ')
    'ris'
  elsif YAML.load(string).to_h.fetch('cff-version', nil).present?
    'cff'
  elsif BibTeX.parse(string).first
    'bibtex'
  end
rescue Psych::SyntaxError => e
  # content that is not valid YAML is assumed to be BibTeX
  'bibtex'
rescue BibTeX::ParseError => e
  nil
end

#from_citeproc(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1055

# Converts citeproc-style name hashes into hashes with '@type', 'name',
# 'givenName' and 'familyName' keys.
#
# An entry with a 'literal' or an existing 'name' becomes an Organization
# (the literal, when present, replaces the name); otherwise it becomes a
# Person whose name is "given family". The 'given', 'family' and 'literal'
# keys are dropped from the result, as are nil values.
#
# Note that the input hashes are modified in place before the cleaned
# copy is produced.
#
# @param element [Hash, Array<Hash>, nil] one or more name hashes
# @return [Object] the converted entries after Array#unwrap
def from_citeproc(element)
  converted = Array.wrap(element).map do |author|
    has_literal = author['literal'].present?
    organization = has_literal || author['name'].present?

    author['@type'] = organization ? 'Organization' : 'Person'
    if has_literal
      author['name'] = author['literal']
    elsif !organization
      author['name'] = [author['given'], author['family']].compact.join(' ')
    end

    author['givenName'] = author['given']
    author['familyName'] = author['family']
    author.except('given', 'family', 'literal').compact
  end

  converted.unwrap
end

#from_datacite_json(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 775

# Returns copies of the given hash(es) with every key converted via
# String#underscore; values are kept as they are.
#
# @param element [Hash, Array<Hash>, nil] one or more hashes
# @return [Array<Hash>] always an array, one converted hash per input
def from_datacite_json(element)
  Array.wrap(element).map do |entry|
    entry.to_h { |key, value| [key.underscore, value] }
  end
end

#from_schema_org(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 950

# Renames the '@type' and '@id' keys of `element` to 'type' and 'id' by
# delegating to map_hash_keys.
#
# @param element [Object] passed through to map_hash_keys unchanged
# @return [Object] the result of map_hash_keys
def from_schema_org(element)
  map_hash_keys(element: element, mapping: { '@type' => 'type', '@id' => 'id' })
end

#from_schema_org_contributors(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1001

# Converts contributor hashes using '@id' / '@type' / 'name' keys into
# hashes with 'nameIdentifier', 'contributorName' and 'affiliation' keys.
#
# For each contributor:
# * a String affiliation is wrapped as { 'name' => ... }; otherwise the
#   affiliation '@id' selects the ROR or ISNI identifier scheme
# * when normalize_orcid accepts the '@id', an ORCID nameIdentifier is added
# * 'contributorName' carries the name and a nameType of the titleized
#   '@type' with 'al' appended (only when '@type' is present)
# * '@id', '@type', 'name' and nil values are removed from the result
#
# Note that the input hashes are modified in place.
#
# @param element [Hash, Array<Hash>, nil] one or more contributor hashes
# @return [Array<Hash>] converted contributors
def from_schema_org_contributors(element)
  Array.wrap(element).map do |contributor|
    affiliation_identifier_scheme = nil
    scheme_uri = nil

    if contributor['affiliation'].is_a?(String)
      contributor['affiliation'] = { 'name' => contributor['affiliation'] }
    else
      affiliation_id = contributor.dig('affiliation', '@id').to_s
      if affiliation_id.starts_with?('https://ror.org')
        affiliation_identifier_scheme = 'ROR'
        scheme_uri = 'https://ror.org/'
      elsif affiliation_id.starts_with?('https://isni.org')
        affiliation_identifier_scheme = 'ISNI'
        scheme_uri = 'https://isni.org/isni/'
      end
    end

    if normalize_orcid(contributor['@id'])
      orcid_identifier = {
        '__content__' => contributor['@id'],
        'nameIdentifierScheme' => 'ORCID',
        'schemeUri' => 'https://orcid.org'
      }
      contributor['nameIdentifier'] = [orcid_identifier]
    end

    name_type = contributor['@type'].present? ? contributor['@type'].titleize + 'al' : nil
    contributor['contributorName'] = {
      'nameType' => name_type,
      '__content__' => contributor['name']
    }.compact

    contributor['affiliation'] = {
      '__content__' => contributor.dig('affiliation', 'name'),
      'affiliationIdentifier' => contributor.dig('affiliation', '@id'),
      'affiliationIdentifierScheme' => affiliation_identifier_scheme,
      'schemeUri' => scheme_uri
    }.compact.presence

    contributor.except('@id', '@type', 'name').compact
  end
end

#from_schema_org_creators(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 956

# Converts creator hashes using '@id' / '@type' / 'name' keys into hashes
# with 'nameIdentifier', 'creatorName' and 'affiliation' keys.
#
# For each creator:
# * a String affiliation is wrapped as { 'name' => ... }; otherwise the
#   affiliation '@id' selects the ROR or ISNI identifier scheme
# * the '@id' falls back to 'identifier' when blank, and is replaced by the
#   first 'sameAs' entry whose host is orcid.org when there is one
# * when normalize_orcid accepts the '@id', an ORCID nameIdentifier is added
# * an Array '@type' is reduced to its first Person/Organization entry
# * 'creatorName' carries the name and a nameType of the titleized '@type'
#   with 'al' appended (only when '@type' is present)
# * '@id', '@type', 'name' and nil values are removed from the result
#
# Note that the input hashes are modified in place.
#
# @param element [Hash, Array<Hash>, nil] one or more creator hashes
# @return [Array<Hash>] converted creators
def from_schema_org_creators(element)
  Array.wrap(element).map do |c|
    if c['affiliation'].is_a?(String)
      c['affiliation'] = { 'name' => c['affiliation'] }
      affiliation_identifier_scheme = nil
      scheme_uri = nil
    elsif c.dig('affiliation', '@id').to_s.starts_with?('https://ror.org')
      affiliation_identifier_scheme = 'ROR'
      scheme_uri = 'https://ror.org/'
    elsif c.dig('affiliation', '@id').to_s.starts_with?('https://isni.org')
      affiliation_identifier_scheme = 'ISNI'
      scheme_uri = 'https://isni.org/isni/'
    else
      affiliation_identifier_scheme = nil
      scheme_uri = nil
    end

    # alternatively find the nameIdentifier in the identifier attribute
    c['@id'] = c['identifier'] if c['identifier'].present? && c['@id'].blank?

    # alternatively find the nameIdentifier in the sameAs attribute; use the
    # entry that actually points at orcid.org (not merely the first entry),
    # and accept a single String as well as an Array
    orcid_same_as = Array(c['sameAs']).find { |item| URI(item).host == 'orcid.org' }
    c['@id'] = orcid_same_as if orcid_same_as

    if normalize_orcid(c['@id'])
      c['nameIdentifier'] =
        [{ '__content__' => c['@id'], 'nameIdentifierScheme' => 'ORCID',
           'schemeUri' => 'https://orcid.org' }]
    end
    if c['@type'].is_a?(Array)
      c['@type'] = c['@type'].find do |t|
        %w[Person Organization].include?(t)
      end
    end
    c['creatorName'] =
      { 'nameType' => c['@type'].present? ? c['@type'].titleize + 'al' : nil,
        '__content__' => c['name'] }.compact
    c['affiliation'] =
      { '__content__' => c.dig('affiliation', 'name'),
        'affiliationIdentifier' => c.dig('affiliation', '@id'),
        'affiliationIdentifierScheme' => affiliation_identifier_scheme,
        'schemeUri' => scheme_uri }.compact.presence
    c.except('@id', '@type', 'name').compact
  end
end

#get_contributor(contributor, contributor_type) ⇒ `Object`



1258
1259
1260

# File 'lib/briard/utils.rb', line 1258

# Filters contributors by their 'contributorType'.
#
# @param contributor [Array<Hash>] contributor hashes
# @param contributor_type [String] value to match against 'contributorType'
# @return [Array<Hash>] the entries whose 'contributorType' equals contributor_type
def get_contributor(contributor, contributor_type)
  contributor.select do |entry|
    entry['contributorType'] == contributor_type
  end
end

#get_date(dates, date_type) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1253

# Returns the 'date' of the first entry whose 'dateType' matches.
#
# @param dates [Hash, Array<Hash>, nil] one or more date hashes
# @param date_type [String] value to match against 'dateType'
# @return [Object, nil] the matching entry's 'date', or nil when there is
#   no match or the entry has no 'date'
def get_date(dates, date_type)
  matching = Array.wrap(dates).find { |entry| entry['dateType'] == date_type }
  (matching || {}).fetch('date', nil)
end

#get_date_from_date_parts(date_as_parts) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1179

# Builds a date string from a hash holding a 'date-parts' array.
#
# Only the first entry of 'date-parts' is used; its first three elements
# are passed to get_date_from_parts as year, month and day.
#
# @param date_as_parts [Hash, nil] e.g. { 'date-parts' => [[2020, 5, 3]] }
# @return [String, nil] the formatted date; nil when the first entry is
#   [nil] or when a NoMethodError occurs (e.g. the input or its first
#   entry is nil)
def get_date_from_date_parts(date_as_parts)
  parts = date_as_parts.fetch('date-parts', []).first
  return nil if parts == [nil]

  get_date_from_parts(parts[0], parts[1], parts[2])
rescue NoMethodError # e.g. when parts is nil
  nil
end

#get_date_from_parts(year, month = nil, day = nil) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1191

# Joins year, month and day into a hyphen-separated date string.
#
# The year is left-padded with zeros to four characters, month and day to
# two. Any part that ends up as '00' (e.g. a nil or 0 month/day) is left
# out, so partial dates such as '2020' or '2020-05' are possible.
#
# @param year [#to_s] year
# @param month [#to_s, nil] month
# @param day [#to_s, nil] day
# @return [String] e.g. '2020-05-03'
def get_date_from_parts(year, month = nil, day = nil)
  padded = [[year, 4], [month, 2], [day, 2]].map do |value, width|
    value.to_s.rjust(width, '0')
  end

  padded.reject { |part| part == '00' }.join('-')
end

#get_date_parts(iso8601_time) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1168

# Converts an ISO 8601 style string into a 'date-parts' hash.
#
# Year, month and day are read from fixed character positions (0..3, 5..6
# and 8..9) and converted with to_i; parts that come out as 0 are omitted.
#
# @param iso8601_time [String, nil] e.g. '2020-05-03' or '2020-05'
# @return [Hash, nil] { 'date-parts' => [[year, month, day]] } with zero
#   parts removed, { 'date-parts' => [[]] } for nil input, or nil when a
#   TypeError is raised while slicing
def get_date_parts(iso8601_time)
  return { 'date-parts' => [[]] } if iso8601_time.nil?

  numbers = [0..3, 5..6, 8..9].map { |span| iso8601_time[span].to_i }
  { 'date-parts' => [numbers.reject(&:zero?)] }
rescue TypeError
  nil
end

#get_date_parts_from_parts(year, month = nil, day = nil) ⇒ `Object`



1197
1198
1199

# File 'lib/briard/utils.rb', line 1197

# Builds a 'date-parts' hash from separate year, month and day values.
#
# Each value is converted with to_i and parts equal to 0 are omitted.
#
# @param year [#to_i] year
# @param month [#to_i, nil] month
# @param day [#to_i, nil] day
# @return [Hash] { 'date-parts' => [[year, month, day]] } with zero parts removed
def get_date_parts_from_parts(year, month = nil, day = nil)
  components = [year, month, day].map(&:to_i).reject(&:zero?)
  { 'date-parts' => [components] }
end

#get_datetime_from_iso8601(iso8601_time) ⇒ `Object`

Parsing of incomplete ISO 8601 timestamps such as 2015-04 is broken in the standard library. Returns nil if the timestamp is not valid ISO 8601.

# File 'lib/briard/utils.rb', line 1229

# Parses an ISO 8601 timestamp with ISO8601::DateTime and converts it to
# a UTC time.
#
# @param iso8601_time [String] timestamp to parse
# @return [Time, nil] the parsed time in UTC, or nil when any
#   StandardError is raised while parsing or converting
def get_datetime_from_iso8601(iso8601_time)
  begin
    parsed = ISO8601::DateTime.new(iso8601_time)
    parsed.to_time.utc
  rescue StandardError
    nil
  end
end

#get_datetime_from_time(time) ⇒ `Object`

ISO 8601 datetime without hyphens and colons, as used by Crossref. Returns nil if invalid.

# File 'lib/briard/utils.rb', line 1247

# Reformats a compact timestamp ('%Y%m%d%H%M%S', i.e. no hyphens or
# colons) as '%Y-%m-%dT%H:%M:%SZ'.
#
# @param time [#to_s] compact timestamp, e.g. '20180101120000'
# @return [String, nil] the reformatted timestamp, or nil when the input
#   cannot be parsed (ArgumentError)
def get_datetime_from_time(time)
  parsed = DateTime.strptime(time.to_s, '%Y%m%d%H%M%S')
  parsed.strftime('%Y-%m-%dT%H:%M:%SZ')
rescue ArgumentError
  nil
end

#get_identifier(identifiers, identifier_type) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1262

# Returns the 'identifier' of the first entry whose 'identifierType' matches.
#
# @param identifiers [Hash, Array<Hash>, nil] one or more identifier hashes
# @param identifier_type [String] value to match against 'identifierType'
# @return [Object, nil] the matching entry's 'identifier', or nil when
#   there is no match or the entry has no 'identifier'
def get_identifier(identifiers, identifier_type)
  matching = Array.wrap(identifiers).find { |entry| entry['identifierType'] == identifier_type }
  (matching || {}).fetch('identifier', nil)
end

#get_identifier_type(identifier_type) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1267

# Returns the canonical spelling of a known identifier type, matched
# case-insensitively (e.g. 'doi' => 'DOI', 'ARXIV' => 'arXiv').
#
# @param identifier_type [String, nil] identifier type in any letter case
# @return [String, nil] the canonical spelling, the input unchanged when
#   it is not a known type, or nil when the input is blank
def get_identifier_type(identifier_type)
  return nil unless identifier_type.present?

  # Canonical spellings; each one is looked up by its lower-case form.
  canonical_names = %w[
    ARK arXiv bibcode DOI EAN13 EISSN Handle IGSN ISBN ISSN ISTC
    LISSN LSID PMID PURL UPC URL URN md5 minid dataguid
  ]

  wanted = identifier_type.downcase
  canonical_names.find { |name| name.downcase == wanted } || identifier_type
end

#get_iso8601_date(iso8601_time) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1201

# Returns the first ten characters of a timestamp, i.e. the +YYYY-MM-DD+ part
# of a full ISO 8601 value.
#
# @param iso8601_time [String, nil]
# @return [String, nil] the leading slice, or nil for nil input
def get_iso8601_date(iso8601_time)
  iso8601_time.nil? ? nil : iso8601_time[0..9]
end

#get_series_information(str) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1297

# Splits a comma-separated series string into title, volume, issue and pages.
#
# The first segment is the title. With more than two segments, the second is
# read as "volume(issue)". With more than one segment, the last is read as
# "firstPage-lastPage". Keys whose value is nil are omitted.
#
# @param str [String, nil] e.g. "Journal of Foo, 5(2), 10-20"
# @return [Hash] subset of 'title', 'volume', 'issue', 'firstPage', 'lastPage';
#   empty when +str+ is not present
def get_series_information(str)
  return {} unless str.present?

  segments = str.split(',').map(&:strip)
  info = { 'title' => segments.first }

  if segments.length > 2
    # rpartition yields [before, "(issue)", after]; without a match the whole
    # segment ends up in +after+ and the other two are empty strings.
    before, bracketed, after = segments[1].rpartition(/\(([^)]+)\)/)
    info['volume'] = before.presence || after.presence
    info['issue'] = bracketed[1...-1].presence
  end

  if segments.length > 1 && segments.last.present?
    first_page, last_page = segments.last.split('-').map(&:strip)
    info['firstPage'] = first_page
    info['lastPage'] = last_page
  end

  info.compact
end

#get_year_month(iso8601_time) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1207

# Extracts year and month from fixed character positions (0..3 and 5..6).
#
# Parts that are missing or convert to 0 are dropped.
#
# @param iso8601_time [String, nil]
# @return [Array<Integer>] +[year, month]+, possibly shorter; empty for nil
def get_year_month(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6].map { |span| iso8601_time[span].to_i }.reject(&:zero?)
end

#get_year_month_day(iso8601_time) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1216

# Extracts year, month and day from fixed character positions
# (0..3, 5..6 and 8..9).
#
# Parts that are missing or convert to 0 are dropped.
#
# @param iso8601_time [String, nil]
# @return [Array<Integer>] +[year, month, day]+, possibly shorter; empty for nil
def get_year_month_day(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6, 8..9].map { |span| iso8601_time[span].to_i }.reject(&:zero?)
end

#github_as_cff_url(url) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1158

# Builds the raw.githubusercontent.com URL of a repository's CITATION.cff.
#
# When the URL already points at a CITATION.cff file, its release and path are
# reused; otherwise the file is assumed to sit at the root of the "main" branch.
#
# @param url [String] GitHub URL
# @return [String, nil] raw file URL, or nil when no owner can be extracted
def github_as_cff_url(url)
  parts = github_from_url(url)
  raw_base = "https://raw.githubusercontent.com/#{parts[:owner]}/#{parts[:repo]}"

  return "#{raw_base}/#{parts[:release]}/#{parts[:path]}" if parts[:path].to_s.end_with?('CITATION.cff')

  "#{raw_base}/main/CITATION.cff" if parts[:owner].present?
end

#github_as_codemeta_url(url) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1148

# Builds the raw.githubusercontent.com URL of a repository's codemeta.json.
#
# When the URL already points at a codemeta.json file, its release and path are
# reused; otherwise the file is assumed to sit at the root of the "master"
# branch.
#
# @param url [String] GitHub URL
# @return [String, nil] raw file URL, or nil when no owner can be extracted
def github_as_codemeta_url(url)
  parts = github_from_url(url)
  raw_base = "https://raw.githubusercontent.com/#{parts[:owner]}/#{parts[:repo]}"

  return "#{raw_base}/#{parts[:release]}/#{parts[:path]}" if parts[:path].to_s.end_with?('codemeta.json')

  "#{raw_base}/master/codemeta.json" if parts[:owner].present?
end

#github_as_owner_url(url) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1129

# Returns the GitHub profile URL of the owner found in +url+.
#
# @param url [String] GitHub URL
# @return [String, nil] +https://github.com/<owner>+, or nil without an owner
def github_as_owner_url(url)
  owner = github_from_url(url)[:owner]
  return unless owner.present?

  "https://github.com/#{owner}"
end

#github_as_release_url(url) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1141

# Returns the +/tree/<release>+ URL for the release found in +url+.
#
# @param url [String] GitHub URL
# @return [String, nil] tree URL, or nil when the URL carries no release
def github_as_release_url(url)
  owner, repo, release = github_from_url(url).values_at(:owner, :repo, :release)

  "https://github.com/#{owner}/#{repo}/tree/#{release}" if release.present?
end

#github_as_repo_url(url) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1134

# Returns the repository URL for the owner and repo found in +url+.
#
# @param url [String] GitHub URL
# @return [String, nil] +https://github.com/<owner>/<repo>+, or nil when the
#   URL carries no repo
def github_as_repo_url(url)
  owner, repo = github_from_url(url).values_at(:owner, :repo)

  "https://github.com/#{owner}/#{repo}" if repo.present?
end

#github_from_url(url) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1108

# Splits a https://github.com/ URL into its path components.
#
# Path segments are read positionally: owner, repo, a segment that is ignored
# (e.g. "tree" or "blob"), release, and everything after that as path.
#
# @param url [String, nil]
# @return [Hash] subset of +:owner+, +:repo+, +:release+, +:path+; empty when
#   the URL does not start with https://github.com/
def github_from_url(url)
  github_pattern = %r{\Ahttps://github\.com/(.+)(?:/)?(.+)?(?:/tree/)?(.*)\z}
  return {} unless github_pattern.match?(url)

  segments = URI.parse(url).path[1..-1].split('/')
  owner, repo, _kind, release, *remainder = segments

  # With exactly four segments the remainder is empty and path becomes "",
  # which compact keeps; only nil entries are removed.
  path = segments.length > 3 ? remainder.join('/') : nil

  { owner: owner, repo: repo, release: release, path: path }.compact
end

#github_owner_from_url(url) ⇒ `Object`



1125
1126
1127

# File 'lib/briard/utils.rb', line 1125

# Returns the owner component of a GitHub URL.
#
# @param url [String] GitHub URL
# @return [String, nil] owner, or nil when none is found
def github_owner_from_url(url)
  parts = github_from_url(url)
  parts[:owner]
end

#github_release_from_url(url) ⇒ `Object`



1121
1122
1123

# File 'lib/briard/utils.rb', line 1121

# Returns the release component of a GitHub URL.
#
# @param url [String] GitHub URL
# @return [String, nil] release, or nil when none is found
def github_release_from_url(url)
  parts = github_from_url(url)
  parts[:release]
end

#github_repo_from_url(url) ⇒ `Object`



1117
1118
1119

# File 'lib/briard/utils.rb', line 1117

# Returns the repository component of a GitHub URL.
#
# @param url [String] GitHub URL
# @return [String, nil] repo name, or nil when none is found
def github_repo_from_url(url)
  parts = github_from_url(url)
  parts[:repo]
end

#hsh_to_fos(hsh) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1418

# Converts a subject hash into a list of subject entries, appending an
# OECD Fields of Science (FOS) entry when the subject can be mapped.
#
# Lookup order:
# 1. the subject text is matched against the 'fosLabel' values in
#    resources/oecd/fos-mappings.json (with or without a "FOS: " prefix);
# 2. otherwise it is matched against resources/oecd/for-mappings.json, by id
#    when 'subjectScheme' is 'FOR' and by label otherwise.
#
# @param hsh [Hash] subject with '__content__' or 'subject' plus optional
#   'subjectScheme', 'schemeURI'/'schemeUri', 'valueURI'/'valueUri',
#   'classificationCode' and 'lang'
# @return [Array<Hash>] the sanitized input subject, followed by the mapped
#   FOS subject when one was found
def hsh_to_fos(hsh)
  # first find subject in Fields of Science (OECD)
  fos = JSON.load(File.read(File.expand_path('../../resources/oecd/fos-mappings.json',
                                             __dir__))).fetch('fosFields')
  subject = fos.find do |l|
    l['fosLabel'] == hsh['__content__'] || 'FOS: ' + l['fosLabel'] == hsh['__content__'] || l['fosLabel'] == hsh['subject']
  end

  if subject
    return [{
      'subject' => sanitize(hsh['__content__'] || hsh['subject']),
      'subjectScheme' => hsh['subjectScheme'],
      'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'],
      'valueUri' => hsh['valueURI'] || hsh['valueUri'],
      'classificationCode' => hsh['classificationCode'],
      'lang' => hsh['lang']
    }.compact,
            {
              'subject' => 'FOS: ' + subject['fosLabel'],
              'subjectScheme' => 'Fields of Science and Technology (FOS)',
              'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
            }.compact]
  end

  # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
  # and map to Fields of Science. Add an extra entry for the latter
  fores = JSON.load(File.read(File.expand_path('../../resources/oecd/for-mappings.json',
                                               __dir__)))
  for_fields = fores.fetch('forFields')
  for_disciplines = fores.fetch('forDisciplines')

  # try to extract forId
  if hsh['subjectScheme'] == 'FOR'
    for_id = hsh['__content__'].to_s.split(' ').first || hsh['subject'].to_s.split(' ').first

    # Without any subject text there is no id to look up. Skipping the lookup
    # avoids calling rjust on nil and leaves +subject+ unset, so the
    # pass-through result below is returned.
    if for_id
      # ids are compared left-padded with zeros to six characters
      for_id = for_id.rjust(6, '0')

      subject = for_fields.find { |l| l['forId'] == for_id } ||
                for_disciplines.find { |l| l['forId'] == for_id[0..3] }
    end
  else
    subject = for_fields.find do |l|
      l['forLabel'] == hsh['__content__'] || l['forLabel'] == hsh['subject']
    end ||
              for_disciplines.find do |l|
                l['forLabel'] == hsh['__content__'] || l['forLabel'] == hsh['subject']
              end
  end

  if subject
    [{
      'subject' => sanitize(hsh['__content__'] || hsh['subject']),
      'subjectScheme' => hsh['subjectScheme'],
      'classificationCode' => hsh['classificationCode'],
      'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'],
      'valueUri' => hsh['valueURI'] || hsh['valueUri'],
      'lang' => hsh['lang']
    }.compact,
     {
       'subject' => 'FOS: ' + subject['fosLabel'],
       'subjectScheme' => 'Fields of Science and Technology (FOS)',
       'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
     }]
  else
    [{
      'subject' => sanitize(hsh['__content__'] || hsh['subject']),
      'subjectScheme' => hsh['subjectScheme'],
      'classificationCode' => hsh['classificationCode'],
      'schemeUri' => hsh['schemeURI'] || hsh['schemeUri'],
      'valueUri' => hsh['valueURI'] || hsh['valueUri'],
      'lang' => hsh['lang']
    }.compact]
  end
end

#hsh_to_spdx(hsh) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1348

# Converts a rights hash into a rights entry, using the SPDX license list in
# resources/spdx/licenses.json when the license can be identified.
#
# A license matches on its id (case-insensitive) against 'rightsIdentifier',
# on its first 'seeAlso' URL against the normalized 'rightsURI' or 'rights',
# or on its name against 'rights'.
#
# @param hsh [Hash] rights with any of 'rights', '__content__', 'rightsURI',
#   'rightsUri', 'rightsIdentifier', 'rightsIdentifierScheme', 'schemeUri', 'lang'
# @return [Hash] SPDX-based entry when matched, otherwise the input's own
#   values; nil values are removed in both cases
def hsh_to_spdx(hsh)
  licenses_file = File.expand_path('../../resources/spdx/licenses.json', __dir__)
  known_licenses = JSON.load(File.read(licenses_file)).fetch('licenses')

  match = known_licenses.find do |entry|
    entry['licenseId'].casecmp?(hsh['rightsIdentifier']) ||
      entry['seeAlso'].first == normalize_cc_url(hsh['rightsURI']) ||
      entry['name'] == hsh['rights'] ||
      entry['seeAlso'].first == normalize_cc_url(hsh['rights'])
  end

  unless match
    identifier = hsh['rightsIdentifier']
    return {
      'rights' => hsh['__content__'] || hsh['rights'],
      'rightsUri' => hsh['rightsURI'] || hsh['rightsUri'],
      'rightsIdentifier' => identifier.present? ? identifier.downcase : nil,
      'rightsIdentifierScheme' => hsh['rightsIdentifierScheme'],
      'schemeUri' => hsh['schemeUri'],
      'lang' => hsh['lang']
    }.compact
  end

  {
    'rights' => match['name'],
    'rightsUri' => match['seeAlso'].first,
    'rightsIdentifier' => match['licenseId'].downcase,
    'rightsIdentifierScheme' => 'SPDX',
    'schemeUri' => 'https://spdx.org/licenses/',
    'lang' => hsh['lang']
  }.compact
end

#jsonlint(json) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1319

# Runs JsonLint over a JSON string and collects the reported errors.
#
# @param json [String, nil]
# @return [Array] errors appended by the linter's +check_data+ (invoked via
#   +send+), or +['No JSON provided']+ when +json+ is not present
def jsonlint(json)
  return ['No JSON provided'] unless json.present?

  [].tap do |errors|
    JsonLint::Linter.new.send(:check_data, json, errors)
  end
end

#map_hash_keys(element: nil, mapping: nil) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1033

# Renames the keys of one or more hashes according to +mapping+.
#
# Keys without a mapping are kept as they are. Values that are themselves
# hashes are passed through +to_schema_org+.
#
# @param element [Hash, Array<Hash>, nil]
# @param mapping [Hash] old key => new key
# @return [Hash, Array<Hash>, nil] the unwrapped list of converted hashes
def map_hash_keys(element: nil, mapping: nil)
  converted = Array.wrap(element).map do |item|
    item.each_with_object({}) do |(key, value), renamed|
      renamed[mapping.fetch(key, key)] = value.is_a?(Hash) ? to_schema_org(value) : value
    end
  end

  converted.unwrap
end

#name_to_fos(name) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1376

# Converts a subject name into a list of subject entries, appending an
# OECD Fields of Science (FOS) entry when the name can be mapped.
#
# The name is first matched against the 'fosLabel' values in
# resources/oecd/fos-mappings.json (with or without a "FOS: " prefix). The
# 'forLabel' values in resources/oecd/for-mappings.json are consulted only
# when that yields nothing.
#
# @param name [String]
# @return [Array<Hash>] the sanitized, downcased name, followed by the mapped
#   FOS subject when one was found
def name_to_fos(name)
  fos_file = File.expand_path('../../resources/oecd/fos-mappings.json', __dir__)
  fos_fields = JSON.load(File.read(fos_file)).fetch('fosFields')

  # first find subject in Fields of Science (OECD)
  subject = fos_fields.find do |entry|
    entry['fosLabel'] == name || 'FOS: ' + entry['fosLabel'] == name
  end

  unless subject
    # if not found, look in Fields of Research (Australian and New Zealand
    # Standard Research Classification) and map to Fields of Science
    for_file = File.expand_path('../../resources/oecd/for-mappings.json', __dir__)
    fores = JSON.load(File.read(for_file))
    for_fields = fores.fetch('forFields')
    for_disciplines = fores.fetch('forDisciplines')

    subject = for_fields.find { |entry| entry['forLabel'] == name } ||
              for_disciplines.find { |entry| entry['forLabel'] == name }
  end

  own_entry = { 'subject' => sanitize(name).downcase }
  return [own_entry] unless subject

  [own_entry,
   {
     'subject' => 'FOS: ' + subject['fosLabel'],
     'subjectScheme' => 'Fields of Science and Technology (FOS)',
     'schemeUri' => 'http://www.oecd.org/science/inno/38235147.pdf'
   }]
end

#name_to_spdx(name) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1328

# Converts a license name, SPDX id or URL into a rights entry, using the SPDX
# license list in resources/spdx/licenses.json.
#
# @param name [String] license name, license id, or license URL
# @return [Hash] SPDX-based entry when a license matches, otherwise
#   +{ 'rights' => name }+
def name_to_spdx(name)
  licenses_file = File.expand_path('../../resources/spdx/licenses.json', __dir__)
  known_licenses = JSON.load(File.read(licenses_file)).fetch('licenses')

  match = known_licenses.find do |entry|
    entry['name'] == name ||
      entry['licenseId'] == name ||
      entry['seeAlso'].first == normalize_cc_url(name)
  end
  return { 'rights' => name } unless match

  {
    'rights' => match['name'],
    'rightsUri' => match['seeAlso'].first,
    'rightsIdentifier' => match['licenseId'].downcase,
    'rightsIdentifierScheme' => 'SPDX',
    'schemeUri' => 'https://spdx.org/licenses/'
  }.compact
end

#normalize_cc_url(id) ⇒ `Object`

# File 'lib/briard/utils.rb', line 677

# Normalizes a license URL to https and maps it through NORMALIZED_LICENSES.
#
# @param id [String, nil] license URL
# @return [String, nil] the mapped URL, or the normalized URL itself when it
#   has no entry in NORMALIZED_LICENSES
def normalize_cc_url(id)
  normalized = normalize_url(id, https: true)
  NORMALIZED_LICENSES.fetch(normalized, normalized)
end

#normalize_id(id, options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 638

# Normalizes an identifier that is either a DOI or an http(s) URL.
#
# @param id [String, nil]
# @param options [Hash] passed on to +normalize_doi+
# @return [String, nil] the normalized DOI when +normalize_doi+ yields one,
#   otherwise the URL cleaned by PostRank::URI; nil for blank input, for
#   anything that is not an http/https URL with a host, or for an invalid URI
def normalize_id(id, options = {})
  return nil unless id.present?

  # a valid DOI takes precedence
  doi = normalize_doi(id, options)
  return doi if doi.present?

  # otherwise only http(s) URLs with a host are accepted
  uri = Addressable::URI.parse(id)
  http_url = uri && uri.host && %w[http https].include?(uri.scheme)

  http_url ? PostRank::URI.clean(id) : nil
rescue Addressable::URI::InvalidURIError
  nil
end

#normalize_ids(ids: nil, relation_type: nil) ⇒ `Object`

# File 'lib/briard/utils.rb', line 690

# Converts entries with an '@id' into related-identifier hashes.
#
# Entries without a present '@id' are skipped. An id that resolves to a DOI is
# stored as that DOI with type 'DOI'; anything else is stored as a 'URL'.
#
# @param ids [Hash, Array<Hash>, nil] entries with '@id' and optionally '@type'
# @param relation_type [String, nil] value for 'relationType'
# @return [Hash, Array<Hash>, nil] the unwrapped list of related identifiers
def normalize_ids(ids: nil, relation_type: nil)
  with_id = Array.wrap(ids).select { |entry| entry['@id'].present? }

  related = with_id.map do |entry|
    normalized = normalize_id(entry['@id'])
    identifier_type = doi_from_url(normalized).present? ? 'DOI' : 'URL'
    identifier = doi_from_url(normalized) || normalized

    {
      'relatedIdentifier' => identifier,
      'relationType' => relation_type,
      'relatedIdentifierType' => identifier_type,
      'resourceTypeGeneral' => Metadata::SO_TO_DC_TRANSLATIONS[entry['@type']]
    }.compact
  end

  related.unwrap
end

#normalize_issn(input, options = {}) ⇒ `Object`

Picks the electronic ISSN if there are multiple. Formats the ISSN as xxxx-xxxx.

# File 'lib/briard/utils.rb', line 705

# Extracts an ISSN from a string, hash or array and formats it as xxxx-xxxx.
#
# For arrays, the entry whose 'media_type' is 'electronic' is preferred, with
# the first entry as fallback. A plain string is only accepted when no
# +:content+ option is given.
#
# @param input [String, Hash, Array<Hash>, nil]
# @param options [Hash] +:content+ names the key holding the ISSN
#   (defaults to '__content__')
# @return [String, nil] a 9-character value unchanged, an 8-character value
#   with a hyphen inserted after the fourth character, nil otherwise
def normalize_issn(input, options = {})
  content_key = options[:content] || '__content__'

  issn =
    if input.blank?
      nil
    else
      case input
      when String
        input if options[:content].nil?
      when Hash
        input.fetch(content_key, nil)
      when Array
        preferred = input.find { |entry| entry['media_type'] == 'electronic' } || input.first
        preferred.fetch(content_key, nil)
      end
    end

  length = issn.to_s.length
  return issn if length == 9

  issn[0..3] + '-' + issn[4..7] if length == 8
end

#normalize_licenses(licenses) ⇒ `Object`

Finds a Creative Commons or OSI license in the licenses array and normalizes its URL and name.

# File 'lib/briard/utils.rb', line 728

# Finds the first Creative Commons or OSI (opensource.org) license in
# +licenses+ and returns its normalized URL.
#
# Normalization forces https, drops any subdomain, and then:
# * creativecommons.org: removes a trailing "legalcode" segment and ensures
#   the path ends with a slash;
# * opensource.org: removes "-license", ".php" and ".html", upcases or
#   titleizes well-known license names, and expands a trailing major version
#   such as "-2" to "-2.0".
#
# @param licenses [Array<Hash>, Hash, nil] entries with a 'url' key
# @return [String, Object, nil] the normalized URL of the first standard
#   license; +licenses+ unchanged when none is found; nil when a URL cannot
#   be parsed
def normalize_licenses(licenses)
  standard_licenses = Array.wrap(licenses)
                           .map { |license| URI.parse(license['url']) }
                           .select { |candidate| candidate.host && candidate.host[/(creativecommons\.org|opensource\.org)$/] }
  return licenses unless standard_licenses.present?

  # +uri+ was previously used without ever being assigned, which raised a
  # NameError as soon as a standard license was present. Work on the first
  # match; it is a freshly parsed object, so the input is not modified.
  uri = standard_licenses.first

  # use HTTPS
  uri.scheme = 'https'

  # use host name without subdomain
  uri.host = Array(/(creativecommons\.org|opensource\.org)/.match(uri.host)).last

  # normalize URLs
  if uri.host == 'creativecommons.org'
    segments = uri.path.split('/')
    uri.path = segments[0..-2].join('/') if segments.last == 'legalcode'
    uri.path = "#{uri.path}/" unless uri.path.end_with?('/')
  else
    uri.path = uri.path.gsub(/(-license|\.php|\.html)/, '')
    uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
    uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
    uri.path = uri.path.sub(/([^0-9-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
      m = Regexp.last_match
      text = m[1]

      if m[3].present?
        # always emit major.minor, defaulting the minor version to 0
        version = [m[3], m[5].presence || '0'].join('.')
        [text, version].join('-')
      else
        text
      end
    end
  end

  uri.to_s
rescue URI::InvalidURIError
  nil
end

#normalize_orcid(orcid) ⇒ `Object`

# File 'lib/briard/utils.rb', line 682

# Validates an ORCID iD and returns it as an https://orcid.org/ URL.
#
# @param orcid [String, nil] ORCID iD or ORCID URL
# @return [String, nil] the ORCID URL, or nil when +validate_orcid+ yields
#   nothing
def normalize_orcid(orcid)
  validated = validate_orcid(orcid)
  return nil unless validated.present?

  # turn ORCID ID into URL
  'https://orcid.org/' + Addressable::URI.encode(validated)
end

#normalize_url(id, options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 655

# Normalizes an http, https or ftp URL.
#
# Values starting with "info" are returned untouched. For other values the
# path is cleaned with PostRank::URI, and the scheme is switched to https when
# +options[:https]+ is set.
#
# @param id [String, nil]
# @param options [Hash] +:https+ forces the https scheme
# @return [String, nil] the normalized URL; nil for blank input, for anything
#   that is not an http/https/ftp URL with a host, or for an invalid URI
def normalize_url(id, options = {})
  return nil unless id.present?

  # info URIs are passed through as they are
  return id if id.to_s.start_with?('info')

  uri = Addressable::URI.parse(id)
  supported = uri && uri.host && %w[http https ftp].include?(uri.scheme)
  return nil unless supported

  uri.scheme = 'https' if options[:https]
  uri.path = PostRank::URI.clean(uri.path)

  uri.to_s
rescue Addressable::URI::InvalidURIError
  nil
end

#orcid_as_url(orcid) ⇒ `Object`



602
603
604

# File 'lib/briard/utils.rb', line 602

# Prefixes an ORCID iD with https://orcid.org/.
#
# @param orcid [String, nil]
# @return [String, nil] the ORCID URL, or nil when +orcid+ is not present
def orcid_as_url(orcid)
  return unless orcid.present?

  "https://orcid.org/#{orcid}"
end

#orcid_from_url(url) ⇒ `Object`



598
599
600

# File 'lib/briard/utils.rb', line 598

# Extracts the ORCID iD from an http(s)://orcid.org/ URL.
#
# The pattern previously began with "\A:", which required the URL to start
# with a literal colon, so real ORCID URLs never matched.
#
# @param url [String, nil]
# @return [String, nil] everything after "orcid.org/", or nil when +url+ is
#   not an orcid.org URL
def orcid_from_url(url)
  match = %r{\A(?:http|https)://orcid\.org/(.+)}.match(url)
  match && match[1]
end

#parse_attributes(element, options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 625

# Pulls the content value out of a string, hash or array.
#
# * String: returned HTML-unescaped, but only when no +:content+ option is set.
# * Hash: the value stored under the content key.
# * Array: the content of each hash element (other elements are kept as they
#   are), de-duplicated; either the first value or the unwrapped list.
#
# @param element [String, Hash, Array, nil]
# @param options [Hash] +:content+ names the key to read (defaults to
#   '__content__'); +:first+ returns only the first array value
# @return [Object, nil] the extracted value, nil for unsupported input
def parse_attributes(element, options = {})
  content_key = options[:content] || '__content__'

  case element
  when String
    CGI.unescapeHTML(element) if options[:content].nil?
  when Hash
    element.fetch(CGI.unescapeHTML(content_key), nil)
  when Array
    values = element.map do |item|
      item.is_a?(Hash) ? item.fetch(CGI.unescapeHTML(content_key), nil) : item
    end.uniq
    options[:first] ? values.first : values.unwrap
  end
end

#sanitize(text, options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1092

# Scrubs HTML from text with a Briard::WhitelistScrubber and squishes
# whitespace.
#
# Strings are scrubbed directly. For hashes the content value is sanitized;
# for arrays every element is sanitized and duplicates are removed. Nested
# calls are made without forwarding +options+.
#
# @param text [String, Hash, Array, nil]
# @param options [Hash] passed to the scrubber; +:tags+ defaults to a small
#   set of inline tags and is written back into this hash; +:content+ names
#   the hash key to read (defaults to '__content__'); +:first+ returns only
#   the first array value
# @return [String, Array, nil] sanitized value, nil for unsupported input
def sanitize(text, options = {})
  options[:tags] ||= Set.new(%w[strong em b i code pre sub sup br])
  content_key = options[:content] || '__content__'
  scrubber = Briard::WhitelistScrubber.new(options)

  case text
  when String
    # remove excessive internal whitespace with squish
    Loofah.scrub_fragment(text, scrubber).to_s.squish
  when Hash
    sanitize(text.fetch(content_key, nil))
  when Array
    cleaned = text.map do |item|
      item.is_a?(Hash) ? sanitize(item.fetch(content_key, nil)) : sanitize(item)
    end.uniq
    options[:first] ? cleaned.first : cleaned.unwrap
  end
end

#strip_milliseconds(iso8601_time) ⇒ `Object`

Strips milliseconds if there is a time, as they interfere with EDTF parsing. Dates are kept unchanged.

# File 'lib/briard/utils.rb', line 1237

# Removes the time or fractional-second part of a timestamp.
#
# * Contains a space: everything from the first space on is dropped.
# * Otherwise contains a dot: everything from the first dot on is replaced
#   by "Z".
# * Otherwise the value is returned unchanged.
#
# @param iso8601_time [String, nil]
# @return [String, nil]
def strip_milliseconds(iso8601_time)
  text = iso8601_time.to_s

  if text.include?(' ')
    iso8601_time.split(' ').first
  elsif text.include?('.')
    iso8601_time.split('.').first + 'Z'
  else
    iso8601_time
  end
end

#to_citeproc(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1072

# Converts contributor hashes into hashes with 'family', 'given' and
# 'literal' name keys.
#
# 'familyName' and 'givenName' are copied to 'family' and 'given'; when there
# is no family name, 'name' is copied to 'literal'. The source keys and
# metadata-only keys are dropped, as are nil values.
#
# The converted hashes are new objects. Earlier the new keys were written into
# the caller's hashes, which leaked them into the source metadata.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Array<Hash>, nil] converted names, or nil when there are none
def to_citeproc(element)
  Array.wrap(element).map do |a|
    converted = a.merge('family' => a['familyName'], 'given' => a['givenName'])
    converted['literal'] = a['name'] unless a['familyName'].present?
    converted.except('nameType', 'type', '@type', 'id', '@id', 'name', 'familyName', 'givenName',
                     'affiliation', 'nameIdentifiers', 'contributorType').compact
  end.presence
end

#to_datacite_json(element, options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 766

# Returns copies of the given hashes with every key dasherized.
#
# @param element [Hash, Array<Hash>, nil]
# @param options [Hash] +:first+ returns the unwrapped list instead of an array
# @return [Hash, Array<Hash>, nil] converted hashes; nil when there are none
def to_datacite_json(element, options = {})
  dasherized = Array.wrap(element).map do |entry|
    entry.map { |key, value| [key.dasherize, value] }.to_h
  end

  options[:first] ? dasherized.unwrap : dasherized.presence
end

#to_identifier(identifier) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1047

# Converts a related identifier into a 'PropertyValue' hash.
#
# @param identifier [Hash] with 'relatedIdentifierType' and 'relatedIdentifier'
# @return [Hash] with '@type', 'propertyID' and 'value'
def to_identifier(identifier)
  property_id = identifier['relatedIdentifierType']
  value = identifier['relatedIdentifier']

  { '@type' => 'PropertyValue', 'propertyID' => property_id, 'value' => value }
end

#to_ris(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 1082

# Formats contributors as "Family, Given" strings.
#
# Entries without a family name fall back to their 'name' value.
#
# @param element [Hash, Array<Hash>, nil]
# @return [String, Array<String>, nil] the unwrapped list of names
def to_ris(element)
  names = Array.wrap(element).map do |contributor|
    family = contributor['familyName']
    family.present? ? [family, contributor['givenName']].join(', ') : contributor['name']
  end

  names.unwrap
end

#to_schema_org(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 783

# Renames 'type', 'id' and 'title' keys to '@type', '@id' and 'name'.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] result of +map_hash_keys+ with that mapping
def to_schema_org(element)
  map_hash_keys(
    element: element,
    mapping: { 'type' => '@type', 'id' => '@id', 'title' => 'name' }
  )
end

#to_schema_org_container(element, options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 831

# Builds a container hash ('Periodical' or 'DataCatalog') from container
# metadata.
#
# The guard accepts a nil +element+ as long as +options[:container_title]+ is
# present. That case used to raise NoMethodError on +element['identifier']+;
# it now yields a container built from the title alone.
#
# @param element [Hash, nil] container with 'identifier' and 'title'
# @param options [Hash] +:type+ ('Dataset' selects 'DataCatalog') and
#   +:container_title+ (fallback name)
# @return [Hash, nil] container hash without nil values; nil when +element+ is
#   neither a Hash nor nil with a container title
def to_schema_org_container(element, options = {})
  return nil unless element.is_a?(Hash) || (element.nil? && options[:container_title].present?)

  container = element || {}

  {
    '@id' => container['identifier'],
    '@type' => options[:type] == 'Dataset' ? 'DataCatalog' : 'Periodical',
    'name' => container['title'] || options[:container_title]
  }.compact
end

#to_schema_org_contributors(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 810

# Converts contributor hashes into a form with '@type', '@id', 'name' and
# 'Organization' affiliations.
#
# * 'affiliation' entries (strings or hashes with 'name' and
#   'affiliationIdentifier') become 'Organization' hashes.
# * '@type' is 'nameType' without its last two characters
#   (e.g. 'Personal' => 'Person').
# * '@id' is the 'nameIdentifier' of the first name identifier.
# * 'name' is "given family" when a family name is present.
#
# The result is built from copies. Earlier the converted values were written
# into the caller's hashes, altering the source metadata.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] the unwrapped list of converted contributors
def to_schema_org_contributors(element)
  Array.wrap(element).map do |c|
    affiliation = Array.wrap(c['affiliation']).map do |a|
      if a.is_a?(String)
        name = a
        affiliation_identifier = nil
      else
        name = a['name']
        affiliation_identifier = a['affiliationIdentifier']
      end

      { '@type' => 'Organization', '@id' => affiliation_identifier, 'name' => name }.compact
    end.unwrap

    c.merge(
      'affiliation' => affiliation,
      '@type' => c['nameType'].present? ? c['nameType'][0..-3] : nil,
      '@id' => Array.wrap(c['nameIdentifiers']).first.to_h.fetch('nameIdentifier', nil),
      'name' => c['familyName'].present? ? [c['givenName'], c['familyName']].join(' ') : c['name']
    ).except('nameIdentifiers', 'nameType').compact
  end.unwrap
end

#to_schema_org_creators(element) ⇒ `Object`

# File 'lib/briard/utils.rb', line 789

# Converts creator hashes into a form with '@type', '@id', 'name' and
# 'Organization' affiliations.
#
# * 'affiliation' entries (strings or hashes with 'name' and
#   'affiliationIdentifier') become 'Organization' hashes.
# * '@type' is 'nameType' without its last two characters
#   (e.g. 'Personal' => 'Person').
# * '@id' is the 'nameIdentifier' of the first name identifier.
# * 'name' is "given family" when a family name is present.
#
# The result is built from copies. Earlier the converted values were written
# into the caller's hashes, altering the source metadata.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] the unwrapped list of converted creators
def to_schema_org_creators(element)
  Array.wrap(element).map do |c|
    affiliation = Array.wrap(c['affiliation']).map do |a|
      if a.is_a?(String)
        name = a
        affiliation_identifier = nil
      else
        name = a['name']
        affiliation_identifier = a['affiliationIdentifier']
      end

      { '@type' => 'Organization', '@id' => affiliation_identifier, 'name' => name }.compact
    end.unwrap

    c.merge(
      'affiliation' => affiliation,
      '@type' => c['nameType'].present? ? c['nameType'][0..-3] : nil,
      '@id' => Array.wrap(c['nameIdentifiers']).first.to_h.fetch('nameIdentifier', nil),
      'name' => c['familyName'].present? ? [c['givenName'], c['familyName']].join(' ') : c['name']
    ).except('nameIdentifiers', 'nameType').compact
  end.unwrap
end

#to_schema_org_funder(funding_references) ⇒ `Object`

# File 'lib/briard/utils.rb', line 875

# Converts funding references into 'Organization' hashes.
#
# @param funding_references [Hash, Array<Hash>, nil] entries with
#   'funderIdentifier' and 'funderName'
# @return [Hash, Array<Hash>, nil] the unwrapped list of funders without nil
#   values; nil when the input is not present
def to_schema_org_funder(funding_references)
  return nil unless funding_references.present?

  funders = Array.wrap(funding_references).map do |reference|
    organization = {
      '@id' => reference['funderIdentifier'],
      '@type' => 'Organization',
      'name' => reference['funderName']
    }
    organization.compact
  end

  funders.unwrap
end

#to_schema_org_identifiers(element, _options = {}) ⇒ `Object`

# File 'lib/briard/utils.rb', line 841

# Converts identifiers into 'PropertyValue' hashes.
#
# @param element [Hash, Array<Hash>, nil] entries with 'identifierType' and
#   'identifier'
# @param _options [Hash] unused
# @return [Hash, Array<Hash>, nil] the unwrapped list of property values
def to_schema_org_identifiers(element, _options = {})
  property_values = Array.wrap(element).map do |identifier|
    {
      '@type' => 'PropertyValue',
      'propertyID' => identifier['identifierType'],
      'value' => identifier['identifier']
    }
  end

  property_values.unwrap
end

#to_schema_org_relation(related_identifiers: nil, relation_type: nil) ⇒ `Object`

# File 'lib/briard/utils.rb', line 851

# Converts the related identifiers of one relation type into linked-resource
# hashes.
#
# The relation type 'References' also selects 'Cites' and 'Documents'. An ISSN
# with relation 'IsPartOf' becomes a 'Periodical'; everything else gets a
# normalized '@id' and a '@type' taken from DC_TO_SO_TRANSLATIONS, defaulting
# to 'CreativeWork'.
#
# @param related_identifiers [Hash, Array<Hash>, nil]
# @param relation_type [String, nil]
# @return [Hash, Array<Hash>, nil] the unwrapped list of relations; nil when
#   either argument is not present
def to_schema_org_relation(related_identifiers: nil, relation_type: nil)
  return nil unless related_identifiers.present? && relation_type.present?

  accepted_types = relation_type == 'References' ? %w[References Cites Documents] : [relation_type]
  selected = Array.wrap(related_identifiers).select do |related|
    accepted_types.include?(related['relationType'])
  end

  relations = selected.map do |related|
    periodical = related['relatedIdentifierType'] == 'ISSN' && related['relationType'] == 'IsPartOf'

    if periodical
      { '@type' => 'Periodical', 'issn' => related['relatedIdentifier'] }.compact
    else
      {
        '@id' => normalize_id(related['relatedIdentifier']),
        '@type' => DC_TO_SO_TRANSLATIONS[related['resourceTypeGeneral']] || 'CreativeWork'
      }.compact
    end
  end

  relations.unwrap
end

#to_schema_org_spatial_coverage(geo_location) ⇒ `Object`

# File 'lib/briard/utils.rb', line 887

# Converts geo locations into 'Place' hashes.
#
# Each location contributes one 'Place' per geometry it carries:
# * 'geoLocationPoint'   => 'GeoCoordinates' with latitude and longitude
# * 'geoLocationBox'     => 'GeoShape' with a "south west north east" box string
# * 'geoLocationPolygon' => 'GeoShape' with [longitude, latitude] pairs
# A location that has only 'geoLocationPlace' contributes a 'GeoCoordinates'
# entry holding just the address.
#
# @param geo_location [Hash, Array<Hash>, nil]
# @return [Hash, Array<Hash>, nil] the unwrapped list of places; nil when the
#   input is not present
def to_schema_org_spatial_coverage(geo_location)
  return nil unless geo_location.present?

  places = []

  Array.wrap(geo_location).each do |location|
    point = location.fetch('geoLocationPoint', nil)
    box = location.fetch('geoLocationBox', nil)
    polygon = location.fetch('geoLocationPolygon', nil)

    if point
      places << {
        '@type' => 'Place',
        'geo' => {
          '@type' => 'GeoCoordinates',
          'address' => location['geoLocationPlace'],
          'latitude' => location.dig('geoLocationPoint', 'pointLatitude'),
          'longitude' => location.dig('geoLocationPoint', 'pointLongitude')
        }
      }.compact
    end

    if box
      corners = [location.dig('geoLocationBox', 'southBoundLatitude'),
                 location.dig('geoLocationBox', 'westBoundLongitude'),
                 location.dig('geoLocationBox', 'northBoundLatitude'),
                 location.dig('geoLocationBox', 'eastBoundLongitude')]
      places << {
        '@type' => 'Place',
        'geo' => {
          '@type' => 'GeoShape',
          'address' => location['geoLocationPlace'],
          'box' => corners.compact.join(' ').presence
        }.compact
      }.compact
    end

    if polygon
      rings = Array.wrap(location.dig('geoLocationPolygon')).map do |ring|
        Array.wrap(ring).map do |vertex|
          [vertex.dig('polygonPoint', 'pointLongitude'),
           vertex.dig('polygonPoint', 'pointLatitude')].compact
        end.compact
      end
      places << {
        '@type' => 'Place',
        'geo' => {
          '@type' => 'GeoShape',
          'address' => location['geoLocationPlace'],
          'polygon' => rings.compact.presence
        }
      }
    end

    # a bare place name is only emitted when no geometry was present
    next unless location.fetch('geoLocationPlace', nil) && !(point || box || polygon)

    places << {
      '@type' => 'Place',
      'geo' => {
        '@type' => 'GeoCoordinates',
        'address' => location['geoLocationPlace']
      }
    }.compact
  end

  places.unwrap
end

#validate_orcid(orcid) ⇒ `Object`

# File 'lib/briard/utils.rb', line 606

# Extracts an ORCID iD from a bare iD or an orcid.org URL and normalizes its
# separators.
#
# The four groups may be separated by hyphens or whitespace; whitespace
# separators are replaced with hyphens in the result.
#
# @param orcid [String, nil]
# @return [String, nil] the hyphenated ORCID iD, or nil when the input does
#   not match
def validate_orcid(orcid)
  pattern = %r{\A(?:(?:http|https)://(?:(?:www|sandbox)?\.)?orcid\.org/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X]+)\z}
  match = pattern.match(orcid)
  return nil unless match

  match[1].gsub(/[[:space:]]/, '-')
end

#validate_orcid_scheme(orcid_scheme) ⇒ `Object`



611
612
613

# File 'lib/briard/utils.rb', line 611

# Checks that a scheme URI points at orcid.org.
#
# @param orcid_scheme [String, nil] e.g. "https://orcid.org"
# @return [String, nil] "orcid.org" when the value is an http(s) orcid.org URL
#   (with or without "www."), nil otherwise
def validate_orcid_scheme(orcid_scheme)
  match = %r{\A(http|https)://(www\.)?(orcid\.org)}.match(orcid_scheme)
  match && match[3]
end

#validate_url(str) ⇒ `Object`

# File 'lib/briard/utils.rb', line 615

# Classifies a string as a DOI, a URL or an ISSN.
#
# The patterns are tried in that order, so a doi.org URL counts as a DOI.
#
# @param str [String, nil]
# @return [String, nil] 'DOI', 'URL', 'ISSN', or nil when nothing matches
def validate_url(str)
  classifiers = [
    ['DOI', %r{\A(?:(http|https)://(dx\.)?doi.org/)?(doi:)?(10\.\d{4,5}/.+)\z}],
    ['URL', %r{\A(http|https)://}],
    ['ISSN', /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X]+)\z/]
  ]

  label, = classifiers.find { |_, pattern| pattern.match?(str) }
  label
end

Module: Briard::Utils

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#decode_doi(doi) ⇒ Object

#encode_doi(prefix) ⇒ Object

#find_from_format(id: nil, string: nil, ext: nil, filename: nil) ⇒ Object

#find_from_format_by_ext(string, options = {}) ⇒ Object

#find_from_format_by_filename(filename) ⇒ Object

#find_from_format_by_id(id) ⇒ Object

#find_from_format_by_string(string) ⇒ Object

#from_citeproc(element) ⇒ Object

#from_datacite_json(element) ⇒ Object

#from_schema_org(element) ⇒ Object

#from_schema_org_contributors(element) ⇒ Object

#from_schema_org_creators(element) ⇒ Object

#get_contributor(contributor, contributor_type) ⇒ Object

#get_date(dates, date_type) ⇒ Object

#get_date_from_date_parts(date_as_parts) ⇒ Object

#get_date_from_parts(year, month = nil, day = nil) ⇒ Object

#get_date_parts(iso8601_time) ⇒ Object

#get_date_parts_from_parts(year, month = nil, day = nil) ⇒ Object

#get_datetime_from_iso8601(iso8601_time) ⇒ Object

#get_datetime_from_time(time) ⇒ Object

#get_identifier(identifiers, identifier_type) ⇒ Object

#get_identifier_type(identifier_type) ⇒ Object

#get_iso8601_date(iso8601_time) ⇒ Object

#get_series_information(str) ⇒ Object

#get_year_month(iso8601_time) ⇒ Object

#get_year_month_day(iso8601_time) ⇒ Object

#github_as_cff_url(url) ⇒ Object

#github_as_codemeta_url(url) ⇒ Object

#github_as_owner_url(url) ⇒ Object

#github_as_release_url(url) ⇒ Object

#github_as_repo_url(url) ⇒ Object

#github_from_url(url) ⇒ Object

#github_owner_from_url(url) ⇒ Object

#github_release_from_url(url) ⇒ Object

#github_repo_from_url(url) ⇒ Object

#hsh_to_fos(hsh) ⇒ Object

#hsh_to_spdx(hsh) ⇒ Object

#jsonlint(json) ⇒ Object

#map_hash_keys(element: nil, mapping: nil) ⇒ Object

#name_to_fos(name) ⇒ Object

#name_to_spdx(name) ⇒ Object

#normalize_cc_url(id) ⇒ Object

#normalize_id(id, options = {}) ⇒ Object

#normalize_ids(ids: nil, relation_type: nil) ⇒ Object

#normalize_issn(input, options = {}) ⇒ Object

#normalize_licenses(licenses) ⇒ Object

#normalize_orcid(orcid) ⇒ Object

#normalize_url(id, options = {}) ⇒ Object

#orcid_as_url(orcid) ⇒ Object

#orcid_from_url(url) ⇒ Object

#parse_attributes(element, options = {}) ⇒ Object

#sanitize(text, options = {}) ⇒ Object

#strip_milliseconds(iso8601_time) ⇒ Object

#to_citeproc(element) ⇒ Object

#to_datacite_json(element, options = {}) ⇒ Object

#to_identifier(identifier) ⇒ Object

#to_ris(element) ⇒ Object

#to_schema_org(element) ⇒ Object

#to_schema_org_container(element, options = {}) ⇒ Object

#to_schema_org_contributors(element) ⇒ Object

#to_schema_org_creators(element) ⇒ Object

#to_schema_org_funder(funding_references) ⇒ Object

#to_schema_org_identifiers(element, _options = {}) ⇒ Object

#to_schema_org_relation(related_identifiers: nil, relation_type: nil) ⇒ Object

#to_schema_org_spatial_coverage(geo_location) ⇒ Object

#validate_orcid(orcid) ⇒ Object

#validate_orcid_scheme(orcid_scheme) ⇒ Object

#validate_url(str) ⇒ Object