Module: Bolognese::Utils
- Included in:
- CLI, MetadataUtils
- Defined in:
- lib/bolognese/utils.rb
Constant Summary collapse
- LICENSE_NAMES =
{ "http://creativecommons.org/publicdomain/zero/1.0/" => "Public Domain (CC0 1.0)", "http://creativecommons.org/licenses/by/3.0/" => "Creative Commons Attribution 3.0 (CC-BY 3.0)", "http://creativecommons.org/licenses/by/4.0/" => "Creative Commons Attribution 4.0 (CC-BY 4.0)", "http://creativecommons.org/licenses/by-nc/4.0/" => "Creative Commons Attribution Noncommercial 4.0 (CC-BY-NC 4.0)", "http://creativecommons.org/licenses/by-sa/4.0/" => "Creative Commons Attribution Share Alike 4.0 (CC-BY-SA 4.0)", "http://creativecommons.org/licenses/by-nc-nd/4.0/" => "Creative Commons Attribution Noncommercial No Derivatives 4.0 (CC-BY-NC-ND 4.0)" }
- DC_TO_SO_TRANSLATIONS =
{ "Audiovisual" => "MediaObject", "Collection" => "Collection", "Dataset" => "Dataset", "Event" => "Event", "Image" => "ImageObject", "InteractiveResource" => nil, "Model" => nil, "PhysicalObject" => nil, "Service" => "Service", "Software" => "SoftwareSourceCode", "Sound" => "AudioObject", "Text" => "ScholarlyArticle", "Workflow" => nil, "Other" => "CreativeWork" }
- DC_TO_CP_TRANSLATIONS =
{ "Audiovisual" => "motion_picture", "Collection" => nil, "Dataset" => "dataset", "Event" => nil, "Image" => "graphic", "InteractiveResource" => nil, "Model" => nil, "PhysicalObject" => nil, "Service" => nil, "Sound" => "song", "Text" => "report", "Workflow" => nil, "Other" => nil }
- CR_TO_CP_TRANSLATIONS =
{ "Proceedings" => nil, "ReferenceBook" => nil, "JournalIssue" => nil, "ProceedingsArticle" => "paper-conference", "Other" => nil, "Dissertation" => "thesis", "Dataset" => "dataset", "EditedBook" => "book", "JournalArticle" => "article-journal", "Journal" => nil, "Report" => "report", "BookSeries" => nil, "ReportSeries" => nil, "BookTrack" => nil, "Standard" => nil, "BookSection" => "chapter", "BookPart" => nil, "Book" => "book", "BookChapter" => "chapter", "StandardSeries" => nil, "Monograph" => "book", "Component" => nil, "ReferenceEntry" => "entry-dictionary", "JournalVolume" => nil, "BookSet" => nil }
- CR_TO_SO_TRANSLATIONS =
{ "Proceedings" => nil, "ReferenceBook" => "Book", "JournalIssue" => "PublicationIssue", "ProceedingsArticle" => nil, "Other" => "CreativeWork", "Dissertation" => "Thesis", "Dataset" => "Dataset", "EditedBook" => "Book", "JournalArticle" => "ScholarlyArticle", "Journal" => nil, "Report" => nil, "BookSeries" => nil, "ReportSeries" => nil, "BookTrack" => nil, "Standard" => nil, "BookSection" => nil, "BookPart" => nil, "Book" => "Book", "BookChapter" => "Chapter", "StandardSeries" => nil, "Monograph" => "Book", "Component" => "CreativeWork", "ReferenceEntry" => nil, "JournalVolume" => "PublicationVolume", "BookSet" => nil, "PostedContent" => "ScholarlyArticle" }
- CR_TO_BIB_TRANSLATIONS =
{ "Proceedings" => "proceedings", "ReferenceBook" => "book", "JournalIssue" => nil, "ProceedingsArticle" => nil, "Other" => nil, "Dissertation" => "phdthesis", "Dataset" => nil, "EditedBook" => "book", "JournalArticle" => "article", "Journal" => nil, "Report" => nil, "BookSeries" => nil, "ReportSeries" => nil, "BookTrack" => nil, "Standard" => nil, "BookSection" => "inbook", "BookPart" => nil, "Book" => "book", "BookChapter" => "inbook", "StandardSeries" => nil, "Monograph" => "book", "Component" => nil, "ReferenceEntry" => nil, "JournalVolume" => nil, "BookSet" => nil, "PostedContent" => "article" }
- BIB_TO_CR_TRANSLATIONS =
{ "proceedings" => "Proceedings", "phdthesis" => "Dissertation", "article" => "JournalArticle", "book" => "Book", "inbook" => "BookChapter" }
- CR_TO_JATS_TRANSLATIONS =
{ "Proceedings" => "working-paper", "ReferenceBook" => "book", "JournalIssue" => "journal", "ProceedingsArticle" => "working-paper", "Other" => nil, "Dissertation" => nil, "Dataset" => "data", "EditedBook" => "book", "JournalArticle" => "journal", "Journal" => "journal", "Report" => "report", "BookSeries" => "book", "ReportSeries" => "report", "BookTrack" => "book", "Standard" => "standard", "BookSection" => "chapter", "BookPart" => "chapter", "Book" => "book", "BookChapter" => "chapter", "StandardSeries" => "standard", "Monograph" => "book", "Component" => nil, "ReferenceEntry" => nil, "JournalVolume" => "journal", "BookSet" => "book" }
- SO_TO_DC_TRANSLATIONS =
{ "Article" => "Text", "AudioObject" => "Sound", "Blog" => "Text", "BlogPosting" => "Text", "Chapter" => "Text", "Collection" => "Collection", "CreativeWork" => "Other", "DataCatalog" => "Dataset", "Dataset" => "Dataset", "Event" => "Event", "ImageObject" => "Image", "Movie" => "Audiovisual", "PublicationIssue" => "Text", "ScholarlyArticle" => "Text", "Thesis" => "Text", "Service" => "Service", "SoftwareSourceCode" => "Software", "VideoObject" => "Audiovisual", "WebPage" => "Text", "WebSite" => "Text" }
- SO_TO_JATS_TRANSLATIONS =
{ "Article" => "journal", "AudioObject" => nil, "Blog" => nil, "BlogPosting" => nil, "Book" => "book", "Collection" => nil, "CreativeWork" => nil, "DataCatalog" => "data", "Dataset" => "data", "Event" => nil, "ImageObject" => nil, "Movie" => nil, "PublicationIssue" => "journal", "ScholarlyArticle" => "journal", "Service" => nil, "SoftwareSourceCode" => "software", "VideoObject" => nil, "WebPage" => nil, "WebSite" => "website" }
- SO_TO_CP_TRANSLATIONS =
{ "Article" => "", "AudioObject" => "song", "Blog" => "report", "BlogPosting" => "post-weblog", "Collection" => nil, "CreativeWork" => nil, "DataCatalog" => "dataset", "Dataset" => "dataset", "Event" => nil, "ImageObject" => "graphic", "Movie" => "motion_picture", "PublicationIssue" => nil, "ScholarlyArticle" => "article-journal", "Service" => nil, "Thesis" => "thesis", "VideoObject" => "broadcast", "WebPage" => "webpage", "WebSite" => "webpage" }
- SO_TO_RIS_TRANSLATIONS =
{ "Article" => nil, "AudioObject" => nil, "Blog" => nil, "BlogPosting" => "BLOG", "Collection" => nil, "CreativeWork" => "GEN", "DataCatalog" => "CTLG", "Dataset" => "DATA", "Event" => nil, "ImageObject" => "FIGURE", "Movie" => "MPCT", "PublicationIssue" => nil, "ScholarlyArticle" => "JOUR", "Service" => nil, "SoftwareSourceCode" => "COMP", "VideoObject" => "VIDEO", "WebPage" => "ELEC", "WebSite" => nil }
- CR_TO_RIS_TRANSLATIONS =
{ "Proceedings" => "CONF", "ReferenceBook" => "BOOK", "JournalIssue" => nil, "ProceedingsArticle" => "CPAPER", "Other" => "GEN", "Dissertation" => "THES", "Dataset" => "DATA", "EditedBook" => "BOOK", "JournalArticle" => "JOUR", "Journal" => nil, "Report" => nil, "BookSeries" => nil, "ReportSeries" => nil, "BookTrack" => nil, "Standard" => nil, "BookSection" => "CHAP", "BookPart" => "CHAP", "Book" => "BOOK", "BookChapter" => "CHAP", "StandardSeries" => nil, "Monograph" => "BOOK", "Component" => nil, "ReferenceEntry" => "DICT", "JournalVolume" => nil, "BookSet" => nil }
- DC_TO_RIS_TRANSLATIONS =
{ "Audiovisual" => "MPCT", "Collection" => nil, "Dataset" => "DATA", "Event" => nil, "Image" => "FIGURE", "InteractiveResource" => nil, "Model" => nil, "PhysicalObject" => nil, "Service" => nil, "Software" => "COMP", "Sound" => "SOUND", "Text" => "RPRT", "Workflow" => nil, "Other" => nil }
- SO_TO_BIB_TRANSLATIONS =
{ "Article" => "article", "AudioObject" => "misc", "Thesis" => "phdthesis", "Blog" => "misc", "BlogPosting" => "article", "Collection" => "misc", "CreativeWork" => "misc", "DataCatalog" => "misc", "Dataset" => "misc", "Event" => "misc", "ImageObject" => "misc", "Movie" => "misc", "PublicationIssue" => "misc", "ScholarlyArticle" => "article", "Service" => "misc", "SoftwareSourceCode" => "misc", "VideoObject" => "misc", "WebPage" => "misc", "WebSite" => "misc" }
Instance Method Summary collapse
- #find_from_format(id: nil, string: nil, ext: nil) ⇒ Object
- #find_from_format_by_ext(string, options = {}) ⇒ Object
- #find_from_format_by_id(id) ⇒ Object
- #find_from_format_by_string(string) ⇒ Object
- #from_citeproc(element) ⇒ Object
- #from_schema_org(element) ⇒ Object
- #get_date_from_date_parts(date_as_parts) ⇒ Object
- #get_date_from_parts(year, month = nil, day = nil) ⇒ Object
- #get_date_parts(iso8601_time) ⇒ Object
- #get_date_parts_from_parts(year, month = nil, day = nil) ⇒ Object
-
#get_datetime_from_iso8601(iso8601_time) ⇒ Object
Parsing of incomplete ISO 8601 timestamps such as 2015-04 is broken in the standard library. Returns nil if the timestamp is not valid ISO 8601.
- #get_year_month(iso8601_time) ⇒ Object
- #get_year_month_day(iso8601_time) ⇒ Object
- #github_as_codemeta_url(url) ⇒ Object
- #github_as_owner_url(url) ⇒ Object
- #github_as_release_url(url) ⇒ Object
- #github_as_repo_url(url) ⇒ Object
- #github_from_url(url) ⇒ Object
- #github_owner_from_url(url) ⇒ Object
- #github_release_from_url(url) ⇒ Object
- #github_repo_from_url(url) ⇒ Object
- #jsonlint(json) ⇒ Object
- #map_hash_keys(element: nil, mapping: nil) ⇒ Object
- #normalize_id(id, options = {}) ⇒ Object
- #normalize_ids(ids: nil) ⇒ Object
-
#normalize_licenses(licenses) ⇒ Object
find Creative Commons or OSI license in licenses array, normalize url and name.
- #normalize_orcid(orcid) ⇒ Object
- #normalize_url(id) ⇒ Object
- #orcid_as_url(orcid) ⇒ Object
- #orcid_from_url(url) ⇒ Object
- #parse_attributes(element, options = {}) ⇒ Object
- #sanitize(text, options = {}) ⇒ Object
- #to_citeproc(element) ⇒ Object
- #to_ris(element) ⇒ Object
- #to_schema_org(element) ⇒ Object
- #to_schema_org_container(element, options = {}) ⇒ Object
- #validate_orcid(orcid) ⇒ Object
- #validate_orcid_scheme(orcid_scheme) ⇒ Object
- #validate_url(str) ⇒ Object
Instance Method Details
#find_from_format(id: nil, string: nil, ext: nil) ⇒ Object
323 324 325 326 327 328 329 330 331 332 333 |
# File 'lib/bolognese/utils.rb', line 323

# Chooses the name of the input format, trying the id first, then the file
# extension, then the raw content.
#
# @param id [String, nil] identifier handed to find_from_format_by_id
# @param string [String, nil] raw content to inspect
# @param ext [String, nil] file extension, forwarded as the :ext option
# @return [String, nil] format name; "datacite" when no argument is present
def find_from_format(id: nil, string: nil, ext: nil)
  return find_from_format_by_id(id) if id.present?
  return find_from_format_by_ext(string, ext: ext) if ext.present?
  return find_from_format_by_string(string) if string.present?

  "datacite"
end
#find_from_format_by_ext(string, options = {}) ⇒ Object
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 |
# File 'lib/bolognese/utils.rb', line 350

# Determines the input format from a file extension plus a look at the
# parsed content for the ambiguous ".xml" and ".json" extensions.
#
# @param string [String] raw file content
# @param options [Hash] options; only :ext (e.g. ".bib", ".json") is read
# @return [String, nil] format name, or nil when nothing matches
def find_from_format_by_ext(string, options = {})
  datacite_kernel = "http://datacite.org/schema/kernel"

  case options[:ext]
  when ".bib" then "bibtex"
  when ".ris" then "ris"
  when ".xml"
    # parse once and reuse for every XML probe
    xml = Maremma.from_xml(string).to_h
    if xml.dig("doi_records", "doi_record", "crossref")
      "crossref"
    elsif xml.dig("resource", "xmlns").to_s.start_with?(datacite_kernel)
      "datacite"
    end
  when ".json"
    # parse once and reuse for every JSON probe; order of checks matters
    json = Maremma.from_json(string).to_h
    if json.dig("ris_type")
      "crosscite"
    elsif json.dig("schemaVersion").to_s.start_with?(datacite_kernel)
      "datacite_json"
    elsif json.dig("issued", "date-parts").present?
      "citeproc"
    elsif json.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
      "schema_org"
    elsif json.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
      "codemeta"
    end
  end
end
#find_from_format_by_id(id) ⇒ Object
335 336 337 338 339 340 341 342 343 344 345 346 347 348 |
# File 'lib/bolognese/utils.rb', line 335

# Determines the input format from the shape of an identifier.
#
# The id is first passed through normalize_id. DOIs are resolved to their
# registration agency via get_doi_ra (defined elsewhere; presumably returns
# the agency name — confirm), and only "DataCite" and "Crossref" are accepted.
#
# @param id [String] DOI, ORCID iD, GitHub URL or other URL
# @return [String, nil] "datacite", "crossref", "orcid", "codemeta" or
#   "schema_org"; nil for a DOI registered with any other agency
def find_from_format_by_id(id)
  id = normalize_id(id)

  # dots in host names are escaped so that e.g. "doiXorg" is not accepted
  if /\A(?:(http|https):\/(\/)?(dx\.)?(doi\.org|handle\.test\.datacite\.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(id)
    ra = get_doi_ra(id)
    %w(DataCite Crossref).include?(ra) ? ra.downcase : nil
  # last group is three digits plus exactly one check character
  elsif /\A(?:(http|https):\/(\/)?orcid\.org\/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])\z/.match(id)
    "orcid"
  elsif /\A(http|https):\/(\/)?github\.com\/(.+)\z/.match(id)
    "codemeta"
  else
    "schema_org"
  end
end
#find_from_format_by_string(string) ⇒ Object
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 |
# File 'lib/bolognese/utils.rb', line 372

# Determines the input format by inspecting the content itself.
#
# Probes run in a fixed order: Crossref XML, DataCite XML, the JSON flavours,
# RIS, and finally BibTeX. The XML and JSON parses are done lazily and at
# most once each.
#
# @param string [String] raw content
# @return [String, nil] format name, or nil when nothing matches
def find_from_format_by_string(string)
  xml = nil
  json = nil
  parsed_xml = -> { xml ||= Maremma.from_xml(string).to_h }
  parsed_json = -> { json ||= Maremma.from_json(string).to_h }
  datacite_kernel = "http://datacite.org/schema/kernel"

  if parsed_xml.call.dig("doi_records", "doi_record", "crossref").present?
    "crossref"
  elsif parsed_xml.call.dig("resource", "xmlns").to_s.start_with?(datacite_kernel)
    "datacite"
  elsif parsed_json.call.dig("ris_type").present?
    "crosscite"
  elsif parsed_json.call.dig("schemaVersion").to_s.start_with?(datacite_kernel)
    "datacite_json"
  elsif parsed_json.call.dig("issued", "date-parts").present?
    "citeproc"
  elsif parsed_json.call.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
    "schema_org"
  elsif parsed_json.call.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
    "codemeta"
  # RIS tags are followed by two spaces ("TY  - "); the single-space form is
  # still accepted for backward compatibility
  elsif string.start_with?("TY  - ", "TY - ")
    "ris"
  elsif BibTeX.parse(string).first
    "bibtex"
  end
end
#from_citeproc(element) ⇒ Object
555 556 557 558 559 560 561 562 563 564 565 566 567 568 |
# File 'lib/bolognese/utils.rb', line 555

# Converts citeproc-style name hashes into schema.org-style ones.
#
# An entry with a "literal" becomes an "Organization" named by that literal;
# any other entry becomes a "Person" whose name is "given family". The
# citeproc keys are removed and nil values dropped. Each entry is copied
# first, so the caller's hashes are left untouched.
#
# @param element [Hash, Array<Hash>, nil] one or more citeproc name hashes
# @return [Object] converted entries, passed through Array#unwrap
def from_citeproc(element)
  Array.wrap(element).map do |entry|
    a = entry.dup # work on a copy instead of mutating the caller's hash
    if a["literal"].present?
      a["@type"] = "Organization"
      a["name"] = a["literal"]
    else
      a["@type"] = "Person"
      a["name"] = [a["given"], a["family"]].compact.join(" ")
    end
    a["givenName"] = a["given"]
    a["familyName"] = a["family"]
    a.except("given", "family", "literal").compact
  end.unwrap
end
#from_schema_org(element) ⇒ Object
535 536 537 538 539 |
# File 'lib/bolognese/utils.rb', line 535

# Renames the schema.org keys "@type" and "@id" to "type" and "id" by
# delegating to map_hash_keys.
#
# @param element [Hash, Array<Hash>, nil] input forwarded to map_hash_keys
# @return [Object] whatever map_hash_keys returns
def from_schema_org(element)
  map_hash_keys(element: element, mapping: { "@type" => "type", "@id" => "id" })
end
#get_date_from_date_parts(date_as_parts) ⇒ Object
663 664 665 666 667 |
# File 'lib/bolognese/utils.rb', line 663

# Builds a date string from a citeproc-style "date-parts" hash by handing
# the first entry's year, month and day to get_date_from_parts.
#
# A nil argument, a missing or nil "date-parts" value, and an empty list are
# all treated like an empty first entry (year, month and day are nil)
# instead of raising NoMethodError.
#
# @param date_as_parts [Hash, nil] e.g. { "date-parts" => [[2015, 4, 3]] }
# @return [String] result of get_date_from_parts
def get_date_from_date_parts(date_as_parts)
  date_parts = Array(Array(date_as_parts.to_h["date-parts"]).first)
  year, month, day = date_parts
  get_date_from_parts(year, month, day)
end
#get_date_from_parts(year, month = nil, day = nil) ⇒ Object
669 670 671 |
# File 'lib/bolognese/utils.rb', line 669

# Joins year, month and day into a dash-separated date string.
#
# The year is zero-padded to four characters, month and day to two. Any
# part that pads out to exactly "00" (nil, 0, "") is left out.
#
# @param year [#to_s]
# @param month [#to_s, nil]
# @param day [#to_s, nil]
# @return [String] e.g. "2015-04-03", "2015-04" or "2015"
def get_date_from_parts(year, month = nil, day = nil)
  padded = [[year, 4], [month, 2], [day, 2]].map do |value, width|
    value.to_s.rjust(width, '0')
  end
  padded.reject { |part| part == "00" }.join("-")
end
#get_date_parts(iso8601_time) ⇒ Object
654 655 656 657 658 659 660 661 |
# File 'lib/bolognese/utils.rb', line 654

# Turns an ISO 8601 date string into a citeproc-style "date-parts" hash.
#
# Year, month and day are read from fixed character positions (0..3, 5..6,
# 8..9) and converted with to_i; parts that come out as 0 are dropped.
#
# @param iso8601_time [String, nil]
# @return [Hash] e.g. { 'date-parts' => [[2015, 4, 3]] };
#   { 'date-parts' => [[]] } for nil
def get_date_parts(iso8601_time)
  return { 'date-parts' => [[]] } if iso8601_time.nil?

  numbers = [0..3, 5..6, 8..9].map { |span| iso8601_time[span].to_i }
  { 'date-parts' => [numbers.reject(&:zero?)] }
end
#get_date_parts_from_parts(year, month = nil, day = nil) ⇒ Object
673 674 675 |
# File 'lib/bolognese/utils.rb', line 673

# Wraps year, month and day in a citeproc-style "date-parts" hash.
#
# Each part is converted with to_i and parts equal to 0 are dropped.
#
# @param year [#to_i]
# @param month [#to_i, nil]
# @param day [#to_i, nil]
# @return [Hash] e.g. { 'date-parts' => [[2015, 4]] }
def get_date_parts_from_parts(year, month = nil, day = nil)
  parts = [year, month, day].map(&:to_i).reject(&:zero?)
  { 'date-parts' => [parts] }
end
#get_datetime_from_iso8601(iso8601_time) ⇒ Object
Parsing of incomplete ISO 8601 timestamps such as 2015-04 is broken in the standard library. Returns nil if the timestamp is not valid ISO 8601.
699 700 701 702 703 |
# File 'lib/bolognese/utils.rb', line 699

# Parses an ISO 8601 timestamp with ISO8601::DateTime and converts it to a
# UTC time.
#
# @param iso8601_time [String]
# @return [Time, nil] nil when any StandardError is raised while parsing
def get_datetime_from_iso8601(iso8601_time)
  begin
    parsed = ISO8601::DateTime.new(iso8601_time)
    parsed.to_time.utc
  rescue StandardError
    nil
  end
end
#get_year_month(iso8601_time) ⇒ Object
677 678 679 680 681 682 683 684 |
# File 'lib/bolognese/utils.rb', line 677

# Extracts year and month from an ISO 8601 date string.
#
# Values are read from character positions 0..3 and 5..6 and converted with
# to_i; parts that come out as 0 are dropped.
#
# @param iso8601_time [String, nil]
# @return [Array<Integer>] e.g. [2015, 4]; [] for nil
def get_year_month(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6].map { |span| iso8601_time[span].to_i }.reject(&:zero?)
end
#get_year_month_day(iso8601_time) ⇒ Object
686 687 688 689 690 691 692 693 694 |
# File 'lib/bolognese/utils.rb', line 686

# Extracts year, month and day from an ISO 8601 date string.
#
# Values are read from character positions 0..3, 5..6 and 8..9 and converted
# with to_i; parts that come out as 0 are dropped.
#
# @param iso8601_time [String, nil]
# @return [Array<Integer>] e.g. [2015, 4, 3]; [] for nil
def get_year_month_day(iso8601_time)
  return [] if iso8601_time.nil?

  [0..3, 5..6, 8..9].map { |span| iso8601_time[span].to_i }.reject(&:zero?)
end
#github_as_codemeta_url(url) ⇒ Object
644 645 646 647 648 649 650 651 652 |
# File 'lib/bolognese/utils.rb', line 644

# Builds the raw.githubusercontent.com URL of a repository's codemeta.json.
#
# When the URL already points at a codemeta.json file, its release and path
# are kept; otherwise the file is assumed to live at the root of "master".
#
# @param url [String] GitHub URL, parsed with github_from_url
# @return [String, nil] raw file URL, or nil when no owner could be extracted
def github_as_codemeta_url(url)
  github_hash = github_from_url(url)

  if github_hash[:path].to_s.end_with?("codemeta.json")
    "https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/#{github_hash[:release]}/#{github_hash[:path]}"
  elsif github_hash[:owner].present?
    "https://raw.githubusercontent.com/#{github_hash[:owner]}/#{github_hash[:repo]}/master/codemeta.json"
  end
end
#github_as_owner_url(url) ⇒ Object
629 630 631 632 |
# File 'lib/bolognese/utils.rb', line 629

# Builds the GitHub URL of the owner found in +url+.
#
# @param url [String] GitHub URL, parsed with github_from_url
# @return [String, nil] nil when no owner is present
def github_as_owner_url(url)
  owner = github_from_url(url)[:owner]
  return nil unless owner.present?

  "https://github.com/#{owner}"
end
#github_as_release_url(url) ⇒ Object
639 640 641 642 |
# File 'lib/bolognese/utils.rb', line 639

# Builds the GitHub "tree" URL of the release found in +url+.
#
# @param url [String] GitHub URL, parsed with github_from_url
# @return [String, nil] nil when no release is present
def github_as_release_url(url)
  parts = github_from_url(url)
  return nil unless parts[:release].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}/tree/#{parts[:release]}"
end
#github_as_repo_url(url) ⇒ Object
634 635 636 637 |
# File 'lib/bolognese/utils.rb', line 634

# Builds the GitHub URL of the repository found in +url+.
#
# @param url [String] GitHub URL, parsed with github_from_url
# @return [String, nil] nil when no repository is present
def github_as_repo_url(url)
  parts = github_from_url(url)
  return nil unless parts[:repo].present?

  "https://github.com/#{parts[:owner]}/#{parts[:repo]}"
end
#github_from_url(url) ⇒ Object
606 607 608 609 610 611 612 613 614 615 |
# File 'lib/bolognese/utils.rb', line 606

# Splits a https://github.com/... URL into owner, repo, release and path.
#
# The URL path is split on "/": segment 0 is the owner, 1 the repo, 3 the
# release (segment 2, e.g. "tree", is skipped) and everything from segment 4
# on is the path. Missing parts are omitted from the result.
#
# @param url [String, nil]
# @return [Hash{Symbol => String}] subset of :owner, :repo, :release, :path;
#   {} when +url+ is not a https://github.com/ URL
def github_from_url(url)
  return {} unless /\Ahttps:\/\/github\.com\/(.+)(?:\/)?(.+)?(?:\/tree\/)?(.*)\z/.match(url)

  words = URI.parse(url).path[1..-1].split('/')
  # a path exists only when there is a fifth segment; testing "> 3" produced
  # an empty-string path for URLs that end at the release
  path = words.length > 4 ? words[4..-1].join("/") : nil

  { owner: words[0],
    repo: words[1],
    release: words[3],
    path: path }.compact
end
#github_owner_from_url(url) ⇒ Object
625 626 627 |
# File 'lib/bolognese/utils.rb', line 625

# Returns the owner part of a GitHub URL.
#
# @param url [String] GitHub URL, parsed with github_from_url
# @return [String, nil] nil when the URL has no owner
def github_owner_from_url(url)
  parts = github_from_url(url)
  parts.key?(:owner) ? parts[:owner] : nil
end
#github_release_from_url(url) ⇒ Object
621 622 623 |
# File 'lib/bolognese/utils.rb', line 621

# Returns the release part of a GitHub URL.
#
# @param url [String] GitHub URL, parsed with github_from_url
# @return [String, nil] nil when the URL has no release
def github_release_from_url(url)
  parts = github_from_url(url)
  parts.key?(:release) ? parts[:release] : nil
end
#github_repo_from_url(url) ⇒ Object
617 618 619 |
# File 'lib/bolognese/utils.rb', line 617

# Returns the repository part of a GitHub URL.
#
# @param url [String] GitHub URL, parsed with github_from_url
# @return [String, nil] nil when the URL has no repository
def github_repo_from_url(url)
  parts = github_from_url(url)
  parts.key?(:repo) ? parts[:repo] : nil
end
#jsonlint(json) ⇒ Object
705 706 707 708 709 710 711 712 |
# File 'lib/bolognese/utils.rb', line 705

# Runs JsonLint over a JSON string and collects the reported errors.
#
# JsonLint::Linter#check_data is invoked through send, so it is presumably
# not part of the linter's public API — confirm when upgrading the gem.
#
# @param json [String, nil]
# @return [Array] collected errors; ["No JSON provided"] for blank input
def jsonlint(json)
  if json.present?
    errors = []
    JsonLint::Linter.new.send(:check_data, json, errors)
    errors
  else
    ["No JSON provided"]
  end
end
#map_hash_keys(element: nil, mapping: nil) ⇒ Object
541 542 543 544 545 546 547 548 549 550 551 552 553 |
# File 'lib/bolognese/utils.rb', line 541

# Renames the keys of one or more hashes according to +mapping+; keys
# without an entry in +mapping+ are kept unchanged.
#
# NOTE(review): nested Hash values are always converted with to_schema_org,
# whatever +mapping+ was passed in — confirm this is intended for callers
# such as from_schema_org.
#
# @param element [Hash, Array<Hash>, nil] hash(es) to convert
# @param mapping [Hash] old key => new key
# @return [Object] converted hash(es), passed through Array#unwrap
def map_hash_keys(element: nil, mapping: nil)
  converted = Array.wrap(element).map do |hash|
    hash.each_with_object({}) do |(key, value), result|
      renamed = mapping.fetch(key, key)
      result[renamed] = value.is_a?(Hash) ? to_schema_org(value) : value
    end
  end

  converted.unwrap
end
#normalize_id(id, options = {}) ⇒ Object
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 |
# File 'lib/bolognese/utils.rb', line 436

# Normalizes an identifier that is either a DOI or an HTTP(S) URL.
#
# The id is first offered to normalize_doi (defined elsewhere); when that
# yields nothing, the id must parse as an http/https URI with a host and is
# then cleaned with PostRank::URI.clean.
#
# @param id [String, nil]
# @param options [Hash] forwarded unchanged to normalize_doi
# @return [String, nil] normalized id; nil for blank input, non-HTTP URIs
#   and URIs Addressable cannot parse
def normalize_id(id, options = {})
  return nil unless id.present?

  # check for valid DOI
  doi = normalize_doi(id, options)
  return doi if doi.present?

  # check for valid HTTP uri
  uri = Addressable::URI.parse(id)
  return nil unless uri && uri.host && %w(http https).include?(uri.scheme)

  # clean up URL
  PostRank::URI.clean(id)
rescue Addressable::URI::InvalidURIError
  nil
end
#normalize_ids(ids: nil) ⇒ Object
474 475 476 477 478 479 480 |
# File 'lib/bolognese/utils.rb', line 474

# Reduces related-identifier hashes to "id", "type" and "title".
#
# The type is taken from "@type", else looked up from "resourceTypeGeneral"
# in Metadata::DC_TO_SO_TRANSLATIONS, else "CreativeWork". The title falls
# back to "name". Entries with nil values are dropped from each hash.
#
# @param ids [Hash, Array<Hash>, nil]
# @return [Object] normalized hash(es), passed through Array#unwrap
def normalize_ids(ids: nil)
  normalized = Array.wrap(ids).map do |id|
    identifier = normalize_id(id["@id"])
    type = id["@type"] || Metadata::DC_TO_SO_TRANSLATIONS[id["resourceTypeGeneral"]] || "CreativeWork"
    title = id["title"] || id["name"]

    { "id" => identifier, "type" => type, "title" => title }.compact
  end

  normalized.unwrap
end
#normalize_licenses(licenses) ⇒ Object
find Creative Commons or OSI license in licenses array, normalize url and name
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 |
# File 'lib/bolognese/utils.rb', line 483

# Finds a Creative Commons or OSI licence in +licenses+ and returns its
# normalized URL.
#
# NOTE(review): the previous body used a local +uri+ that was never
# assigned, so every matching input raised NameError. The first matching
# licence is used here — confirm that this is the intended selection and
# that a URL string (rather than a url/name pair) is the intended result.
#
# @param licenses [Hash, Array<Hash>, nil] licence hashes with a "url" key
# @return [String, Object, nil] normalized https URL of the first matching
#   licence; +licenses+ unchanged when none matches; nil when a URL cannot
#   be parsed
def normalize_licenses(licenses)
  standard_licenses = Array.wrap(licenses).map { |l| URI.parse(l["url"]) }.select { |li| li.host && li.host[/(creativecommons.org|opensource.org)$/] }
  return licenses unless standard_licenses.present?

  uri = standard_licenses.first

  # use HTTPS
  uri.scheme = "https"

  # use host name without subdomain
  uri.host = Array(/(creativecommons.org|opensource.org)/.match uri.host).last

  # normalize URLs
  if uri.host == "creativecommons.org"
    # drop a trailing "legalcode" segment and make sure the path ends in "/"
    uri.path = uri.path.split('/')[0..-2].join("/") if uri.path.split('/').last == "legalcode"
    uri.path = uri.path + '/' unless uri.path.end_with?('/')
  else
    uri.path = uri.path.gsub(/(-license|\.php|\.html)/, '')
    uri.path = uri.path.sub(/(mit|afl|apl|osl|gpl|ecl)/) { |match| match.upcase }
    uri.path = uri.path.sub(/(artistic|apache)/) { |match| match.titleize }
    # rewrite a trailing version such as "2" or "2.0" to "-<major>.<minor>"
    uri.path = uri.path.sub(/([^0-9\-]+)(-)?([1-9])?(\.)?([0-9])?$/) do
      m = Regexp.last_match
      text = m[1]

      if m[3].present?
        version = [m[3], m[5].presence || "0"].join(".")
        [text, version].join("-")
      else
        text
      end
    end
  end

  uri.to_s
rescue URI::InvalidURIError
  nil
end
#normalize_orcid(orcid) ⇒ Object
466 467 468 469 470 471 472 |
# File 'lib/bolognese/utils.rb', line 466

# Turns an ORCID iD (bare or as URL) into its http://orcid.org/ URL.
#
# @param orcid [String, nil] value checked with validate_orcid
# @return [String, nil] nil when validate_orcid rejects the input
def normalize_orcid(orcid)
  valid_orcid = validate_orcid(orcid)

  # turn ORCID ID into URL
  valid_orcid.present? ? "http://orcid.org/" + Addressable::URI.encode(valid_orcid) : nil
end
#normalize_url(id) ⇒ Object
453 454 455 456 457 458 459 460 461 462 463 464 |
# File 'lib/bolognese/utils.rb', line 453

# Cleans up an HTTP(S) URL with PostRank::URI.clean.
#
# @param id [String, nil]
# @return [String, nil] cleaned URL; nil for blank input, for URIs without
#   a host or with a scheme other than http/https, and for URIs Addressable
#   cannot parse
def normalize_url(id)
  if id.present?
    # check for valid HTTP uri
    parsed = Addressable::URI.parse(id)
    http_uri = parsed && parsed.host && %w(http https).include?(parsed.scheme)

    # clean up URL
    PostRank::URI.clean(id) if http_uri
  end
rescue Addressable::URI::InvalidURIError
  nil
end
#orcid_as_url(orcid) ⇒ Object
398 399 400 |
# File 'lib/bolognese/utils.rb', line 398

# Prefixes an ORCID iD with https://orcid.org/.
#
# @param orcid [String, nil]
# @return [String, nil] nil for blank input
def orcid_as_url(orcid)
  return nil unless orcid.present?

  "https://orcid.org/#{orcid}"
end
#orcid_from_url(url) ⇒ Object
394 395 396 |
# File 'lib/bolognese/utils.rb', line 394

# Extracts the part after http(s)://orcid.org/ from an ORCID URL.
#
# The pattern previously began with "\A:", which required a literal colon in
# front of the scheme, so no real URL ever matched.
#
# @param url [String, nil]
# @return [String, nil] text following the host; nil when +url+ is not an
#   orcid.org URL
def orcid_from_url(url)
  Array(/\A(?:http|https):\/\/orcid\.org\/(.+)/.match(url)).last
end
#parse_attributes(element, options = {}) ⇒ Object
421 422 423 424 425 426 427 428 429 430 431 432 433 434 |
# File 'lib/bolognese/utils.rb', line 421

# Pulls the text content out of a parsed element.
#
# Strings are returned as-is, hashes yield the value stored under the
# content key, and arrays are mapped element by element and de-duplicated.
#
# @param element [String, Hash, Array, nil]
# @param options [Hash]
# @option options [String] :content key holding the text ("__content__")
# @option options [Boolean] :first return only the first array entry
# @return [Object, nil] extracted content; arrays go through Array#unwrap
#   unless :first is set; nil for any other input type
def parse_attributes(element, options = {})
  content = options[:content] || "__content__"

  case element
  when String
    element
  when Hash
    element.fetch(content, nil)
  when Array
    values = element.map { |e| e.is_a?(Hash) ? e.fetch(content, nil) : e }.uniq
    options[:first] ? values.first : values.unwrap
  end
end
#sanitize(text, options = {}) ⇒ Object
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 |
# File 'lib/bolognese/utils.rb', line 589

# Scrubs markup from text with Loofah and a Bolognese::WhitelistScrubber
# built from +options+, replaces non-breaking spaces with plain spaces and
# strips surrounding whitespace.
#
# Hashes are reduced to the value under the content key, arrays are
# sanitized element by element and de-duplicated.
#
# NOTE(review): +options+ is modified in place (:tags gets a default), and
# the recursive calls do not forward +options+, so nested values are
# scrubbed with the defaults — confirm that this is intended.
#
# @param text [String, Hash, Array, nil]
# @param options [Hash]
# @option options [Set<String>] :tags tag names handed to the scrubber
# @option options [String] :content key holding the text ("__content__")
# @option options [Boolean] :first return only the first array entry
# @return [Object, nil] sanitized content; nil for any other input type
def sanitize(text, options = {})
  options[:tags] ||= Set.new(%w(strong em b i code pre sub sup br))
  content = options[:content] || "__content__"
  custom_scrubber = Bolognese::WhitelistScrubber.new(options)

  if text.is_a?(String)
    Loofah.scrub_fragment(text, custom_scrubber).to_s.gsub(/\u00a0/, ' ').strip
  elsif text.is_a?(Hash)
    sanitize(text.fetch(content, nil))
  elsif text.is_a?(Array)
    a = text.map { |e| e.is_a?(Hash) ? sanitize(e.fetch(content, nil)) : sanitize(e) }.uniq
    options[:first] ? a.first : a.unwrap
  else
    nil
  end
end
#to_citeproc(element) ⇒ Object
570 571 572 573 574 575 576 577 |
# File 'lib/bolognese/utils.rb', line 570

# Converts schema.org-style name hashes into citeproc-style ones.
#
# "familyName"/"givenName" become "family"/"given"; entries without a family
# name get their "name" as "literal". The schema.org keys are removed and
# nil values dropped. Each entry is copied first, so the caller's hashes are
# left untouched.
#
# @param element [Hash, Array<Hash>, nil]
# @return [Array<Hash>, nil] converted entries; nil when there are none
def to_citeproc(element)
  Array.wrap(element).map do |entry|
    a = entry.dup # work on a copy instead of mutating the caller's hash
    a["family"] = a["familyName"]
    a["given"] = a["givenName"]
    a["literal"] = a["name"] unless a["familyName"].present?
    a.except("type", "@type", "id", "@id", "name", "familyName", "givenName").compact
  end.presence
end
#to_ris(element) ⇒ Object
579 580 581 582 583 584 585 586 587 |
# File 'lib/bolognese/utils.rb', line 579

# Formats name hashes for RIS output: "familyName, givenName" when a family
# name is present, otherwise the plain "name".
#
# @param element [Hash, Array<Hash>, nil]
# @return [Object] formatted name(s), passed through Array#unwrap
def to_ris(element)
  names = Array.wrap(element).map do |a|
    a["familyName"].present? ? [a["familyName"], a["givenName"]].join(", ") : a["name"]
  end

  names.unwrap
end
#to_schema_org(element) ⇒ Object
519 520 521 522 523 |
# File 'lib/bolognese/utils.rb', line 519

# Renames "type", "id" and "title" to the schema.org keys "@type", "@id"
# and "name" by delegating to map_hash_keys.
#
# @param element [Hash, Array<Hash>, nil] input forwarded to map_hash_keys
# @return [Object] whatever map_hash_keys returns
def to_schema_org(element)
  schema_org_keys = { "type" => "@type", "id" => "@id", "title" => "name" }
  map_hash_keys(element: element, mapping: schema_org_keys)
end
#to_schema_org_container(element, options = {}) ⇒ Object
525 526 527 528 529 530 531 532 533 |
# File 'lib/bolognese/utils.rb', line 525

# Converts a container hash to schema.org keys, filling in a default type
# and an optional title.
#
# The default type is "DataCatalog" when +type+ is "Dataset" and
# "Periodical" otherwise. +type+ is not defined in this block; presumably it
# is provided by the including object — confirm. Note that a given
# +element+ hash is modified in place.
#
# @param element [Hash, nil] container attributes; nil is treated as {}
# @param options [Hash]
# @option options [String] :container_title overrides element["title"]
# @return [Object] whatever map_hash_keys returns
def to_schema_org_container(element, options = {})
  mapping = { "type" => "@type", "id" => "@id", "title" => "name" }

  element ||= {}
  element["type"] ||= (type == "Dataset") ? "DataCatalog" : "Periodical"
  element["title"] = options[:container_title] if options[:container_title].present?

  map_hash_keys(element: element, mapping: mapping)
end
#validate_orcid(orcid) ⇒ Object
402 403 404 405 |
# File 'lib/bolognese/utils.rb', line 402

# Validates an ORCID iD, given bare or as an orcid.org URL, and returns it
# in dash-separated form.
#
# The four groups may be separated by dashes or whitespace. The last group
# is three digits plus exactly one check character (a digit or "X"); the
# pattern previously used "+" there and so accepted over-long identifiers.
#
# @param orcid [String, nil]
# @return [String, nil] e.g. "0000-0002-2590-225X"; nil when invalid
def validate_orcid(orcid)
  orcid = Array(/\A(?:(http|https):\/\/(www\.)?orcid\.org\/)?(\d{4}[[:space:]-]\d{4}[[:space:]-]\d{4}[[:space:]-]\d{3}[0-9X])\z/.match(orcid)).last
  # a match is never blank, so a plain nil check is enough
  orcid.gsub(/[[:space:]]/, "-") if orcid
end
#validate_orcid_scheme(orcid_scheme) ⇒ Object
407 408 409 |
# File 'lib/bolognese/utils.rb', line 407

# Checks that a scheme URI starts with http(s)://orcid.org (optionally with
# a "www." prefix).
#
# @param orcid_scheme [String, nil]
# @return [String, nil] "orcid.org" on a match, nil otherwise
def validate_orcid_scheme(orcid_scheme)
  matched = /\A(http|https):\/\/(www\.)?(orcid\.org)/.match(orcid_scheme)
  matched ? matched.to_a.last : nil
end
#validate_url(str) ⇒ Object
411 412 413 414 415 416 417 418 419 |
# File 'lib/bolognese/utils.rb', line 411

# Classifies a string as a DOI, a URL or an ISSN.
#
# The dot in "doi.org" is escaped so that look-alike hosts such as "doiXorg"
# are classified as plain URLs, and the ISSN must end in exactly one check
# character (a digit or "X").
#
# @param str [String, nil]
# @return [String, nil] "DOI", "URL" or "ISSN"; nil when nothing matches
def validate_url(str)
  if /\A(?:(http|https):\/\/(dx\.)?doi\.org\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(str)
    "DOI"
  elsif /\A(http|https):\/\//.match(str)
    "URL"
  elsif /\A(ISSN|eISSN) (\d{4}-\d{3}[0-9X])\z/.match(str)
    "ISSN"
  end
end