30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
# File 'lib/bolognese/readers/schema_org_reader.rb', line 30
# Parses a schema.org JSON document into the internal metadata hash.
#
# @param string [String, nil] schema.org JSON; when blank, the result is
#   built from an empty hash and "state" is "not_found".
# @param options [Hash] accepted but not read by this method.
# @return [Hash] `{ "errors" => errors }` when `jsonlint` reports problems
#   for a non-blank string, otherwise the metadata hash assembled below.
def read_schema_org(string: nil, **options)
  if string.present?
    errors = jsonlint(string)
    return { "errors" => errors } if errors.present?
  end

  meta = string.present? ? Maremma.from_json(string) : {}

  # The first identifier is the primary one; any further entries become
  # alternate identifiers. Entries that are neither String nor Hash are
  # dropped instead of being kept as nil.
  identifiers = Array.wrap(meta.fetch("identifier", nil))
  alternate_identifiers =
    if identifiers.length > 1
      identifiers[1..-1].map do |r|
        case r
        when String then { "type" => "URL", "name" => r }
        when Hash then { "type" => r["propertyID"], "name" => r["value"] }
        end
      end.compact.unwrap
    end
  identifier = identifiers.first

  id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil))
  raw_type = meta.fetch("@type", nil)
  type = raw_type && raw_type.camelcase
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[type]

  # "author" takes precedence over "creator".
  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
  author = get_authors(from_schema_org(Array.wrap(authors)))
  editor = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil))))
  publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)

  # Datasets take their container from "includedInDataCatalog"; every other
  # type is looked up under the "Periodical" key.
  ct = (type == "Dataset") ? "includedInDataCatalog" : "Periodical"
  container = meta.fetch(ct, nil)
  periodical =
    if container.present?
      {
        "type" => (type == "Dataset") ? "DataCatalog" : "Periodical",
        "title" => parse_attributes(from_schema_org(container), content: "name", first: true),
        "url" => parse_attributes(from_schema_org(container), content: "url", first: true)
      }.compact
    end

  related_identifiers = Array.wrap(schema_org_is_identical_to(meta)) +
                        Array.wrap(schema_org_is_part_of(meta)) +
                        Array.wrap(schema_org_has_part(meta)) +
                        Array.wrap(schema_org_is_previous_version_of(meta)) +
                        Array.wrap(schema_org_is_new_version_of(meta)) +
                        Array.wrap(schema_org_references(meta)) +
                        Array.wrap(schema_org_is_referenced_by(meta)) +
                        Array.wrap(schema_org_is_supplement_to(meta)) +
                        Array.wrap(schema_org_is_supplemented_by(meta))

  license = meta.fetch("license", nil)
  rights = {
    "id" => parse_attributes(license, content: "id", first: true),
    "name" => parse_attributes(license, content: "name", first: true)
  }

  # Funders whose @id starts with the 10.13039 DOI prefix are labelled as
  # Crossref Funder IDs; nil values are removed from each entry.
  funding_references = Array.wrap(meta.fetch("funder", nil)).compact.map do |fr|
    funder_id = fr["@id"]
    {
      "funder_name" => fr["name"],
      "funder_identifier" => funder_id,
      "funder_identifier_type" => funder_id.to_s.start_with?("https://doi.org/10.13039") ? "Crossref Funder ID" : nil
    }.compact
  end

  date_published = meta.fetch("datePublished", nil)
  state = meta.present? ? "findable" : "not_found"

  geo_location = Array.wrap(meta.fetch("spatialCoverage", nil)).map do |gl|
    box = gl.dig("geo", "box")
    geo_location_box =
      if box
        # The box string is read as "south west north east".
        s, w, n, e = box.split(" ", 4)
        {
          "west_bound_longitude" => w,
          "east_bound_longitude" => e,
          "south_bound_latitude" => s,
          "north_bound_latitude" => n
        }.compact.presence
      end
    geo_location_point = {
      "point_longitude" => gl.dig("geo", "longitude"),
      "point_latitude" => gl.dig("geo", "latitude")
    }.compact.presence

    {
      "geo_location_place" => gl.dig("geo", "address"),
      "geo_location_point" => geo_location_point,
      "geo_location_box" => geo_location_box
    }.compact
  end

  description = meta.fetch("description", nil)
  # Keywords may arrive as one comma-separated string or as a list; each
  # element is split on ", " so both shapes give a flat array.
  keywords = Array.wrap(meta.fetch("keywords", nil)).flat_map { |k| k.to_s.split(", ") }

  {
    "id" => id,
    "type" => type,
    "additional_type" => meta.fetch("additionalType", nil),
    "citeproc_type" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[type] || "article-journal",
    "bibtex_type" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[type] || "misc",
    "ris_type" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN",
    "resource_type_general" => resource_type_general,
    "doi" => validate_doi(id),
    "identifier" => identifier,
    "alternate_identifiers" => alternate_identifiers,
    "b_url" => normalize_id(meta.fetch("url", nil)),
    "content_url" => Array.wrap(meta.fetch("contentUrl", nil)).unwrap,
    # The schema.org property is "contentSize"; the misspelled "contenSize"
    # is still consulted so input relying on the old key keeps working.
    "size" => meta.fetch("contentSize", nil) || meta.fetch("contenSize", nil),
    "format" => Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)).unwrap,
    "title" => meta.fetch("name", nil),
    "creator" => author,
    "editor" => editor,
    "publisher" => publisher,
    "service_provider" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
    "periodical" => periodical,
    "related_identifiers" => related_identifiers,
    "date_created" => meta.fetch("dateCreated", nil),
    "date_published" => date_published,
    "date_modified" => meta.fetch("dateModified", nil),
    "description" => description.present? ? { "text" => sanitize(description) } : nil,
    "rights" => rights,
    "b_version" => meta.fetch("version", nil),
    "keywords" => keywords,
    "state" => state,
    "schema_version" => meta.fetch("schemaVersion", nil),
    "funding_references" => funding_references,
    "geo_location" => geo_location
  }
end
|