30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
# File 'lib/bolognese/readers/schema_org_reader.rb', line 30
# Reads a schema.org JSON-LD document and maps it onto the flat metadata hash
# produced by this reader.
#
# @param string [String, nil] JSON text. When blank, nothing is parsed and every
#   field is derived from an empty hash, so "state" becomes "not_found".
# @param options [Hash] accepted for interface compatibility; not read here.
# @return [Hash] `{ "errors" => errors }` when `jsonlint` reports problems for a
#   non-blank string, otherwise the metadata hash assembled below.
def read_schema_org(string: nil, **options)
  if string.present?
    errors = jsonlint(string)
    return { "errors" => errors } if errors.present?
  end

  meta = string.present? ? Maremma.from_json(string) : {}

  # The first "identifier" entry is the primary identifier; any further
  # entries are reported as alternate identifiers.
  identifiers = Array.wrap(meta.fetch("identifier", nil))
  alternate_identifier =
    if identifiers.length > 1
      identifiers[1..-1].map do |r|
        case r
        when String then { "type" => "URL", "name" => r }
        when Hash then { "type" => r["propertyID"], "name" => r["value"] }
        end
      end.compact.unwrap # compact: skip entries that are neither String nor Hash
    end
  identifier = identifiers.first

  id = normalize_id(meta.fetch("@id", nil) || meta.fetch("identifier", nil))
  type = meta.fetch("@type", nil) && meta.fetch("@type").camelcase
  resource_type_general = Bolognese::Utils::SO_TO_DC_TRANSLATIONS[type]

  # "author" takes precedence; "creator" is only consulted when it is absent.
  authors = meta.fetch("author", nil) || meta.fetch("creator", nil)
  author = get_authors(from_schema_org(Array.wrap(authors)))
  editor = get_authors(from_schema_org(Array.wrap(meta.fetch("editor", nil))))
  publisher = parse_attributes(meta.fetch("publisher", nil), content: "name", first: true)

  # Only data catalogs that carry a URL are kept.
  data_catalogs = Array.wrap(from_schema_org(Array.wrap(meta.fetch("includedInDataCatalog", nil))))
  included_in_data_catalog = data_catalogs
    .select { |dc| dc["url"].present? }
    .map { |dc| { "title" => dc["name"], "url" => dc["url"] } }
    .unwrap
  is_part_of = schema_org_is_part_of(meta) || included_in_data_catalog

  license_meta = meta.fetch("license", nil)
  license = {
    "id" => parse_attributes(license_meta, content: "id", first: true),
    "name" => parse_attributes(license_meta, content: "name", first: true)
  }

  funding = from_schema_org(Array.wrap(meta.fetch("funding", nil)))
  state = meta.present? ? "findable" : "not_found"

  # The container title is read from a different key depending on the type.
  ct = type == "Dataset" ? "includedInDataCatalog" : "Periodical"
  container_title = parse_attributes(from_schema_org(meta.fetch(ct, nil)), content: "name", first: true)

  # The schema.org property is "contentSize"; the misspelled "contenSize" key
  # that used to be read here is still honoured as a fallback.
  content_size = meta.fetch("contentSize", nil) || meta.fetch("contenSize", nil)

  content_format = Array.wrap(meta.fetch("encodingFormat", nil) || meta.fetch("fileFormat", nil)).unwrap

  description = meta.fetch("description", nil)
  description = description.present? ? { "text" => sanitize(description) } : nil

  # Keywords may be a comma-separated string or already a list; a list is
  # passed through element by element instead of being stringified and split.
  keywords = meta.fetch("keywords", nil)
  keywords = keywords.is_a?(Array) ? keywords.map(&:to_s) : keywords.to_s.split(", ")

  {
    "id" => id,
    "type" => type,
    "additional_type" => meta.fetch("additionalType", nil),
    "citeproc_type" => Bolognese::Utils::SO_TO_CP_TRANSLATIONS[type] || "article-journal",
    "bibtex_type" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[type] || "misc",
    "ris_type" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[resource_type_general.to_s.dasherize] || "GEN",
    "resource_type_general" => resource_type_general,
    "doi" => validate_doi(id),
    "identifier" => identifier,
    "alternate_identifier" => alternate_identifier,
    "b_url" => normalize_id(meta.fetch("url", nil)),
    "content_url" => Array.wrap(meta.fetch("contentUrl", nil)).unwrap,
    "content_size" => content_size,
    "content_format" => content_format,
    "title" => meta.fetch("name", nil),
    "author" => author,
    "editor" => editor,
    "publisher" => publisher,
    "service_provider" => parse_attributes(meta.fetch("provider", nil), content: "name", first: true),
    "container_title" => container_title,
    "is_identical_to" => schema_org_is_identical_to(meta),
    "is_part_of" => is_part_of,
    "has_part" => schema_org_has_part(meta),
    "references" => schema_org_references(meta),
    "is_referenced_by" => schema_org_is_referenced_by(meta),
    "is_supplement_to" => schema_org_is_supplement_to(meta),
    "is_supplemented_by" => schema_org_is_supplemented_by(meta),
    "date_created" => meta.fetch("dateCreated", nil),
    "date_published" => meta.fetch("datePublished", nil),
    "date_modified" => meta.fetch("dateModified", nil),
    "description" => description,
    "license" => license,
    "b_version" => meta.fetch("version", nil),
    "keywords" => keywords,
    "state" => state,
    "schema_version" => meta.fetch("schemaVersion", nil),
    "funding" => funding
  }
end
|