Class: RelatonW3c::DataParser
- Inherits:
-
Object
- Object
- RelatonW3c::DataParser
- Includes:
- RateLimitHandler
- Defined in:
- lib/relaton_w3c/data_parser.rb
Constant Summary collapse
- USED_TYPES =
%w[WD NOTE PER PR REC CR].freeze
- DOCTYPES =
{ "TR" => "technicalReport", "NOTE" => "groupNote", }.freeze
- STAGES =
{ "RET" => "Retired", "SPSD" => "Superseded Recommendation", "OBSL" => "Obsoleted Recommendation", "WD" => "Working Draft", "CRD" => "Candidate Recommendation Draft", "CR" => "Candidate Recommendation", "PR" => "Proposed Recommendation", "PER" => "Proposed Edited Recommendation", "REC" => "Recommendation", }.freeze
Class Method Summary collapse
-
.parse(spec) ⇒ RelatonW3c::W3cBibliographicItem?
Initialize document parser and run it.
-
.parse_identifier(url) ⇒ String
Parse identifier from URL.
Instance Method Summary collapse
- #create_editor(unrealized_editor) ⇒ Object
-
#create_relation(version, type, desc = nil) ⇒ RelatonBib::DocumentRelation
Create relation.
- #doc_uri(spec = @spec) ⇒ Object
-
#identifier(link = doc_uri) ⇒ String
Generate identifier from URL.
-
#initialize(spec) ⇒ DataParser
constructor
Document parser initialization.
-
#parse ⇒ RelatonW3c::W3cBibliographicItem?
Parse document.
-
#parse_contrib ⇒ Array<RelatonBib::ContributionInfo>
Parse contributor.
-
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date.
-
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse docidentifier.
-
#parse_docstatus ⇒ RelatonBib::DocumentStatus?
Extract document status.
-
#parse_doctype ⇒ DocumentType?
Parse doctype.
-
#parse_editorialgroup ⇒ RelatonBib::EditorialGroup
Parse editorialgroup.
-
#parse_formattedref ⇒ RelatonBib::FormattedRef
Parse formattedref.
-
#parse_link ⇒ Array<RelatonBib::TypedUri>
Parse link.
-
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#parse_series ⇒ Array<RelatonBib::Series>
Parse series.
-
#parse_title(spec = @spec) ⇒ RelatonBib::TypedTitleStringCollection
Parse title.
-
#pub_id(url) ⇒ String
Generate PubID.
-
#relations ⇒ Array<RelatonBib::DocumentRelation>
Create relations.
-
#type ⇒ String
Extract type.
-
#type_from_link ⇒ String?
Fetch type from link.
Methods included from RateLimitHandler
Constructor Details
#initialize(spec) ⇒ DataParser
Document parser initialization
30 31 32 |
# File 'lib/relaton_w3c/data_parser.rb', line 30
#
# Document parser initialization.
#
# @param spec [Object] specification resource to parse; it is stored untouched
#   in @spec, which the other instance methods read
def initialize(spec)
  @spec = spec
end
Class Method Details
.parse(spec) ⇒ RelatonW3c::W3cBibliographicItem?
Initialize document parser and run it
41 42 43 |
# File 'lib/relaton_w3c/data_parser.rb', line 41
#
# Initialize document parser and run it.
#
# @param spec [Object] specification resource handed to the constructor
# @return [Object] whatever the instance-level #parse returns
def self.parse(spec)
  parser = new(spec)
  parser.parse
end
.parse_identifier(url) ⇒ String
Parse identifier from URL
144 145 146 147 148 149 |
# File 'lib/relaton_w3c/data_parser.rb', line 144
#
# Parse identifier from URL.
#
# @param url [#to_s] document URL (anything convertible with #to_s, nil included)
# @return [String, nil] the extracted identifier; nil only when the URL has no
#   path segment at all (e.g. nil or an empty string)
def self.parse_identifier(url)
  link = url.to_s
  # The greedy leading `.+/` anchors the capture at the right-most path segment
  # made of a word followed by one or more `-part` / `+part` pieces, optionally
  # followed by exactly one more `/segment`.
  match = /.+\/(\w+(?:[-+][\w.]+)+(?:\/\w+)?)/.match(link)
  return match[1] if match

  # No dashed segment found: fall back to the last path segment.
  link.split("/").last
end
Instance Method Details
#create_editor(unrealized_editor) ⇒ Object
297 298 299 300 301 302 303 304 305 306 |
# File 'lib/relaton_w3c/data_parser.rb', line 297
#
# Build an "editor" contribution from an editor link.
#
# @param unrealized_editor [Object] link passed to `realize` (helper not shown
#   here; presumably provided by RateLimitHandler)
# @return [RelatonBib::ContributionInfo, nil] nil when `realize` returns a
#   falsy value
def create_editor(unrealized_editor)
  realized = realize(unrealized_editor)
  return unless realized

  full_name = RelatonBib::FullName.new(
    surname: RelatonBib::LocalizedString.new(realized.family, "en", "Latn"),
    forename: [RelatonBib::Forename.new(content: realized.given, language: "en", script: "Latn")],
  )
  RelatonBib::ContributionInfo.new(
    entity: RelatonBib::Person.new(name: full_name),
    role: [{ type: "editor" }],
  )
end
#create_relation(version, type, desc = nil) ⇒ RelatonBib::DocumentRelation
Create relation
251 252 253 254 255 256 257 258 259 260 261 262 |
# File 'lib/relaton_w3c/data_parser.rb', line 251
#
# Create relation.
#
# @param version [Object] link to the related version, resolved with `realize`
# @param type [String] relation type (callers in this class pass e.g.
#   "hasEdition", "editionOf", "obsoletes", "updatedBy")
# @param desc [String, nil] optional relation description
# @return [RelatonBib::DocumentRelation]
def create_relation(version, type, desc = nil)
  realized = realize(version)
  uri = doc_uri(realized)
  pubid = pub_id(uri)
  bibitem = W3cBibliographicItem.new(
    title: parse_title(realized),
    docid: [RelatonBib::DocumentIdentifier.new(type: "W3C", id: pubid, primary: true)],
    link: [RelatonBib::TypedUri.new(type: "src", content: uri)],
  )
  # The description stays nil when no text was supplied.
  description = desc ? RelatonBib::FormattedString.new(content: desc) : nil
  RelatonBib::DocumentRelation.new(type: type, bibitem: bibitem, description: description)
end
#doc_uri(spec = @spec) ⇒ Object
94 95 96 |
# File 'lib/relaton_w3c/data_parser.rb', line 94
#
# Pick the document URL of a specification resource.
#
# @param spec [Object] resource to inspect; defaults to the parser's @spec
# @return [Object] `spec.uri` when the resource responds to #uri, otherwise
#   `spec.shortlink`
def doc_uri(spec = @spec)
  return spec.uri if spec.respond_to?(:uri)

  spec.shortlink
end
#identifier(link = doc_uri) ⇒ String
Generate identifier from URL
133 134 135 |
# File 'lib/relaton_w3c/data_parser.rb', line 133
#
# Generate identifier from URL.
#
# @param link [Object] URL to parse; defaults to the result of #doc_uri
# @return [Object] result of the class-level `parse_identifier`
def identifier(link = doc_uri)
  parser_class = self.class
  parser_class.parse_identifier(link)
end
#parse ⇒ RelatonW3c::W3cBibliographicItem?
Parse document
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/relaton_w3c/data_parser.rb', line 50
#
# Parse document.
#
# Collects the result of every parse_* helper, in the order listed below, and
# passes them as keyword arguments to the bibliographic item constructor.
#
# @return [RelatonW3c::W3cBibliographicItem]
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  attrs = {
    type: "standard",
    doctype: parse_doctype,
    language: ["en"],
    script: ["Latn"],
    docstatus: parse_docstatus,
    title: parse_title,
    link: parse_link,
    docid: parse_docid,
    formattedref: parse_formattedref,
    docnumber: identifier,
    series: parse_series,
    date: parse_date,
    relation: parse_relation,
    contributor: parse_contrib,
    editorialgroup: parse_editorialgroup,
  }
  RelatonW3c::W3cBibliographicItem.new(**attrs)
end
#parse_contrib ⇒ Array<RelatonBib::ContributionInfo>
Parse contributor
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 |
# File 'lib/relaton_w3c/data_parser.rb', line 280
#
# Parse contributor.
#
# The W3C publisher contribution always comes first. Editor contributions are
# appended when the spec's links expose an `editors` link.
#
# @return [Array<RelatonBib::ContributionInfo>]
def parse_contrib # rubocop:disable Metrics/MethodLength
  w3c = RelatonBib::Organization.new(
    name: "World Wide Web Consortium",
    abbreviation: "W3C",
    url: "https://www.w3.org/",
  )
  contribs = [RelatonBib::ContributionInfo.new(entity: w3c, role: [{ type: "publisher" }])]
  return contribs unless @spec.links.respond_to?(:editors)

  realized = realize(@spec.links.editors)
  # The realized resource may carry no editors list at all, hence `&.`.
  realized.links.editors&.each do |unrealized|
    contribution = create_editor(unrealized)
    contribs.push(contribution) if contribution
  end
  contribs
end
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date
199 200 201 202 203 |
# File 'lib/relaton_w3c/data_parser.rb', line 199
#
# Parse date.
#
# @return [Array<RelatonBib::BibliographicDate>] a single "published" date, or
#   an empty array when the spec does not respond to #date
def parse_date
  if @spec.respond_to?(:date)
    published_on = @spec.date.to_date.to_s
    [RelatonBib::BibliographicDate.new(type: "published", on: published_on)]
  else
    []
  end
end
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse docidentifier
112 113 114 115 |
# File 'lib/relaton_w3c/data_parser.rb', line 112
#
# Parse docidentifier.
#
# @return [Array<RelatonBib::DocumentIdentifier>] one primary "W3C" identifier
#   built from the document URL
def parse_docid
  primary = RelatonBib::DocumentIdentifier.new(type: "W3C", id: pub_id(doc_uri), primary: true)
  [primary]
end
#parse_docstatus ⇒ RelatonBib::DocumentStatus?
Extract document status
77 78 79 80 81 82 |
# File 'lib/relaton_w3c/data_parser.rb', line 77
#
# Extract document status.
#
# @return [RelatonBib::DocumentStatus, nil] nil when the spec has no #status
#   method or its status is falsy
def parse_docstatus
  if @spec.respond_to?(:status) && @spec.status
    RelatonBib::DocumentStatus.new(stage: @spec.status)
  end
end
#parse_doctype ⇒ DocumentType?
Parse doctype
179 180 181 182 |
# File 'lib/relaton_w3c/data_parser.rb', line 179
#
# Parse doctype.
#
# Looks the document type up in DOCTYPES, first by #type and then by the type
# derived from the short link.
#
# @return [DocumentType, nil] nil when neither lookup finds an entry
def parse_doctype
  doctype = DOCTYPES[type]
  doctype ||= DOCTYPES[type_from_link]
  return unless doctype

  DocumentType.new(type: doctype)
end
#parse_editorialgroup ⇒ RelatonBib::EditorialGroup
Parse editorialgroup
313 314 315 316 317 318 319 320 321 322 323 324 325 |
# File 'lib/relaton_w3c/data_parser.rb', line 313
#
# Parse editorialgroup.
#
# @return [RelatonBib::EditorialGroup, nil] nil when the spec's links expose no
#   `deliverers` link or the realized resource lists no deliverers
def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  return unless @spec.links.respond_to?(:deliverers)

  realized = realize(@spec.links.deliverers)
  return unless realized.links.deliverers

  # One technical committee per delivering group, named after the group title.
  committees = realized.links.deliverers.map do |group|
    RelatonBib::TechnicalCommittee.new(RelatonBib::WorkGroup.new(name: group.title))
  end
  RelatonBib::EditorialGroup.new(committees)
end
#parse_formattedref ⇒ RelatonBib::FormattedRef
Parse formattedref
269 270 271 272 273 |
# File 'lib/relaton_w3c/data_parser.rb', line 269
#
# Parse formattedref.
#
# @return [RelatonBib::FormattedRef, nil] reference holding the PubID of the
#   spec's URI; nil when the spec does not respond to #uri
def parse_formattedref
  RelatonBib::FormattedRef.new(content: pub_id(@spec.uri)) if @spec.respond_to?(:uri)
end
#parse_link ⇒ Array<RelatonBib::TypedUri>
Parse link
103 104 105 |
# File 'lib/relaton_w3c/data_parser.rb', line 103
#
# Parse link.
#
# @return [Array<RelatonBib::TypedUri>] a single "src" link to the document URL
def parse_link
  source = RelatonBib::TypedUri.new(type: "src", content: doc_uri)
  [source]
end
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation
210 211 212 213 214 215 216 217 |
# File 'lib/relaton_w3c/data_parser.rb', line 210
#
# Parse relation.
#
# When the spec's links expose a version history, every listed version becomes
# a "hasEdition" relation; otherwise the result of #relations is returned.
#
# @return [Array<RelatonBib::DocumentRelation>]
def parse_relation
  return relations unless @spec.links.respond_to?(:version_history)

  history = realize(@spec.links.version_history)
  history.links.spec_versions.map { |version| create_relation(version, "hasEdition") }
end
#parse_series ⇒ Array<RelatonBib::Series>
Parse series
156 157 158 159 160 161 |
# File 'lib/relaton_w3c/data_parser.rb', line 156
#
# Parse series.
#
# @return [Array<RelatonBib::Series>] one series titled "W3C <type>" and
#   numbered with the document identifier; empty when #type is falsy
def parse_series
  if type
    series_title = RelatonBib::TypedTitleString.new(content: "W3C #{type}")
    [RelatonBib::Series.new(title: series_title, number: identifier)]
  else
    []
  end
end
#parse_title(spec = @spec) ⇒ RelatonBib::TypedTitleStringCollection
Parse title
89 90 91 92 |
# File 'lib/relaton_w3c/data_parser.rb', line 89
#
# Parse title.
#
# @param spec [Object] resource whose #title is read; defaults to @spec
# @return [RelatonBib::TypedTitleStringCollection] collection holding one title
def parse_title(spec = @spec)
  main_title = RelatonBib::TypedTitleString.new(content: spec.title)
  RelatonBib::TypedTitleStringCollection.new([main_title])
end
#pub_id(url) ⇒ String
Generate PubID
122 123 124 |
# File 'lib/relaton_w3c/data_parser.rb', line 122
#
# Generate PubID.
#
# @param url [Object] document URL handed to #identifier
# @return [String] the identifier prefixed with "W3C "
def pub_id(url)
  "W3C #{identifier(url)}"
end
#relations ⇒ Array<RelatonBib::DocumentRelation>
Create relations
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/relaton_w3c/data_parser.rb', line 224
#
# Create relations.
#
# Builds, in this order: an "editionOf" relation to the parent specification,
# "obsoletes" relations for predecessor versions, and "updatedBy" relations
# (described as "errata") for successor versions. Each group is added only
# when the spec's links expose the corresponding link.
#
# @return [Array<RelatonBib::DocumentRelation>]
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  collected = []
  if @spec.links.respond_to?(:specification)
    collected.push(create_relation(@spec.links.specification, "editionOf"))
  end

  has_predecessors = @spec.links.respond_to?(:predecessor_versions) && @spec.links.predecessor_versions
  if has_predecessors
    realized = realize(@spec.links.predecessor_versions)
    realized.links.predecessor_versions.each do |version|
      collected.push(create_relation(version, "obsoletes"))
    end
  end

  has_successors = @spec.links.respond_to?(:successor_versions) && @spec.links.successor_versions
  if has_successors
    realized = realize(@spec.links.successor_versions)
    realized.links.successor_versions.each do |version|
      collected.push(create_relation(version, "updatedBy", "errata"))
    end
  end

  collected
end
#type ⇒ String
Extract type
168 169 170 171 172 |
# File 'lib/relaton_w3c/data_parser.rb', line 168
#
# Extract type.
#
# The value is memoized in @type once it is truthy.
#
# @return [Object] the spec's status when the spec responds to #status,
#   otherwise the string "technicalReport"
def type
  return @type if @type

  @type = if @spec.respond_to?(:status)
            @spec.status
          else
            "technicalReport"
          end
end
#type_from_link ⇒ String?
Fetch type from link
189 190 191 192 |
# File 'lib/relaton_w3c/data_parser.rb', line 189
#
# Fetch type from link.
#
# @return [String, nil] "TR" when the stripped short link contains
#   "www.w3.org/TR", otherwise nil
def type_from_link
  matched = @spec.shortlink.strip.match(/www\.w3\.org\/(TR)/)
  return nil unless matched

  matched.to_a.fetch(1)
end