Class: RelatonW3c::DataParser

Inherits:
Object
  • Object
show all
Includes:
RateLimitHandler
Defined in:
lib/relaton_w3c/data_parser.rb

Constant Summary collapse

# List of type codes. In the method listings on this page only commented-out
# code refers to it.
USED_TYPES =
%w[WD NOTE PER PR REC CR].freeze
# Maps a type code to the doctype name; #parse_doctype looks values up here.
DOCTYPES =
{
  "TR" => "technicalReport",
  "NOTE" => "groupNote",
}.freeze
# Maps a stage code to its full stage name. In the method listings on this
# page only commented-out code refers to it.
STAGES =
{
  "RET" => "Retired",
  "SPSD" => "Superseded Recommendation",
  "OBSL" => "Obsoleted Recommendation",
  "WD" => "Working Draft",
  "CRD" => "Candidate Recommendation Draft",
  "CR" => "Candidate Recommendation",
  "PR" => "Proposed Recommendation",
  "PER" => "Proposed Edited Recommendation",
  "REC" => "Recommendation",
}.freeze

Class Method Summary collapse

Instance Method Summary collapse

Methods included from RateLimitHandler

fetched_objects, #realize

Constructor Details

#initialize(spec) ⇒ DataParser

Document parser initialization

Parameters:

  • spec (W3cApi::Models::SpecVersion)

    entry from the SPARQL query

  • fetcher (RelatonW3c::DataFetcher)

    data fetcher



30
31
32
# File 'lib/relaton_w3c/data_parser.rb', line 30

# Keep the spec so that the instance parsing methods can read it from @spec.
#
# @param spec [W3cApi::Models::SpecVersion] spec to parse
def initialize(spec)
  @spec = spec
end

Class Method Details

.parse(spec) ⇒ RelatonW3c::W3cBibliographicItem?

Initialize document parser and run it

Parameters:

  • spec (W3cApi::Models::SpecVersion)

    entry from the SPARQL query

Returns:

  • (RelatonW3c::W3cBibliographicItem, nil)

    bibliographic item



41
42
43
# File 'lib/relaton_w3c/data_parser.rb', line 41

# Convenience entry point: build a parser for the given spec and run it.
#
# @param spec [W3cApi::Models::SpecVersion] spec to parse
# @return [RelatonW3c::W3cBibliographicItem, nil] whatever #parse returns
def self.parse(spec)
  parser = new(spec)
  parser.parse
end

.parse_identifier(url) ⇒ String

Parse identifier from URL

Parameters:

  • url (String)

    URL

Returns:

  • (String)

    identifier



144
145
146
147
148
149
# File 'lib/relaton_w3c/data_parser.rb', line 144

# Extract a document identifier from a URL.
#
# The pattern captures a path segment made of word characters joined by
# "-" or "+" (dots allowed after the first part), optionally followed by one
# more "/segment". When nothing matches, the text after the last "/" is used.
#
# @param url [#to_s] URL
# @return [String, nil] identifier; nil when the URL string is empty
def self.parse_identifier(url)
  text = url.to_s
  found = text.match(/.+\/(\w+(?:[-+][\w.]+)+(?:\/\w+)?)/)
  return found[1].to_s if found

  text.split("/").last
end

Instance Method Details

#create_editor(unrealized_editor) ⇒ Object



297
298
299
300
301
302
303
304
305
306
# File 'lib/relaton_w3c/data_parser.rb', line 297

# Build an "editor" contribution from an editor reference.
#
# @param unrealized_editor [Object] reference passed to #realize
# @return [RelatonBib::ContributionInfo, nil] nil when #realize yields nothing
def create_editor(unrealized_editor)
  realized = realize(unrealized_editor)
  return nil unless realized

  full_name = RelatonBib::FullName.new(
    surname: RelatonBib::LocalizedString.new(realized.family, "en", "Latn"),
    forename: [RelatonBib::Forename.new(content: realized.given, language: "en", script: "Latn")],
  )
  RelatonBib::ContributionInfo.new(
    entity: RelatonBib::Person.new(name: full_name),
    role: [{ type: "editor" }],
  )
end

#create_relation(version, type, desc = nil) ⇒ RelatonBib::DocumentRelation

Create relation

Parameters:

  • version (Object)

    unrealized spec version; it is realized and its URI becomes the relation URL

  • type (String)

    relation type

  • desc (String, nil) (defaults to: nil)

    relation description

Returns:

  • (RelatonBib::DocumentRelation)

    relation pointing at the bibliographic item built for the realized version



251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/relaton_w3c/data_parser.rb', line 251

# Build a document relation to another spec version.
#
# The version is realized first; its URI supplies both the "src" link and
# the W3C document identifier of the related item.
#
# @param version [Object] unrealized version reference passed to #realize
# @param type [String] relation type
# @param desc [String, nil] relation description
# @return [RelatonBib::DocumentRelation]
def create_relation(version, type, desc = nil)
  realized = realize(version)
  source_url = doc_uri(realized)
  pubid = pub_id(source_url)
  related_item = W3cBibliographicItem.new(
    title: parse_title(realized),
    docid: [RelatonBib::DocumentIdentifier.new(type: "W3C", id: pubid, primary: true)],
    link: [RelatonBib::TypedUri.new(type: "src", content: source_url)],
  )
  description = desc ? RelatonBib::FormattedString.new(content: desc) : nil
  RelatonBib::DocumentRelation.new(type: type, bibitem: related_item, description: description)
end

#doc_uri(spec = @spec) ⇒ Object



94
95
96
# File 'lib/relaton_w3c/data_parser.rb', line 94

# Pick the document URI of a spec: its +uri+ when the object offers one,
# otherwise its +shortlink+.
#
# @param spec [Object] spec to read from (defaults to @spec)
# @return [Object] value of +spec.uri+ or +spec.shortlink+
def doc_uri(spec = @spec)
  return spec.uri if spec.respond_to?(:uri)

  spec.shortlink
end

#identifier(link = doc_uri) ⇒ String

Generate identifier from URL

Parameters:

  • link (String) (defaults to: doc_uri)

Returns:

  • (String)

    identifier



133
134
135
# File 'lib/relaton_w3c/data_parser.rb', line 133

# Derive the identifier for a link by delegating to the class-level
# +parse_identifier+.
#
# @param link [String] URL (defaults to #doc_uri)
# @return [String] identifier
def identifier(link = doc_uri)
  parser_class = self.class
  parser_class.parse_identifier(link)
end

#parse ⇒ RelatonW3c::W3cBibliographicItem?

Parse document

Returns:

  • (RelatonW3c::W3cBibliographicItem, nil)

    bibliographic item



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/relaton_w3c/data_parser.rb', line 50

# Assemble the bibliographic item from the individual parse_* results.
#
# The attributes are gathered in a hash, in the same order as they are passed
# to the constructor, and handed over as keyword arguments.
#
# @return [RelatonW3c::W3cBibliographicItem] bibliographic item
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  attributes = {
    type: "standard",
    doctype: parse_doctype,
    language: ["en"],
    script: ["Latn"],
    docstatus: parse_docstatus,
    title: parse_title,
    link: parse_link,
    docid: parse_docid,
    formattedref: parse_formattedref,
    docnumber: identifier,
    series: parse_series,
    date: parse_date,
    relation: parse_relation,
    contributor: parse_contrib,
    editorialgroup: parse_editorialgroup,
  }
  RelatonW3c::W3cBibliographicItem.new(**attributes)
end

#parse_contrib ⇒ Array<RelatonBib::ContributionInfo>

Parse contributor

Returns:

  • (Array<RelatonBib::ContributionInfo>)

    contributor



280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/relaton_w3c/data_parser.rb', line 280

# Collect the contributors: the W3C as publisher, followed by one entry per
# editor that can be realized.
#
# The editor lookup is nil-safe: when the editors link is empty, cannot be
# realized, or carries no editor list, only the publisher is returned instead
# of raising NoMethodError.
#
# @return [Array<RelatonBib::ContributionInfo>] contributors, publisher first
def parse_contrib # rubocop:disable Metrics/MethodLength
  publisher = RelatonBib::Organization.new(
    name: "World Wide Web Consortium", abbreviation: "W3C", url: "https://www.w3.org/"
  )
  contribs = [RelatonBib::ContributionInfo.new(entity: publisher, role: [type: "publisher"])]
  return contribs unless @spec.links.respond_to?(:editors)

  editors = realize @spec.links.editors
  # #realize may yield nothing (#create_editor guards for that as well)
  editors&.links&.editors&.each do |ed|
    editor = create_editor(ed)
    contribs << editor if editor
  end

  contribs
end

#parse_date ⇒ Array<RelatonBib::BibliographicDate>

Parse date

Returns:

  • (Array<RelatonBib::BibliographicDate>)

    date



199
200
201
202
203
# File 'lib/relaton_w3c/data_parser.rb', line 199

# Build the "published" date from the spec's date.
#
# Returns an empty list when the spec has no +date+ accessor or when the date
# is nil (a nil date used to raise NoMethodError on +to_date+).
#
# @return [Array<RelatonBib::BibliographicDate>] zero or one date
def parse_date
  date = @spec.date if @spec.respond_to?(:date)
  return [] unless date

  [RelatonBib::BibliographicDate.new(type: "published", on: date.to_date.to_s)]
end

#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>

Parse docidentifier

Returns:

  • (Array<RelatonBib::DocumentIdentifier>)

    docidentifier



112
113
114
115
# File 'lib/relaton_w3c/data_parser.rb', line 112

# Build the primary W3C document identifier from the document URI.
#
# @return [Array<RelatonBib::DocumentIdentifier>] single-element list
def parse_docid
  primary_id = RelatonBib::DocumentIdentifier.new(type: "W3C", id: pub_id(doc_uri), primary: true)
  [primary_id]
end

#parse_docstatus ⇒ RelatonBib::DocumentStatus?

Extract document status

Returns:

  • (RelatonBib::DocumentStatus, nil)

    document status



77
78
79
80
81
82
# File 'lib/relaton_w3c/data_parser.rb', line 77

# Wrap the spec's status in a document status object.
#
# @return [RelatonBib::DocumentStatus, nil] nil when the spec has no +status+
#   accessor or the status is empty
def parse_docstatus
  if @spec.respond_to?(:status) && @spec.status
    RelatonBib::DocumentStatus.new(stage: @spec.status)
  end
end

#parse_doctype ⇒ DocumentType?

Parse doctype

Returns:

  • (DocumentType, nil)

    doctype



179
180
181
182
# File 'lib/relaton_w3c/data_parser.rb', line 179

# Resolve the doctype: look the type up in DOCTYPES first and fall back to
# the type taken from the link.
#
# @return [DocumentType, nil] nil when neither lookup finds a doctype name
def parse_doctype
  doctype_name = DOCTYPES[type]
  doctype_name ||= DOCTYPES[type_from_link]
  return unless doctype_name

  DocumentType.new(type: doctype_name)
end

#parse_editorialgroup ⇒ RelatonBib::EditorialGroup?

Parse editorialgroup

Returns:

  • (RelatonBib::EditorialGroup, nil)

    editorialgroup



313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/relaton_w3c/data_parser.rb', line 313

# Build the editorial group from the spec's deliverers, one technical
# committee per deliverer title.
#
# Returns nil when the spec offers no deliverers link, when the link cannot
# be realized, or when the realized object carries no deliverer list. The
# unrealizable case used to raise NoMethodError.
#
# @return [RelatonBib::EditorialGroup, nil] editorial group
def parse_editorialgroup # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  return unless @spec.links.respond_to?(:deliverers)

  groups = realize(@spec.links.deliverers)&.links&.deliverers
  return unless groups

  committees = groups.map do |group|
    RelatonBib::TechnicalCommittee.new(RelatonBib::WorkGroup.new(name: group.title))
  end

  RelatonBib::EditorialGroup.new committees
end

#parse_formattedref ⇒ RelatonBib::FormattedRef?

Parse formattedref

Returns:

  • (RelatonBib::FormattedRef, nil)

    formattedref



269
270
271
272
273
# File 'lib/relaton_w3c/data_parser.rb', line 269

# Build the formatted reference from the PubID of the spec's URI.
#
# @return [RelatonBib::FormattedRef, nil] nil when the spec has no +uri+ accessor
def parse_formattedref
  if @spec.respond_to?(:uri)
    RelatonBib::FormattedRef.new(content: pub_id(@spec.uri))
  end
end

#parse_link ⇒ Array<RelatonBib::TypedUri>

Parse link

Returns:

  • (Array<RelatonBib::TypedUri>)

    link



103
104
105
# File 'lib/relaton_w3c/data_parser.rb', line 103

# Build the link list: a single "src" link pointing at the document URI.
# Editor drafts are not appended.
#
# @return [Array<RelatonBib::TypedUri>] single-element list
def parse_link
  source_link = RelatonBib::TypedUri.new(type: "src", content: doc_uri)
  [source_link]
end

#parse_relation ⇒ Array<RelatonBib::DocumentRelation>

Parse relation

Returns:

  • (Array<RelatonBib::DocumentRelation>)

    relation



210
211
212
213
214
215
216
217
# File 'lib/relaton_w3c/data_parser.rb', line 210

# Build the relations of the spec.
#
# When the spec links offer a version history, every listed version becomes
# a "hasEdition" relation; otherwise the result of #relations is returned.
# A version history that cannot be realized, or that lists no versions,
# yields an empty list instead of raising NoMethodError.
#
# @return [Array<RelatonBib::DocumentRelation>] relations
def parse_relation
  return relations unless @spec.links.respond_to?(:version_history)

  version_history = realize @spec.links.version_history
  versions = version_history&.links&.spec_versions || []
  versions.map { |version| create_relation(version, "hasEdition") }
end

#parse_series ⇒ Array<RelatonBib::Series>

Parse series

Returns:

  • (Array<RelatonBib::Series>)

    series



156
157
158
159
160
161
# File 'lib/relaton_w3c/data_parser.rb', line 156

# Build the series entry "W3C <type>", numbered with the document identifier.
#
# @return [Array<RelatonBib::Series>] empty when there is no type
def parse_series
  if type
    series_title = RelatonBib::TypedTitleString.new(content: "W3C #{type}")
    [RelatonBib::Series.new(title: series_title, number: identifier)]
  else
    []
  end
end

#parse_title(spec = @spec) ⇒ RelatonBib::TypedTitleStringCollection

Parse title

Returns:

  • (RelatonBib::TypedTitleStringCollection)

    title



89
90
91
92
# File 'lib/relaton_w3c/data_parser.rb', line 89

# Wrap the spec's title in a one-element title collection.
#
# @param spec [Object] object responding to +title+ (defaults to @spec)
# @return [RelatonBib::TypedTitleStringCollection] title
def parse_title(spec = @spec)
  RelatonBib::TypedTitleStringCollection.new(
    [RelatonBib::TypedTitleString.new(content: spec.title)],
  )
end

#pub_id(url) ⇒ String

Generate PubID

Returns:

  • (String)

    PubID



122
123
124
# File 'lib/relaton_w3c/data_parser.rb', line 122

# Generate the PubID: the identifier of the URL prefixed with "W3C ".
#
# @param url [String] document URL
# @return [String] PubID
def pub_id(url)
  id = identifier(url)
  "W3C #{id}"
end

#relations ⇒ Array<RelatonBib::DocumentRelation>

Create relations

Returns:

  • (Array<RelatonBib::DocumentRelation>)

    relations



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/relaton_w3c/data_parser.rb', line 224

# Collect relations from the spec links, in this order:
# "editionOf" for the specification link, "obsoletes" for each predecessor
# version, and "updatedBy" (described as "errata") for each successor version.
# Each kind is added only when the links object offers the accessor; the
# version lists additionally require a non-empty link.
#
# @return [Array<RelatonBib::DocumentRelation>] relations
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  links = @spec.links
  found = []
  found.push(create_relation(links.specification, "editionOf")) if links.respond_to?(:specification)

  if links.respond_to?(:predecessor_versions) && links.predecessor_versions
    older = realize(links.predecessor_versions)
    older.links.predecessor_versions.each { |version| found.push(create_relation(version, "obsoletes")) }
  end

  if links.respond_to?(:successor_versions) && links.successor_versions
    newer = realize(links.successor_versions)
    newer.links.successor_versions.each { |version| found.push(create_relation(version, "updatedBy", "errata")) }
  end

  found
end

#type ⇒ String

Extract type

Returns:

  • (String)

    type



168
169
170
171
172
# File 'lib/relaton_w3c/data_parser.rb', line 168

# Type of the document, memoized in @type once a truthy value is found:
# the spec's status when the spec has a +status+ accessor, otherwise
# "technicalReport".
#
# @return [String, nil] type; nil when the spec's status is nil
def type
  return @type if @type

  @type = if @spec.respond_to?(:status)
            @spec.status
          else
            "technicalReport"
          end
end

#type_from_link ⇒ String?

Fetch type from link

Returns:

  • (String, nil)

    type



189
190
191
192
# File 'lib/relaton_w3c/data_parser.rb', line 189

# Fetch the type from the spec's shortlink: "TR" when the link contains
# "www.w3.org/TR", otherwise nil.
#
# A spec without a +shortlink+ accessor, or with a nil shortlink, yields nil
# instead of raising NoMethodError.
#
# @return [String, nil] type
def type_from_link
  return unless @spec.respond_to?(:shortlink)

  # String#[] with a capture index returns the captured group or nil
  @spec.shortlink.to_s[%r{www\.w3\.org/(TR)}, 1]
end