Class: DJNML

Inherits:
Object
  • Object
show all
Defined in:
lib/djnml.rb,
lib/djnml/codes.rb,
lib/djnml/delete.rb,
lib/djnml/modification.rb

Defined Under Namespace

Classes: Codes, Delete, FileError, Modification

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data = {}) ⇒ DJNML

Returns a new instance of DJNML.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/djnml.rb', line 54

# Builds a DJNML document from a hash of already-extracted values.
#
# Every key is optional; scalar attributes are stored exactly as given,
# except 'msize' and 'seq' (converted with #to_i) and the three date
# fields (parsed with Time.parse when present).
#
# @param data [Hash{String => Object}] attribute values keyed by the
#   attribute name as a String (e.g. 'md5', 'doc_date', 'geo_code')
# @return [DJNML] a new instance of DJNML
def initialize(data = {})
  # Time.parse(nil) raises TypeError, which made the default-argument call
  # (DJNML.new, as issued by DJNML.load) fail; absent dates now stay nil.
  parse_time = lambda { |value| Time.parse(value) if value }

  # Wraps every raw entry stored under +key+ in a Codes object. Array()
  # gives [] for a missing key and also accepts a single scalar entry.
  codes_for = lambda { |key| Array(data[key]).map { |c| Codes.new(c) } }

  @msize             = data['msize'].to_i
  @md5               = data['md5']
  @sys_id            = data['sys_id']
  @destination       = data['destination']
  @dist_id           = data['dist_id']
  @transmission_date = parse_time.call(data['transmission_date'])
  @publisher         = data['publisher']
  @doc_date          = parse_time.call(data['doc_date'])
  @product           = data['product']
  @seq               = data['seq'].to_i
  @lang              = data['lang']
  @news_source       = data['news_source']
  @origin            = data['origin']
  @service_id        = data['service_id']
  @urgency           = data['urgency']
  @brand             = data['brand']
  @temp_perm         = data['temp_perm']
  @retention         = data['retention']
  @hot               = data['hot']
  @original_source   = data['original_source']
  @accession_number  = data['accession_number']
  @page_citation     = data['page_citation']
  @display_date      = parse_time.call(data['display_date'])
  @company_code      = data['company_code']
  @isin_code         = data['isin_code']
  @page_code         = data['page_code']

  @industry_code   = codes_for.call('industry_code')
  @government_code = codes_for.call('government_code')
  @subject_code    = codes_for.call('subject_code')
  @market_code     = codes_for.call('market_code')
  @product_code    = codes_for.call('product_code')
  @geo_code        = codes_for.call('geo_code')
  @stat_code       = codes_for.call('stat_code')
  # Previously read from 'stat_code' by mistake, duplicating @stat_code.
  @journal_code    = codes_for.call('journal_code')
  @routing_code    = codes_for.call('routing_code')
  @content_code    = codes_for.call('content_code')
  @function_code   = codes_for.call('function_code')

  @headline         = data['headline']
  @headline_brand   = data['headline_brand']
  @html             = data['html']
  @text             = data['text']
  @copyright_year   = data['copyright_year']
  @copyright_holder = data['copyright_holder']
  @website          = data['website']
  @company_name     = data['company_name']
  @company_address  = data['company_address']
  @company_zip      = data['company_zip']
  @company_city     = data['company_city']
  @language         = data['language']
end

Instance Attribute Details

#accession_numberObject (readonly)

Returns the value of attribute accession_number.



38
39
40
# File 'lib/djnml.rb', line 38

# Accession number; #load reads it from the djn-mdata 'accession-number' attribute.
def accession_number
  @accession_number
end

#brandObject (readonly)

Returns the value of attribute brand.



38
39
40
# File 'lib/djnml.rb', line 38

# Brand; #load reads it from the djn-mdata 'brand' attribute.
def brand
  @brand
end

#company_addressObject (readonly)

Returns the value of attribute company_address.



38
39
40
# File 'lib/djnml.rb', line 38

# Company address; #load takes it from the second capture of the "Company:" pattern matched against the story text.
def company_address
  @company_address
end

#company_cityObject (readonly)

Returns the value of attribute company_city.



38
39
40
# File 'lib/djnml.rb', line 38

# Company city; #load takes it from the fourth capture of the "Company:" pattern matched against the story text.
def company_city
  @company_city
end

#company_codeObject (readonly)

Returns the value of attribute company_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Company codes; after #load this is an Array of stripped djn-company/c tag texts (plain Strings, not Codes).
def company_code
  @company_code
end

#company_nameObject (readonly)

Returns the value of attribute company_name.



38
39
40
# File 'lib/djnml.rb', line 38

# Company name; #load takes it from the first capture of the "Company:" pattern matched against the story text.
def company_name
  @company_name
end

#company_zipObject (readonly)

Returns the value of attribute company_zip.



38
39
40
# File 'lib/djnml.rb', line 38

# Company zip code; #load takes it from the third (digits-only) capture of the "Company:" pattern and keeps it as a String.
def company_zip
  @company_zip
end

#content_codeObject (readonly)

Returns the value of attribute content_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Content codes as an Array of Codes; #load builds one per djn-coding/djn-content/c tag.
def content_code
  @content_code
end

#copyright_holder ⇒ Object (readonly)

Returns the value of attribute copyright_holder.



38
39
40
# File 'lib/djnml.rb', line 38

# Copyright holder; #load reads it from the 'holder' attribute of the head/copyright tag.
def copyright_holder
  @copyright_holder
end

#copyright_year ⇒ Object (readonly)

Returns the value of attribute copyright_year.



38
39
40
# File 'lib/djnml.rb', line 38

# Copyright year; #load converts the head/copyright 'year' attribute to an Integer, #initialize stores data['copyright_year'] as given.
def copyright_year
  @copyright_year
end

#deleteObject (readonly)

Returns the value of attribute delete.



38
39
40
# File 'lib/djnml.rb', line 38

# Array of Delete objects, one per administration/doc-delete tag; assigned by #load only, so nil before #load has run.
def delete
  @delete
end

#destinationObject (readonly)

Returns the value of attribute destination.



38
39
40
# File 'lib/djnml.rb', line 38

# Destination; #load reads it from the 'destination' attribute of the /doc tag.
def destination
  @destination
end

#display_dateObject (readonly)

Returns the value of attribute display_date.



38
39
40
# File 'lib/djnml.rb', line 38

# Display date as a Time; #load parses it from the djn-mdata 'display-date' attribute.
def display_date
  @display_date
end

#dist_idObject (readonly)

Returns the value of attribute dist_id.



38
39
40
# File 'lib/djnml.rb', line 38

# Distribution id; #load reads it from the 'distId' attribute of the /doc tag.
def dist_id
  @dist_id
end

#doc_dateObject (readonly)

Returns the value of attribute doc_date.



38
39
40
# File 'lib/djnml.rb', line 38

# Document date as a Time; #load parses it from the 'docdate' attribute of the /doc/djnml tag.
def doc_date
  @doc_date
end

#function_codeObject (readonly)

Returns the value of attribute function_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Function codes as an Array of Codes; #load builds one per djn-coding/djn-function/c tag.
def function_code
  @function_code
end

#geo_codeObject (readonly)

Returns the value of attribute geo_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Geo codes as an Array of Codes; #load builds one per djn-coding/djn-geo/c tag.
def geo_code
  @geo_code
end

#government_codeObject (readonly)

Returns the value of attribute government_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Government codes as an Array of Codes; #load builds one per djn-coding/djn-government/c tag.
def government_code
  @government_code
end

#headlineObject (readonly)

Returns the value of attribute headline.



38
39
40
# File 'lib/djnml.rb', line 38

# Headline; #load stores the stripped text of the body/headline tag.
def headline
  @headline
end

#headline_brandObject (readonly)

Returns the value of attribute headline_brand.



38
39
40
# File 'lib/djnml.rb', line 38

# Headline brand; #load assigns it from the headline 'brand-display' attribute only when that attribute is present.
def headline_brand
  @headline_brand
end

#hotObject (readonly)

Returns the value of attribute hot.



38
39
40
# File 'lib/djnml.rb', line 38

# Raw value of the djn-mdata 'hot' attribute as read by #load.
def hot
  @hot
end

#htmlObject (readonly)

Returns the value of attribute html.



38
39
40
# File 'lib/djnml.rb', line 38

# Story markup; #load stores the XML serialization of the children of the body/text tag.
def html
  @html
end

#industry_codeObject (readonly)

Returns the value of attribute industry_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Industry codes as an Array of Codes; #load builds one per djn-coding/djn-industry/c tag.
def industry_code
  @industry_code
end

#isin_codeObject (readonly)

Returns the value of attribute isin_code.



38
39
40
# File 'lib/djnml.rb', line 38

# ISIN codes; after #load this is an Array of stripped djn-isin/c tag texts (plain Strings, not Codes).
def isin_code
  @isin_code
end

#journal_codeObject (readonly)

Returns the value of attribute journal_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Journal codes as an Array of Codes; #load builds one per djn-coding/djn-journal/c tag.
def journal_code
  @journal_code
end

#langObject (readonly)

Returns the value of attribute lang.



38
39
40
# File 'lib/djnml.rb', line 38

# Language as declared by the document; #load reads it from the 'lang' attribute of the /doc/djnml tag.
def lang
  @lang
end

#languageObject (readonly)

Returns the value of attribute language.



38
39
40
# File 'lib/djnml.rb', line 38

# Detected language; #load stores the result of LanguageDetector.instance.detect on the story text.
def language
  @language
end

#market_codeObject (readonly)

Returns the value of attribute market_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Market codes as an Array of Codes; #load builds one per djn-coding/djn-market/c tag.
def market_code
  @market_code
end

#md5Object (readonly)

Returns the value of attribute md5.



38
39
40
# File 'lib/djnml.rb', line 38

# Raw value of the 'md5' attribute of the /doc tag as read by #load.
def md5
  @md5
end

#modificationsObject (readonly)

Returns the value of attribute modifications.



38
39
40
# File 'lib/djnml.rb', line 38

# Array of Modification objects, one per doc-modify/modify-replace tag; assigned by #load only, so nil before #load has run.
def modifications
  @modifications
end

#msizeObject (readonly)

Returns the value of attribute msize.



38
39
40
# File 'lib/djnml.rb', line 38

# Integer obtained with #to_i from the 'msize' attribute of the /doc tag (or from data['msize'] in #initialize).
def msize
  @msize
end

#news_sourceObject (readonly)

Returns the value of attribute news_source.



38
39
40
# File 'lib/djnml.rb', line 38

# News source; #load reads it from the 'news-source' attribute of the djn-newswires tag.
def news_source
  @news_source
end

#originObject (readonly)

Returns the value of attribute origin.



38
39
40
# File 'lib/djnml.rb', line 38

# Origin; #load reads it from the 'origin' attribute of the djn-newswires tag.
def origin
  @origin
end

#original_sourceObject (readonly)

Returns the value of attribute original_source.



38
39
40
# File 'lib/djnml.rb', line 38

# Original source; #load reads it from the djn-mdata 'original-source' attribute.
def original_source
  @original_source
end

#page_citationObject (readonly)

Returns the value of attribute page_citation.



38
39
40
# File 'lib/djnml.rb', line 38

# Page citation; #load reads it from the djn-mdata 'page-citation' attribute.
def page_citation
  @page_citation
end

#page_codeObject (readonly)

Returns the value of attribute page_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Page codes; after #load this is an Array of stripped djn-page/c tag texts (plain Strings, not Codes).
def page_code
  @page_code
end

#productObject (readonly)

Returns the value of attribute product.



38
39
40
# File 'lib/djnml.rb', line 38

# Product; #load reads it from the 'product' attribute of the /doc/djnml tag.
def product
  @product
end

#product_codeObject (readonly)

Returns the value of attribute product_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Product codes as an Array of Codes; #load builds one per djn-coding/djn-product/c tag.
def product_code
  @product_code
end

#publisherObject (readonly)

Returns the value of attribute publisher.



38
39
40
# File 'lib/djnml.rb', line 38

# Publisher; #load reads it from the 'publisher' attribute of the /doc/djnml tag.
def publisher
  @publisher
end

#retentionObject (readonly)

Returns the value of attribute retention.



38
39
40
# File 'lib/djnml.rb', line 38

# Raw value of the djn-mdata 'retention' attribute as read by #load.
def retention
  @retention
end

#routing_codeObject (readonly)

Returns the value of attribute routing_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Routing codes as an Array of Codes; #load builds one per djn-coding/djn-routing/c tag.
def routing_code
  @routing_code
end

#seqObject (readonly)

Returns the value of attribute seq.



38
39
40
# File 'lib/djnml.rb', line 38

# Integer obtained with #to_i from the 'seq' attribute of the /doc/djnml tag (or from data['seq'] in #initialize).
def seq
  @seq
end

#service_idObject (readonly)

Returns the value of attribute service_id.



38
39
40
# File 'lib/djnml.rb', line 38

# Service id; #load reads it from the 'service-id' attribute of the djn-newswires tag.
def service_id
  @service_id
end

#stat_codeObject (readonly)

Returns the value of attribute stat_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Stat codes as an Array of Codes; #load builds one per djn-coding/djn-stat/c tag.
def stat_code
  @stat_code
end

#subject_codeObject (readonly)

Returns the value of attribute subject_code.



38
39
40
# File 'lib/djnml.rb', line 38

# Subject codes as an Array of Codes; #load builds one per djn-coding/djn-subject/c tag.
def subject_code
  @subject_code
end

#sys_idObject (readonly)

Returns the value of attribute sys_id.



38
39
40
# File 'lib/djnml.rb', line 38

# System id; #load reads it from the 'sysId' attribute of the /doc tag.
def sys_id
  @sys_id
end

#temp_permObject (readonly)

Returns the value of attribute temp_perm.



38
39
40
# File 'lib/djnml.rb', line 38

# Raw value of the djn-mdata 'temp-perm' attribute as read by #load.
def temp_perm
  @temp_perm
end

#textObject (readonly)

Returns the value of attribute text.



38
39
40
# File 'lib/djnml.rb', line 38

# Story text; #load stores the stripped text content of the body/text tag's children. nil means no text was loaded (see #has_content?).
def text
  @text
end

#transmission_dateObject (readonly)

Returns the value of attribute transmission_date.



38
39
40
# File 'lib/djnml.rb', line 38

# Transmission date as a Time; #load parses it from the 'transmission-date' attribute of the /doc tag.
def transmission_date
  @transmission_date
end

#urgencyObject (readonly)

Returns the value of attribute urgency.



38
39
40
# File 'lib/djnml.rb', line 38

# Urgency; #load converts the djn-urgency tag text to an Integer, #initialize stores data['urgency'] as given.
def urgency
  @urgency
end

#websiteObject (readonly)

Returns the value of attribute website.



38
39
40
# File 'lib/djnml.rb', line 38

# Website; #load takes the stripped remainder of the first story-text line matching "Internet: ...".
def website
  @website
end

Class Method Details

.load(filename) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
# File 'lib/djnml.rb', line 106

# Convenience constructor: creates an empty DJNML and fills it from the
# XML file at +filename+ via the instance method #load.
#
# @param filename [String, nil] path of the DJNML XML file
# @return [DJNML, nil] the loaded document (the instance #load returns
#   self), or nil when +filename+ is nil/false
# @raise [FileError] when +filename+ does not name an existing file
def self.load(filename)
  return nil unless filename

  # File.exists? was deprecated for years and removed in Ruby 3.2;
  # File.exist? is the supported spelling on every Ruby version.
  raise FileError.new("#{filename}: no such file!") unless File.exist?(filename)

  new.load(filename)
end

Instance Method Details

#has_content? ⇒ Boolean

Returns:

  • (Boolean)


458
459
460
# File 'lib/djnml.rb', line 458

# Tells whether a story text is present.
#
# Only nil counts as "no content"; an empty String still returns true.
#
# @return [Boolean] false when #text is nil, true otherwise
def has_content?
  text.nil? ? false : true
end

#load(filename) ⇒ Object



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# File 'lib/djnml.rb', line 118

# Parses the DJNML XML file at +filename+ and fills this object's attributes.
#
# Most sections of the document are read on a best-effort basis: when a
# section is missing or malformed the error is swallowed and the attributes
# not yet assigned in that section keep their previous values. The
# "Company:" text match and the modifications list are NOT protected and
# may raise.
#
# @param filename [String] path of the DJNML XML file
# @return [DJNML] self
# @raise [FileError] when +filename+ does not name an existing file
def load(filename)
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  unless File.exist?(filename)
    raise FileError.new("#{filename}: no such file!")
  end

  # The block form closes the handle once parsing is done. The previous
  # Kernel#open call left the file open and would interpret a name that
  # starts with "|" as a command to run.
  parser = File.open(filename) { |file| Nokogiri::XML(file) }

  newswires_path = '/doc/djnml/head/docdata/djn/djn-newswires'
  mdata_path     = "#{newswires_path}/djn-mdata"
  coding_path    = "#{mdata_path}/djn-coding"

  # doc tag
  #
  begin
    doc                = parser.search('/doc').first
    @msize             = doc['msize'].to_i
    @md5               = doc['md5']
    @sys_id            = doc['sysId']
    @destination       = doc['destination']
    @dist_id           = doc['distId']
    @transmission_date = Time.parse(doc['transmission-date'])
  rescue StandardError
    # best effort: tag or date missing/invalid
  end

  # djnml tag
  #
  begin
    djnml      = parser.search('/doc/djnml').first
    @publisher = djnml['publisher']
    @product   = djnml['product']
    @seq       = djnml['seq'].to_i
    @lang      = djnml['lang']
    # Parsed last so that a missing or invalid docdate no longer prevents
    # product, seq and lang from being read.
    @doc_date  = Time.parse(djnml['docdate'])
  rescue StandardError
    # best effort: tag or date missing/invalid
  end

  # djn-newswires tag
  #
  begin
    newswires    = parser.search(newswires_path).first
    @news_source = newswires['news-source']
    @origin      = newswires['origin']
    @service_id  = newswires['service-id']
  rescue StandardError
    # best effort: tag missing
  end

  # djn-urgency tag
  #
  begin
    urgency  = parser.search("#{newswires_path}/djn-urgency").first
    # No String#squeeze here: it collapses repeated characters, which
    # turned an urgency of "11" into 1.
    @urgency = urgency.text.strip.to_i
  rescue StandardError
    # best effort: tag missing
  end

  # djn-mdata
  #
  begin
    mdata             = parser.search(mdata_path).first
    @brand            = mdata['brand']
    @temp_perm        = mdata['temp-perm']
    @retention        = mdata['retention']
    @hot              = mdata['hot']
    @original_source  = mdata['original-source']
    @accession_number = mdata['accession-number']
    @page_citation    = mdata['page-citation']
    @display_date     = Time.parse(mdata['display-date'])
  rescue StandardError
    # best effort: tag or date missing/invalid
  end

  # coding / company, isin, page: kept as plain stripped strings
  #
  %w[company isin page].each do |kind|
    begin
      tags = parser.search("#{coding_path}/djn-#{kind}/c")
      instance_variable_set("@#{kind}_code", tags.map { |tag| tag.text.strip })
    rescue StandardError
      # best effort: leave @<kind>_code untouched
    end
  end

  # coding / everything else: each <c> tag becomes a Codes object
  #
  %w[industry government subject market product geo
     stat journal routing content function].each do |kind|
    begin
      tags = parser.search("#{coding_path}/djn-#{kind}/c")
      instance_variable_set("@#{kind}_code",
                            tags.map { |tag| Codes.new(tag.text.strip) })
    rescue StandardError
      # best effort: leave @<kind>_code untouched
    end
  end

  # body / headline
  #
  begin
    headline  = parser.search('/doc/djnml/body/headline').first
    @headline = headline.text.strip
    @headline_brand = headline['brand-display'] if headline['brand-display']
  rescue StandardError
    # best effort: headline missing
  end

  # body / text
  #
  begin
    body  = parser.search('/doc/djnml/body/text').first
    @html = body.children.to_xml
    @text = body.children.text.strip
  rescue StandardError
    # best effort: text missing
  end

  # copyright
  #
  begin
    copyright         = parser.search('/doc/djnml/head/copyright').first
    @copyright_year   = copyright['year'].to_s.strip.to_i
    @copyright_holder = copyright['holder']
  rescue StandardError
    # best effort: copyright missing
  end

  # website: remainder of the first "Internet: ..." line of the text
  #
  begin
    if @text =~ /Internet:\s+(.+?)$/
      @website = Regexp.last_match(1).strip
    end
  rescue StandardError
    # best effort
  end

  # company block: name, address, then "<zip> <city>" on following lines
  #
  if @text =~ /Company:\s+(\S.+?)\s*\n+\s+(\b.+?)\n+\s+(\d+)\s+(\b.+?)\n+/
    name, address, zip, city = Regexp.last_match.captures.map { |part| part.strip }
    @company_name    = name
    @company_address = address
    @company_zip     = zip
    @company_city    = city
  end

  # language
  #
  begin
    @language = LanguageDetector.instance.detect(@text)
  rescue StandardError
    # best effort: detector unavailable or failed
  end

  # stories to delete
  #
  begin
    @delete = []
    parser.search('/doc/djnml/administration/doc-delete').each do |dd|
      @delete << Delete.new(:product => dd['product'],
                            :doc_date => dd['docdate'],
                            :seq => dd['seq'],
                            :publisher => dd['publisher'],
                            :reason => dd['reason'])
    end
  rescue StandardError
    # best effort: keep whatever was collected so far
  end

  # replacements
  #
  # Each modify-replace is paired with its own enclosing doc-modify tag;
  # previously every replacement was attributed to the first doc-modify.
  @modifications = []
  parser.search('/doc/djnml/administration/doc-modify').each do |doc_modify|
    doc_modify.xpath('./modify-replace').each do |replacement|
      @modifications << Modification.new(:doc_date => doc_modify['docdate'],
                                         :product => doc_modify['product'],
                                         :publisher => doc_modify['publisher'],
                                         :seq => doc_modify['seq'],
                                         :xml => replacement)
    end
  end

  self
end