Class: Document

Inherits:
ApplicationRecord show all
Includes:
Housekeeping, Shared::Identifiers, Shared::IsData, Shared::Notes, Shared::Tags, SoftValidation
Defined in:
app/models/document.rb

Overview

A Document is a digital file that has text inhering within it. Handled formats are PDF and plain text at present.

Documents are to Documentation as Images are to Depictions.

Constant Summary

Constants included from SoftValidation

SoftValidation::ANCESTORS_WITH_SOFT_VALIDATIONS

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from SoftValidation

#clear_soft_validations, #fix_for, #fix_soft_validations, #soft_fixed?, #soft_valid?, #soft_validate, #soft_validated?, #soft_validations, #soft_validators

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Shared::Tags

#reject_tags, #tag_with, #tagged?, #tagged_with?

Methods included from Shared::Notes

#concatenated_notes_string, #reject_notes

Methods included from Shared::Identifiers

#dwc_occurrence_id, #identified?, #next_by_identifier, #previous_by_identifier, #reject_identifiers, #uri, #uuid

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Instance Attribute Details

#document_file_content_typeString

Returns the content type (mime).

Returns:

  • (String)

    the content type (mime)



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it. The attachment
# validations below accept PDF, plain-text, XML and octet-stream uploads.
#
# Documents are to Documentation as Images are to Depictions.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Virtual attribute; its writer is overridden below so that assigning it also
  # rebuilds page_map.
  attr_accessor :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file,
    filename_cleaner:  Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # Only re-read PDF metadata when the attachment size changed and the content
  # type mentions "pdf".
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page maps to
  # printed page +sp+.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map of PDF page (Integer, 1-based) to printed page (String).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or +sp+ is not set
  def get_page_map(sp = 1)
    m = {}
    if page_total && sp
      (0..(page_total - 1)).each do |p|
        m[p + 1] = (p + sp.to_i).to_s
      end
    end
    m
  end

  # Finds the PDF pages on which a printed page appears.
  #
  # Values in page_map may be a single printed page or a list of them; both are
  # compared by exact string equality, so asking for "30" does not match "300".
  #
  # @param printed_page [#to_s]
  # @return [Array] the page_map keys whose value is, or contains, +printed_page+
  def pdf_page_for(printed_page)
    target = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), matches|
      matches.push(pdf_page) if Array(printed).map(&:to_s).include?(target)
    end
  end

  # Sets the printed page(s) for one or more PDF pages and persists page_map.
  #
  # @param index [Object, Array] a PDF page, or an Array of PDF pages
  # @param page [Object, Array] a printed page, or an Array of printed pages
  # @return [false] when both arguments are Arrays, or when a single index is
  #   greater than page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)
    return false if !index.kind_of?(Array) && (index.to_i > page_total)

    p = page_map

    # Keys are always stored as strings
    [index].flatten.map(&:to_s).each do |i|
      if page.kind_of?(Array)
        p[i] = page.map(&:to_s)
      else
        p[i] = page.to_s
      end
    end

    update_attribute(:page_map, p)
  end

  # Writer for the virtual attribute: rebuilds page_map from +value+ before
  # remembering the value itself.
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` command on the stored file.
  #
  # The command is given as an argument vector so the path is never interpreted
  # by a shell (paths containing spaces or metacharacters are passed verbatim).
  #
  # @return [String] whatever the command writes to standard output
  def pdftotext
    IO.popen(['pdftotext', '-layout', document_file.path.to_s, '-'], &:read)
  end

  protected

  # before_destroy guard: aborts the destroy when more than one documentation
  # record references this document.
  def check_for_documentation
    if documentation.count > 1
      errors.add(:base, 'document is used in more than one place, remove documentation first')
      throw :abort
    end
  end

  # before_save callback: stores the PDF's page count in page_total and, when
  # initialize_start_page is set, rebuilds page_map from it.
  #
  # PDF::Reader failures are recorded on errors[:base] instead of being raised.
  # NOTE(review): nothing here throws :abort, so these errors presumably do not
  # stop the save - confirm that is intended.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end
    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? ||
      (attributed['documentation_object'].blank? &&
       attributed['documentation_object_id'].blank? &&
       attributed['documentation_object_type'].blank?)
  end
end

#document_file_file_nameString

the name of the file as uploaded by the user.

Returns:

  • (String)


36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it. The attachment
# validations below accept PDF, plain-text, XML and octet-stream uploads.
#
# Documents are to Documentation as Images are to Depictions.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Virtual attribute; its writer is overridden below so that assigning it also
  # rebuilds page_map.
  attr_accessor :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file,
    filename_cleaner:  Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # Only re-read PDF metadata when the attachment size changed and the content
  # type mentions "pdf".
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page maps to
  # printed page +sp+.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map of PDF page (Integer, 1-based) to printed page (String).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or +sp+ is not set
  def get_page_map(sp = 1)
    m = {}
    if page_total && sp
      (0..(page_total - 1)).each do |p|
        m[p + 1] = (p + sp.to_i).to_s
      end
    end
    m
  end

  # Finds the PDF pages on which a printed page appears.
  #
  # Values in page_map may be a single printed page or a list of them; both are
  # compared by exact string equality, so asking for "30" does not match "300".
  #
  # @param printed_page [#to_s]
  # @return [Array] the page_map keys whose value is, or contains, +printed_page+
  def pdf_page_for(printed_page)
    target = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), matches|
      matches.push(pdf_page) if Array(printed).map(&:to_s).include?(target)
    end
  end

  # Sets the printed page(s) for one or more PDF pages and persists page_map.
  #
  # @param index [Object, Array] a PDF page, or an Array of PDF pages
  # @param page [Object, Array] a printed page, or an Array of printed pages
  # @return [false] when both arguments are Arrays, or when a single index is
  #   greater than page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)
    return false if !index.kind_of?(Array) && (index.to_i > page_total)

    p = page_map

    # Keys are always stored as strings
    [index].flatten.map(&:to_s).each do |i|
      if page.kind_of?(Array)
        p[i] = page.map(&:to_s)
      else
        p[i] = page.to_s
      end
    end

    update_attribute(:page_map, p)
  end

  # Writer for the virtual attribute: rebuilds page_map from +value+ before
  # remembering the value itself.
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` command on the stored file.
  #
  # The command is given as an argument vector so the path is never interpreted
  # by a shell (paths containing spaces or metacharacters are passed verbatim).
  #
  # @return [String] whatever the command writes to standard output
  def pdftotext
    IO.popen(['pdftotext', '-layout', document_file.path.to_s, '-'], &:read)
  end

  protected

  # before_destroy guard: aborts the destroy when more than one documentation
  # record references this document.
  def check_for_documentation
    if documentation.count > 1
      errors.add(:base, 'document is used in more than one place, remove documentation first')
      throw :abort
    end
  end

  # before_save callback: stores the PDF's page count in page_total and, when
  # initialize_start_page is set, rebuilds page_map from it.
  #
  # PDF::Reader failures are recorded on errors[:base] instead of being raised.
  # NOTE(review): nothing here throws :abort, so these errors presumably do not
  # stop the save - confirm that is intended.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end
    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? ||
      (attributed['documentation_object'].blank? &&
       attributed['documentation_object_id'].blank? &&
       attributed['documentation_object_type'].blank?)
  end
end

#document_file_file_sizeInteger

Returns size of the document in K.

Returns:

  • (Integer)

    size of the document in K



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it. The attachment
# validations below accept PDF, plain-text, XML and octet-stream uploads.
#
# Documents are to Documentation as Images are to Depictions.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Virtual attribute; its writer is overridden below so that assigning it also
  # rebuilds page_map.
  attr_accessor :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file,
    filename_cleaner:  Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # Only re-read PDF metadata when the attachment size changed and the content
  # type mentions "pdf".
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page maps to
  # printed page +sp+.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map of PDF page (Integer, 1-based) to printed page (String).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or +sp+ is not set
  def get_page_map(sp = 1)
    m = {}
    if page_total && sp
      (0..(page_total - 1)).each do |p|
        m[p + 1] = (p + sp.to_i).to_s
      end
    end
    m
  end

  # Finds the PDF pages on which a printed page appears.
  #
  # Values in page_map may be a single printed page or a list of them; both are
  # compared by exact string equality, so asking for "30" does not match "300".
  #
  # @param printed_page [#to_s]
  # @return [Array] the page_map keys whose value is, or contains, +printed_page+
  def pdf_page_for(printed_page)
    target = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), matches|
      matches.push(pdf_page) if Array(printed).map(&:to_s).include?(target)
    end
  end

  # Sets the printed page(s) for one or more PDF pages and persists page_map.
  #
  # @param index [Object, Array] a PDF page, or an Array of PDF pages
  # @param page [Object, Array] a printed page, or an Array of printed pages
  # @return [false] when both arguments are Arrays, or when a single index is
  #   greater than page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)
    return false if !index.kind_of?(Array) && (index.to_i > page_total)

    p = page_map

    # Keys are always stored as strings
    [index].flatten.map(&:to_s).each do |i|
      if page.kind_of?(Array)
        p[i] = page.map(&:to_s)
      else
        p[i] = page.to_s
      end
    end

    update_attribute(:page_map, p)
  end

  # Writer for the virtual attribute: rebuilds page_map from +value+ before
  # remembering the value itself.
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` command on the stored file.
  #
  # The command is given as an argument vector so the path is never interpreted
  # by a shell (paths containing spaces or metacharacters are passed verbatim).
  #
  # @return [String] whatever the command writes to standard output
  def pdftotext
    IO.popen(['pdftotext', '-layout', document_file.path.to_s, '-'], &:read)
  end

  protected

  # before_destroy guard: aborts the destroy when more than one documentation
  # record references this document.
  def check_for_documentation
    if documentation.count > 1
      errors.add(:base, 'document is used in more than one place, remove documentation first')
      throw :abort
    end
  end

  # before_save callback: stores the PDF's page count in page_total and, when
  # initialize_start_page is set, rebuilds page_map from it.
  #
  # PDF::Reader failures are recorded on errors[:base] instead of being raised.
  # NOTE(review): nothing here throws :abort, so these errors presumably do not
  # stop the save - confirm that is intended.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end
    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? ||
      (attributed['documentation_object'].blank? &&
       attributed['documentation_object_id'].blank? &&
       attributed['documentation_object_type'].blank?)
  end
end

#document_file_updated_atTimestamp

Returns last time this document was updated.

Returns:

  • (Timestamp)

    last time this document was updated



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it. The attachment
# validations below accept PDF, plain-text, XML and octet-stream uploads.
#
# Documents are to Documentation as Images are to Depictions.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Virtual attribute; its writer is overridden below so that assigning it also
  # rebuilds page_map.
  attr_accessor :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file,
    filename_cleaner:  Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # Only re-read PDF metadata when the attachment size changed and the content
  # type mentions "pdf".
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page maps to
  # printed page +sp+.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map of PDF page (Integer, 1-based) to printed page (String).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or +sp+ is not set
  def get_page_map(sp = 1)
    m = {}
    if page_total && sp
      (0..(page_total - 1)).each do |p|
        m[p + 1] = (p + sp.to_i).to_s
      end
    end
    m
  end

  # Finds the PDF pages on which a printed page appears.
  #
  # Values in page_map may be a single printed page or a list of them; both are
  # compared by exact string equality, so asking for "30" does not match "300".
  #
  # @param printed_page [#to_s]
  # @return [Array] the page_map keys whose value is, or contains, +printed_page+
  def pdf_page_for(printed_page)
    target = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), matches|
      matches.push(pdf_page) if Array(printed).map(&:to_s).include?(target)
    end
  end

  # Sets the printed page(s) for one or more PDF pages and persists page_map.
  #
  # @param index [Object, Array] a PDF page, or an Array of PDF pages
  # @param page [Object, Array] a printed page, or an Array of printed pages
  # @return [false] when both arguments are Arrays, or when a single index is
  #   greater than page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)
    return false if !index.kind_of?(Array) && (index.to_i > page_total)

    p = page_map

    # Keys are always stored as strings
    [index].flatten.map(&:to_s).each do |i|
      if page.kind_of?(Array)
        p[i] = page.map(&:to_s)
      else
        p[i] = page.to_s
      end
    end

    update_attribute(:page_map, p)
  end

  # Writer for the virtual attribute: rebuilds page_map from +value+ before
  # remembering the value itself.
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` command on the stored file.
  #
  # The command is given as an argument vector so the path is never interpreted
  # by a shell (paths containing spaces or metacharacters are passed verbatim).
  #
  # @return [String] whatever the command writes to standard output
  def pdftotext
    IO.popen(['pdftotext', '-layout', document_file.path.to_s, '-'], &:read)
  end

  protected

  # before_destroy guard: aborts the destroy when more than one documentation
  # record references this document.
  def check_for_documentation
    if documentation.count > 1
      errors.add(:base, 'document is used in more than one place, remove documentation first')
      throw :abort
    end
  end

  # before_save callback: stores the PDF's page count in page_total and, when
  # initialize_start_page is set, rebuilds page_map from it.
  #
  # PDF::Reader failures are recorded on errors[:base] instead of being raised.
  # NOTE(review): nothing here throws :abort, so these errors presumably do not
  # stop the save - confirm that is intended.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end
    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? ||
      (attributed['documentation_object'].blank? &&
       attributed['documentation_object_id'].blank? &&
       attributed['documentation_object_type'].blank?)
  end
end

#initialize_start_pageObject

Returns the value of attribute initialize_start_page.



44
45
46
# File 'app/models/document.rb', line 44

# Reader for the virtual +initialize_start_page+ attribute.
#
# @return [Object, nil] the value last stored in @initialize_start_page
def initialize_start_page
  @initialize_start_page
end

#page_mapHash

Returns a map of PDF page to printed page #; page indices start at 1. Behaviour:

if no integer exists for a PDF page then page is assumed to be the page # of the PDF (almost never the real case)
if an integer is provided it points to the page(s) represented in print
e.g.:
   { "1": "300",
     "2": ["301", "302", "xi"]
   }
mapping can be many to many:
   { "1": ["300", "301"],
     "2": ["301"]
   } ... printed page 301 is on pdf pages 1,2; page 1 contains printed pages 300, and part of 301.

Returns:

  • (Hash)

    a map of PDF page to printed page #; page indices start at 1. Behaviour:

    if no integer exists for a PDF page then page is assumed to be the page # of the PDF (almost never the real case)
    if an integer is provided it points to the page(s) represented in print
    e.g.:
       { "1": "300",
         "2": ["301", "302", "xi"]
       }
    mapping can be many to many:
       { "1": ["300", "301"],
         "2": ["301"]
       } ... printed page 301 is on pdf pages 1,2; page 1 contains printed pages 300, and part of 301
    


36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it. The attachment
# validations below accept PDF, plain-text, XML and octet-stream uploads.
#
# Documents are to Documentation as Images are to Depictions.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Virtual attribute; its writer is overridden below so that assigning it also
  # rebuilds page_map.
  attr_accessor :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file,
    filename_cleaner:  Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # Only re-read PDF metadata when the attachment size changed and the content
  # type mentions "pdf".
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page maps to
  # printed page +sp+.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map of PDF page (Integer, 1-based) to printed page (String).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or +sp+ is not set
  def get_page_map(sp = 1)
    m = {}
    if page_total && sp
      (0..(page_total - 1)).each do |p|
        m[p + 1] = (p + sp.to_i).to_s
      end
    end
    m
  end

  # Finds the PDF pages on which a printed page appears.
  #
  # Values in page_map may be a single printed page or a list of them; both are
  # compared by exact string equality, so asking for "30" does not match "300".
  #
  # @param printed_page [#to_s]
  # @return [Array] the page_map keys whose value is, or contains, +printed_page+
  def pdf_page_for(printed_page)
    target = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), matches|
      matches.push(pdf_page) if Array(printed).map(&:to_s).include?(target)
    end
  end

  # Sets the printed page(s) for one or more PDF pages and persists page_map.
  #
  # @param index [Object, Array] a PDF page, or an Array of PDF pages
  # @param page [Object, Array] a printed page, or an Array of printed pages
  # @return [false] when both arguments are Arrays, or when a single index is
  #   greater than page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)
    return false if !index.kind_of?(Array) && (index.to_i > page_total)

    p = page_map

    # Keys are always stored as strings
    [index].flatten.map(&:to_s).each do |i|
      if page.kind_of?(Array)
        p[i] = page.map(&:to_s)
      else
        p[i] = page.to_s
      end
    end

    update_attribute(:page_map, p)
  end

  # Writer for the virtual attribute: rebuilds page_map from +value+ before
  # remembering the value itself.
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` command on the stored file.
  #
  # The command is given as an argument vector so the path is never interpreted
  # by a shell (paths containing spaces or metacharacters are passed verbatim).
  #
  # @return [String] whatever the command writes to standard output
  def pdftotext
    IO.popen(['pdftotext', '-layout', document_file.path.to_s, '-'], &:read)
  end

  protected

  # before_destroy guard: aborts the destroy when more than one documentation
  # record references this document.
  def check_for_documentation
    if documentation.count > 1
      errors.add(:base, 'document is used in more than one place, remove documentation first')
      throw :abort
    end
  end

  # before_save callback: stores the PDF's page count in page_total and, when
  # initialize_start_page is set, rebuilds page_map from it.
  #
  # PDF::Reader failures are recorded on errors[:base] instead of being raised.
  # NOTE(review): nothing here throws :abort, so these errors presumably do not
  # stop the save - confirm that is intended.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end
    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? ||
      (attributed['documentation_object'].blank? &&
       attributed['documentation_object_id'].blank? &&
       attributed['documentation_object_type'].blank?)
  end
end

Instance Method Details

#check_for_documentationObject (protected)



118
119
120
121
122
123
# File 'app/models/document.rb', line 118

# Destroy guard: when more than one documentation record is attached, records
# an error on :base and aborts via throw :abort; otherwise does nothing.
def check_for_documentation
  return unless documentation.count > 1

  errors.add(:base, 'document is used in more than one place, remove documentation first')
  throw :abort
end

#get_page_map(sp = 1) ⇒ Object



71
72
73
74
75
76
77
78
79
# File 'app/models/document.rb', line 71

# Builds a sequential page map: PDF page n (Integer, starting at 1) maps to the
# printed page (n - 1 + sp), stored as a String.
#
# @param sp [#to_i, nil] printed page number of the first PDF page
# @return [Hash] empty when either page_total or +sp+ is falsy
def get_page_map(sp = 1)
  return {} unless page_total && sp

  (1..page_total).each_with_object({}) do |pdf_page, map|
    map[pdf_page] = (pdf_page - 1 + sp.to_i).to_s
  end
end

#pdf_page_for(printed_page) ⇒ Array

Returns:

  • (Array)


82
83
84
85
86
87
88
# File 'app/models/document.rb', line 82

# Finds the PDF pages on which a printed page appears.
#
# Values in page_map may be a single printed page or a list of them. Both are
# compared by exact string equality; a single String value is no longer
# substring-matched, so asking for "30" does not match a page mapped to "300".
#
# @param printed_page [#to_s] the printed page label to look up
# @return [Array] the page_map keys whose value is, or contains, +printed_page+
def pdf_page_for(printed_page)
  target = printed_page.to_s
  page_map.each_with_object([]) do |(pdf_page, printed), matches|
    # Array() lets scalar and list values share one exact-match code path
    matches.push(pdf_page) if Array(printed).map(&:to_s).include?(target)
  end
end

#pdftotextObject



112
113
114
# File 'app/models/document.rb', line 112

# Runs the external `pdftotext -layout` command on the stored file.
#
# The command is given as an argument vector rather than an interpolated shell
# string, so the path is never interpreted by a shell: paths containing spaces
# or shell metacharacters are passed to pdftotext verbatim.
#
# @return [String] whatever the command writes to standard output
def pdftotext
  IO.popen(['pdftotext', '-layout', document_file.path.to_s, '-'], &:read)
end

#reject_documentation(attributed) ⇒ Object (protected)



141
142
143
# File 'app/models/document.rb', line 141

# reject_if predicate for nested documentation attributes.
#
# @param attributed [Hash] nested attributes for one documentation record
# @return [Boolean] true when 'type' is blank, or when the documentation
#   object, its id and its type are all blank
def reject_documentation(attributed)
  return true if attributed['type'].blank?

  %w[documentation_object documentation_object_id documentation_object_type].all? do |key|
    attributed[key].blank?
  end
end

#set_page_map_page(index, page) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'app/models/document.rb', line 90

# Sets the printed page(s) for one or more PDF pages, then persists page_map
# through update_attribute.
#
# @param index [Object, Array] a PDF page, or an Array of PDF pages
# @param page [Object, Array] a printed page, or an Array of printed pages
# @return [false] when both arguments are Arrays, or when a single index is
#   greater than page_total; otherwise the result of update_attribute
def set_page_map_page(index, page)
  many_indices = index.kind_of?(Array)
  many_pages = page.kind_of?(Array)

  return false if many_indices && many_pages
  return false if !many_indices && (index.to_i > page_total)

  mapping = page_map

  # Keys and values are always stored as strings
  [index].flatten.each do |pdf_page|
    mapping[pdf_page.to_s] = many_pages ? page.map(&:to_s) : page.to_s
  end

  update_attribute(:page_map, mapping)
end

#set_pages_by_start(sp = 1) ⇒ Object



67
68
69
# File 'app/models/document.rb', line 67

# Assigns (without saving) the sequential page map produced by get_page_map
# for a first printed page of +sp+.
#
# @param sp [Object] passed through to get_page_map unchanged
def set_pages_by_start(sp = 1)
  sequential_map = get_page_map(sp)
  write_attribute(:page_map, sequential_map)
end

#set_pdf_metadataObject (protected)



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'app/models/document.rb', line 125

# before_save callback: stores the PDF's page count in page_total and, when
# initialize_start_page is set, rebuilds page_map from it.
#
# PDF::Reader failures are recorded on errors[:base] instead of being raised.
# NOTE(review): nothing here throws :abort, so these errors presumably do not
# stop the save - confirm that is intended.
def set_pdf_metadata
  begin
    # Binary mode: the file is handed to PDF::Reader as-is
    File.open(document_file.staged_path, 'rb') do |io|
      reader = PDF::Reader.new(io)
      write_attribute(:page_total, reader.page_count)
    end
  rescue PDF::Reader::MalformedPDFError
    errors.add(:base, 'pdf is malformed')
  rescue PDF::Reader::EncryptedPDFError
    errors.add(:base, 'pdf is encrypted')
  rescue PDF::Reader::UnsupportedFeatureError
    errors.add(:base, 'pdf contains features not supported by the software')
  end
  # Runs even after a rescued reader error, as long as a start page was given
  set_pages_by_start(initialize_start_page) if initialize_start_page
end