Method: ContentData::ContentData#from_file

Defined in: lib/content_data/content_data.rb

#from_file(filename) ⇒ `Object`

TODO: validation that the file indeed contains ContentData is missing. TODO: should this be a class-level method? Loads the db from a file using chunks for better memory performance.

# File 'lib/content_data/content_data.rb', line 383

# Loads content data from +filename+ into this object, reading the file in
# chunks of at most CHUNK_SIZE lines with a GC pass after each chunk.
#
# The file is consumed in this order:
# 1. a line holding the number of contents, then that many lines handed to
#    read_contents_chunk;
# 2. a line holding the number of instances, then that many lines handed to
#    read_instances_chunk;
# 3. a line holding the number of symlinks, then one line per symlink made of
#    exactly three '<'-separated fields, stored in @symlinks_info as
#    [field0, field1] => field2.
#
# @param filename [String] path of the content data file to read
# @return [Integer, nil] the number of symlink lines read, or nil when one of
#   the chunk readers returned a false value (loading stops at that point)
# @raise [ArgumentError] if +filename+ does not exist
# @raise [RuntimeError] if a count line is not a number, the file ends before
#   all symlink lines were read, or a symlink line does not have three fields
def from_file(filename)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  raise ArgumentError, "No such a file #{filename}" unless File.exist?(filename)

  File.open(filename, 'r') do |file|
    number_of_contents =
      from_file_read_count(file, filename, 'number of contents should be a number')
    contents_ok = from_file_each_chunk(number_of_contents) do |chunk_size|
      read_contents_chunk(filename, file, chunk_size)
    end
    return unless contents_ok

    number_of_instances =
      from_file_read_count(file, filename, 'number of instances should be a Number')
    instances_ok = from_file_each_chunk(number_of_instances) do |chunk_size|
      read_instances_chunk(filename, file, chunk_size)
    end
    return unless instances_ok

    number_of_symlinks =
      from_file_read_count(file, filename, 'number of symlinks should be a Number')
    number_of_symlinks.times do
      symlinks_line = file.gets
      unless symlinks_line
        raise("Parse error of content data file:#{filename}  line ##{file.lineno}\n" +
              "Expected to read symlink line but reached EOF")
      end
      # chomp so the line terminator is not stored as part of the last field;
      # the -1 limit keeps a trailing empty field, so a line that had three
      # fields before chomping still has three after it.
      parameters = symlinks_line.chomp.split('<', -1)
      if parameters.length != 3
        raise("Parse error of content data file:#{filename}  line ##{file.lineno}\n" +
              "Expected to read 3 fields ('<' separated) but got #{parameters.length}.\nLine:#{symlinks_line}")
      end

      @symlinks_info[[parameters[0], parameters[1]]] = parameters[2]
    end
  end
end

# Reads the next line of +file+ and returns it as an Integer.
# Raises RuntimeError (mentioning +filename+, the current line number and
# +expectation+) when the file is at EOF or the line is not made of digits only.
def from_file_read_count(file, filename, expectation)
  line = file.gets  # nil at EOF
  # gets yields a single line, so this is the same check as /^[\d]+$/ on it.
  unless line && line =~ /\A\d+\n?\z/
    raise("Parse error of content data file:#{filename}  line ##{file.lineno}\n" +
          "#{expectation}. We got:#{line}")
  end
  line.to_i
end

# Yields successive chunk sizes (CHUNK_SIZE each, the last one possibly
# smaller) that add up to +total+, running GC.start after every chunk whose
# block returned a true value. Returns false as soon as the block returns a
# false value, true otherwise. Assumes CHUNK_SIZE is positive.
def from_file_each_chunk(total)
  remaining = total
  while remaining > 0
    chunk_size = [remaining, CHUNK_SIZE].min
    return false unless yield(chunk_size)
    GC.start
    remaining -= chunk_size
  end
  true
end

private :from_file_read_count, :from_file_each_chunk

Method: ContentData::ContentData#from_file

#from_file(filename) ⇒ Object

#from_file(filename) ⇒ `Object`