Class: RDig::FileDocument

Inherits:
Document show all
Defined in:
lib/rdig/documents.rb

Overview

A document in a file system.

Instance Attribute Summary

Attributes inherited from Document

#content, #content_type, #uri

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Document

#body, create, #has_content?, #links, #needs_indexing?, #title, #to_s

Constructor Details

#initialize(args = {}) ⇒ FileDocument

Returns a new instance of FileDocument.



55
56
57
# File 'lib/rdig/documents.rb', line 55

# Builds a FileDocument by handing +args+ to the superclass constructor
# unchanged; this class adds no construction logic of its own.
def initialize(args = {})
  # Bare super forwards the current value of args, same as super(args).
  super
end

Class Method Details

.find_files(path) ⇒ Object



63
64
65
66
67
68
69
70
71
72
# File 'lib/rdig/documents.rb', line 63

# Lists the entries directly inside +path+ as "file://" links.
#
# An entry is included when it is a directory, or when its name ends in one
# of the extensions listed as keys of File::FILE_EXTENSION_MIME_TYPES
# (compared case-insensitively). Everything else is skipped.
#
# path - directory to list; expanded to an absolute path before globbing.
#
# Returns an Array of "file://<absolute path>" strings (empty when nothing
# qualifies).
def self.find_files(path)
  # Escape every extension so regexp metacharacters in a key (e.g. "c++")
  # are matched literally instead of being interpreted as regexp syntax.
  extensions = File::FILE_EXTENSION_MIME_TYPES.keys.map { |ext| Regexp.escape(ext.to_s) }
  # \z anchors at the very end of the name; `$` would also match right
  # before a newline embedded in a file name.
  pattern = /.+\.(?:#{extensions.join('|')})\z/i

  links = []
  Dir.glob(File.expand_path(File.join(path, '*'))) do |filename|
    RDig.logger.debug "checking file #{filename}"
    # Skip files not matching known mime types
    links << "file://#{filename}" if File.directory?(filename) || filename =~ pattern
  end
  links
end

Instance Method Details

#create_child(uri) ⇒ Object



59
60
61
# File 'lib/rdig/documents.rb', line 59

# Creates the document object for a link discovered from this document.
#
# uri - the URI of the child; passed through as the :uri argument.
#
# Returns a new FileDocument.
def create_child(uri)
  child_args = { :uri => uri }
  FileDocument.new(child_args)
end

#fetch ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/rdig/documents.rb', line 78

# Loads this document's content from the file system into @content.
#
# * If @uri.path is a directory, the content is just a :links entry holding
#   whatever find_files returns for that directory.
# * Otherwise the file is read and passed, together with its content type,
#   to ContentExtractors.process; any :links in the result are cleared.
#
# Returns @content, which falls back to an empty Hash when nothing was
# extracted. Errors from opening or reading the file are not rescued here.
def fetch
  path = @uri.path
  if File.directory?(path)
    # directories are treated like a link collection
    @content = { :links => self.class.find_files(path) }
  else
    # process this file's contents
    # File.open (not Kernel#open) so the path is only ever opened as a file:
    # Kernel#open would spawn a subprocess for a path starting with "|".
    File.open(path) do |file|
      @content = ContentExtractors.process(file.read, file.content_type)
      @content[:links] = nil if @content # don't follow links inside files
    end
  end
  @content ||= {}
end

#file? ⇒ Boolean

Returns:

  • (Boolean)


74
75
76
# File 'lib/rdig/documents.rb', line 74

# Tells whether this document's URI path points at a regular file
# (as reported by File.file?).
#
# Returns true or false.
def file?
  path = @uri.path
  File.file?(path)
end

#status ⇒ Object



92
93
94
# File 'lib/rdig/documents.rb', line 92

# Reports the outcome of loading this document.
#
# Returns :success when has_content? is truthy, nil otherwise.
def status
  return unless has_content?

  :success
end