Class: ExtractMetadata

Inherits:
Object
  • Object
show all
Defined in:
lib/extractmetadata.rb

Instance Method Summary collapse

Constructor Details

#initialize(file, input_dir, output_dir) ⇒ ExtractMetadata



5
6
7
8
9
# File 'lib/extractmetadata.rb', line 5

# Remember the file to process and the two directories for later calls.
#
# file is stored as @path, input_dir as @input_dir and output_dir as
# @output_dir; nothing else happens at construction time.
def initialize(file, input_dir, output_dir)
  @path, @input_dir = file, input_dir
  @output_dir = output_dir
end

Instance Method Details

#extract ⇒ Object

Extract metadata



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/extractmetadata.rb', line 12

# Extract metadata
#
# Builds one hash describing the file: the path-derived fields first, then
# whatever extract_file_metadata returns merged on top.
#
# Side effect: sets @rel_path, which get_folders, get_formatted_name and
# get_file_type read.
#
# Returns a Hash that always has :rel_path, :folders, :formatted_name and
# :filetype. The file-metadata keys are present only when extraction succeeds.
def extract
  outhash = {}

  # Get relative path (must happen first: the helpers below read @rel_path)
  @rel_path = get_rel_path
  outhash[:rel_path] = @rel_path
  outhash[:folders] = get_folders

  # Get formatted name and file type
  outhash[:formatted_name] = get_formatted_name
  outhash[:filetype] = get_file_type

  # Extract file metadata, merge, and return. Extraction is best-effort:
  # if it fails, the path-derived fields are still returned.
  begin
    outhash.merge!(extract_file_metadata)
  rescue StandardError
    # Deliberately ignored; see the note above.
  end
  outhash
end

#extract_file_metadata ⇒ Object

Extract PDF metadata



59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/extractmetadata.rb', line 59

# Extract PDF metadata
#
# Calls one Docsplit extractor per field on the file at @path and collects
# the results.
#
# Returns a Hash with the keys :author, :creator, :producer, :title,
# :subject, :date, :keywords and :length, each holding whatever the
# matching Docsplit.extract_* call returned.
def extract_file_metadata
  metadata = {}
  metadata[:author] = Docsplit.extract_author(@path)
  metadata[:creator] = Docsplit.extract_creator(@path)
  metadata[:producer] = Docsplit.extract_producer(@path)
  metadata[:title] = Docsplit.extract_title(@path)
  metadata[:subject] = Docsplit.extract_subject(@path)
  metadata[:date] = Docsplit.extract_date(@path)
  metadata[:keywords] = Docsplit.extract_keywords(@path)
  metadata[:length] = Docsplit.extract_length(@path)
  metadata
end

#get_file_type ⇒ Object

Get file type



54
55
56
# File 'lib/extractmetadata.rb', line 54

# Get file type
#
# Returns the extension of @rel_path without the leading dot, e.g. "pdf"
# for "/docs/report.pdf". Only the final path component is inspected, so
# dots in directory names are ignored. A file with no extension yields ""
# instead of the whole path. Dotfiles such as ".bashrc" count as having
# no extension.
def get_file_type
  File.extname(@rel_path).delete(".")
end

#get_folders ⇒ Object

Split relative path and get array of directories



33
34
35
36
37
38
39
40
41
# File 'lib/extractmetadata.rb', line 33

# Split relative path and get array of directories
#
# Returns the directory components of @rel_path in order, without the
# file name and without the empty strings produced by leading or doubled
# slashes.
def get_folders
  folders = @rel_path.split("/")

  # Drop only the final component (the file). Array#delete would also
  # remove any directory that happens to share the file's name.
  folders.pop

  folders.reject(&:empty?)
end

#get_formatted_name ⇒ Object

Get a formatted file name



49
50
51
# File 'lib/extractmetadata.rb', line 49

# Get a formatted file name
#
# Takes the part of @rel_path before its first ".", turns underscores into
# spaces and removes every "/". Directory names are therefore joined
# directly onto the file name.
def get_formatted_name
  stem = @rel_path.split(".").first
  stem.tr("_", " ").delete("/")
end

#get_rel_path ⇒ Object

Get the relative path



44
45
46
# File 'lib/extractmetadata.rb', line 44

# Get the relative path
#
# Returns @path with the first occurrence of @input_dir removed. Only the
# first occurrence is stripped, so a sub-path that happens to repeat the
# input directory's text is left intact.
def get_rel_path
  @path.sub(@input_dir, "")
end