Module: Documentalist

Defined in:
lib/dependencies.rb,
lib/documentalist.rb,
lib/backends/net_pbm.rb,
lib/backends/odf_merge.rb,
lib/backends/pdf_tools.rb,
lib/backends/open_office.rb,
lib/backends/wkhtmltopdf.rb

Defined Under Namespace

Modules: Dependencies, NetPBM, ODFMerge, OpenOffice, PdfTools, WkHtmlToPdf

Classes: Error

Constant Summary collapse

# Conversions handled by each backend, keyed by the backend module's name.
# Each value maps the accepted source format(s) to the destination format(s).
BACKENDS =
{
  # TODO: find a better pattern to pick a backend; this one smells pretty bad
  :WkHtmlToPdf => {[:html, :htm] => :pdf},
  :OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
  :NetPBM => {:ppm => [:jpg, :jpeg]},
  :PdfTools => {:pdf => :txt},
}
# Configuration hash; starts out empty.
@@config =
{}
# Logger instance; starts out nil.
@@logger =
nil

Class Method Summary collapse

Class Method Details

.backend_for_conversion(origin, destination) ⇒ Object

Finds the relevant backend to perform the conversion



44
45
46
47
48
49
50
51
# File 'lib/documentalist.rb', line 44

# Picks the backend module able to convert from one format to another.
#
# Both arguments may be bare formats (:pdf, "pdf") or file names
# ("report.pdf"): everything up to and including the last dot is dropped
# before the lookup.
#
# @param origin [String, Symbol] source format or file name
# @param destination [String, Symbol] target format or file name
# @return [Module, nil] the first matching backend listed in BACKENDS, or nil
#   when no backend declares both formats
def self.backend_for_conversion(origin, destination)
  source_format, target_format = [origin, destination].map do |name|
    name.to_s.gsub(/.*\./, "").to_sym
  end

  # Every backend constant listed in BACKENDS is resolved before matching starts.
  candidates = BACKENDS.map { |backend_name, conversions| [send(:const_get, backend_name), conversions] }

  match = candidates.detect do |_backend, conversions|
    conversions.keys.flatten.include?(source_format) &&
      conversions.values.flatten.include?(target_format)
  end

  # nil.to_a is [], so "no match" falls through to nil here.
  match.to_a.first
end

.check_dependencies ⇒ Object

Checks the dependencies for backends



160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/documentalist.rb', line 160

# Runs the dependency check of every constant under Documentalist that
# exposes a check_dependencies method, announcing each one on standard output.
#
# @return [Array] the list of constants that was walked
def self.check_dependencies
  puts "Checking backends system dependencies"

  Documentalist.constants.each do |constant_name|
    candidate = Documentalist.const_get(constant_name.to_sym)
    # Constants that are not backends (no check_dependencies) are skipped.
    next unless candidate.respond_to?(:check_dependencies)

    puts "Checking dependencies for #{candidate.to_s}"
    candidate.send(:check_dependencies)
  end
end

.config ⇒ Object



12
13
14
15
# File 'lib/documentalist.rb', line 12

# Returns the stored configuration, calling default_config! first when
# config? reports that nothing has been configured yet.
def self.config
  unless config?
    default_config!
  end

  @@config
end

.config=(hash) ⇒ Object



17
18
19
20
# File 'lib/documentalist.rb', line 17

# Replaces the whole stored configuration.
#
# @param hash [Hash] new configuration; it is passed through symbolize and
#   the result is what gets stored
def self.config=(hash)
  # We want to symbolize keys ourselves since we're not depending on Active Support
  @@config = symbolize hash
end

.config? ⇒ Boolean

Returns:

  • (Boolean)


22
23
24
# File 'lib/documentalist.rb', line 22

# Tells whether a configuration has been set.
#
# @return [Boolean] false while the stored configuration still equals an
#   empty hash, true otherwise
def self.config?
  @@config != {}
end

.config_from_yaml!(file, options = {}) ⇒ Object



30
31
32
33
# File 'lib/documentalist.rb', line 30

# Loads the configuration from a YAML file.
#
# @param file [String] path of the YAML file to read
# @param options [Hash] when :section is given, only that top-level key of
#   the loaded document is kept as the configuration
def self.config_from_yaml!(file, options = {})
  # The block form closes the file as soon as parsing is done; a bare
  # File.open(file) would keep the descriptor open until garbage collection.
  # NOTE(review): depending on the YAML library version, YAML::load may
  # instantiate arbitrary objects; only point this at trusted files.
  self.config = File.open(file) { |io| YAML::load(io) }
  self.config = config[options[:section].to_sym] if options[:section]
end

.convert(file = nil, options = {}) ⇒ Object

Takes all conversion requests and dispatches them appropriately



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/documentalist.rb', line 54

# Takes all conversion requests and dispatches them to the matching backend.
#
# The source is either an existing file, or raw data passed through
# options[:input] together with options[:input_format]; in that case the
# data is written to a temporary file that is removed before returning.
#
# The destination is chosen from the first of these options that is set:
#   :to_format - same path as the source, with the extension swapped
#   :to        - explicit destination path (format taken from its extension)
#   :stream    - format to produce; the converted data itself is returned
#                and the intermediate output file is deleted
#
# Note that the options hash is updated in place (:to, :to_format and
# :from_format are filled in) before being handed to the backend.
#
# @param file [String, nil] path of the file to convert
# @param options [Hash] see above
# @yield [result] the same value that is returned
# @return [String] converted data when :stream is set, destination path otherwise
# @raise [Documentalist::Error] when the source is missing, no destination
#   was specified, or no backend supports the requested conversion
def self.convert(file=nil, options={})
  # Remembered separately: `file` is reassigned below, so testing `file.nil?`
  # afterwards can never tell that a temporary input was created.
  temp_input = nil

  if options[:input] && options[:input_format] && file.nil?
    temp_input = File.join(Dir.tmpdir, "#{rand(10**9)}.#{options[:input_format].to_s}")
    File.open(temp_input, 'w') { |f| f.write(options[:input]) }
    file = temp_input
  end

  raise Documentalist::Error.new("#{file} does not exist !") unless file && File.exist?(file)

  if options[:to_format]
    # The extension is escaped so characters such as "+" are matched literally.
    # A source without extension simply gets the new extension appended.
    source_ext = /#{Regexp.escape(File.extname(file))}\z/
    options[:to] = file.sub(source_ext, ".#{options[:to_format].to_s}")
  elsif options[:to]
    options[:to_format] = File.extname(options[:to]).gsub(/\./, "").to_sym
  elsif options[:stream]
    options[:to_format] = options[:stream]
    options[:to] = File.join(Dir.tmpdir, "#{rand(10**9)}.#{options[:to_format]}")
  else
    raise Documentalist::Error.new("No destination, format, or stream format was given")
  end

  options[:from_format] = File.extname(file).gsub(/\./, "").to_sym

  backend = backend_for_conversion(options[:from_format], options[:to_format])
  unless backend
    raise Documentalist::Error.new("No backend can convert #{options[:from_format]} to #{options[:to_format]}")
  end
  backend.convert(file, options)

  if options[:stream]
    data = File.read(options[:to])
    FileUtils.rm(options[:to])
    yield(data) if block_given?
    data
  else
    yield(options[:to]) if block_given?
    options[:to]
  end
ensure
  # Runs on success and on failure. The guard keeps the output alive in the
  # corner case where it was written over the temporary input itself.
  FileUtils.rm_f(temp_input) if temp_input && temp_input != options[:to]
end

.default_config! ⇒ Object



26
27
28
# File 'lib/documentalist.rb', line 26

# Loads the configuration from config/default.yml, located one directory
# above the directory of this source file.
def self.default_config!
  defaults_path = File.join(File.dirname(__FILE__), "..", "config", "default.yml")
  config_from_yaml!(defaults_path)
end

.extract_images(file) {|image_files| ... } ⇒ Object

Yields:

  • (image_files)


104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/documentalist.rb', line 104

# Extracts the images embedded in a document.
#
# PDF files are copied to a fresh temporary directory and handed to the
# `pdfimages` command line tool; the resulting PPM files are then converted
# to JPEG. Any other file is converted to HTML.
#
# @param file [String] path of the document
# @yield [image_files] the same list that is returned
# @return [Array<String>] paths of the image files found in the temporary directory
def self.extract_images(file)
  temp_dir = File.join(Dir.tmpdir, rand(10**9).to_s)

  # Case-insensitive so that "scan.PDF" is handled as a PDF too.
  if File.extname(file).downcase == '.pdf'
    temp_file = File.join(temp_dir, File.basename(file))

    FileUtils.mkdir_p temp_dir
    FileUtils.cp file, temp_file

    # Argument-list form: no shell is involved, so spaces or shell
    # metacharacters in the file name can neither break nor hijack the command.
    system "pdfimages", temp_file, File.join(temp_dir, "img")

    Dir.glob(File.join(temp_dir, "*.ppm")).each do |ppm_image|
      Documentalist.convert(ppm_image, :to_format => :jpeg)
    end
  else
    # NOTE(review): temp_dir is never created on this path and the HTML is
    # written next to `file`, so the glob below cannot find anything here.
    # Presumably the document should be copied into temp_dir first; confirm
    # where the HTML backend writes its images before changing this.
    Documentalist.convert file, :to_format => :html
  end

  image_files = Dir.glob(File.join(temp_dir, "*.{jpg,jpeg,bmp,tif,tiff,gif,png}"))

  yield(image_files) if block_given?
  image_files
end

.extract_text(file) ⇒ Object



93
94
95
96
97
98
99
100
101
102
# File 'lib/documentalist.rb', line 93

# Extracts the text of a document as UTF-8.
#
# The document is converted to a .txt file, whose content is read, converted
# to UTF-8 with Kconv, and returned; the intermediate text file is deleted.
#
# @param file [String] path of the document
# @yield [text] the same text that is returned
# @return [String, nil] the extracted text, or nil when the conversion did
#   not produce a file
def self.extract_text(file)
  converted = convert(file, :to_format => :txt)
  return unless converted && File.exist?(converted)

  # File.read opens and closes the file itself; File.open(...).read left the
  # handle open until garbage collection.
  text = Kconv.toutf8(File.read(converted))
  FileUtils.rm(converted)
  yield(text) if block_given?
  text
end

.logger ⇒ Object

Returns the logger object used to log documentalist operations



149
150
151
152
153
154
155
156
157
# File 'lib/documentalist.rb', line 149

# Returns the logger object used to log documentalist operations.
#
# The logger is built on first use and kept afterwards. It writes to
# config[:log_file], which defaults to documentalist.log one directory above
# this file, at the level named by config[:log_level] ("WARN" when unset).
def self.logger
  return @@logger if @@logger

  Documentalist.config[:log_file] ||= File.join(File.dirname(File.expand_path(__FILE__)), "..", "documentalist.log")
  @@logger = Logger.new(Documentalist.config[:log_file])

  # The level name is looked up as a constant on Logger.
  level_name = config[:log_level] ? config[:log_level].upcase : "WARN"
  @@logger.level = Logger.const_get(level_name)

  @@logger
end

.odf_merge(template, options = {}) ⇒ Object

Merge an ODF document with an arbitrary hash of data



7
8
9
# File 'lib/backends/odf_merge.rb', line 7

# Merge an ODF document with an arbitrary hash of data.
#
# Thin wrapper: both arguments are forwarded unchanged to
# ODFMerge.merge_template, whose result is returned.
#
# @param template [Object] template handed to the backend
#   (presumably the path of an ODF file; confirm against ODFMerge)
# @param options [Hash] data/options handed to the backend
def self.odf_merge(template, options = {})
  ODFMerge.merge_template(template, options)
end

.symbolize(hash) ⇒ Object

Returns a new hash with recursively symbolized keys



174
175
176
177
178
179
# File 'lib/documentalist.rb', line 174

# Returns a new hash with recursively symbolized keys.
#
# The given hash is left untouched: a copy is built instead of renaming keys
# in place, because inserting keys into a hash while iterating over it
# raises on Ruby 1.9 and later. Keys that cannot be turned into symbols
# (e.g. integers) are kept as they are. Only values that are hashes
# themselves are descended into.
#
# @param hash [Hash] hash whose keys should be symbolized
# @return [Hash] a new hash with symbol keys at every nesting level
def self.symbolize(hash)
  hash.inject({}) do |symbolized, (key, value)|
    new_key = key.respond_to?(:to_sym) ? key.to_sym : key
    symbolized[new_key] = value.is_a?(Hash) ? symbolize(value) : value
    symbolized
  end
end

.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil}) ⇒ Object

Runs a block under a system-enforced timeout, optionally retrying it when it times out, with an optional sleep between attempts. All times are in seconds.



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/documentalist.rb', line 132

# Runs a block under a system-enforced timeout, retrying it when it times out.
# All times are in seconds.
#
# @param time_limit [Numeric] time limit handed to SystemTimer.timeout for each attempt
# @param options [Hash]
#   :attempts - how many times the block may be run in total (default 1;
#               zero or negative values behave like 1)
#   :sleep    - pause between two attempts (default: none)
# @return [Object, nil] the value of SystemTimer.timeout for the attempt that
#   completed, or nil when no block is given
# @raise [Timeout::Error] when the last allowed attempt timed out as well
def self.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil})
  return unless block_given?

  attempts = options[:attempts] || 1
  begin
    SystemTimer.timeout(time_limit) { yield }
  rescue Timeout::Error
    attempts -= 1
    # "> 0" rather than "not zero": with :attempts => 0 the counter goes
    # straight to -1 and a zero test would retry forever.
    raise unless attempts > 0

    # Only sleep when another attempt actually follows.
    sleep(options[:sleep]) if options[:sleep]
    retry
  end
end