Module: Documentalist
- Defined in:
- lib/dependencies.rb,
lib/documentalist.rb,
lib/backends/net_pbm.rb,
lib/backends/odf_merge.rb,
lib/backends/pdf_tools.rb,
lib/backends/open_office.rb,
lib/backends/wkhtmltopdf.rb
Defined Under Namespace
Modules: Dependencies, NetPBM, ODFMerge, OpenOffice, PdfTools, WkHtmlToPdf Classes: Error
Constant Summary collapse
- BACKENDS =
{
  # Maps each backend module name to the conversions it supports, as
  # {source format(s) => destination format(s)}.
  # Find a better pattern to pick backend, this one smells pretty bad
  :WkHtmlToPdf => {[:html, :htm] => :pdf},
  :OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
  :NetPBM => {:ppm => [:jpg, :jpeg]},
  :PdfTools => {:pdf => :txt},
}
- @@config =
{}
- @@logger =
nil
Class Method Summary collapse
-
.backend_for_conversion(origin, destination) ⇒ Object
Finds the relevant backend to perform the conversion.
-
.check_dependencies ⇒ Object
Checks the dependencies for backends.
- .config ⇒ Object
- .config=(hash) ⇒ Object
- .config? ⇒ Boolean
- .config_from_yaml!(file, options = {}) ⇒ Object
-
.convert(file = nil, options = {}) ⇒ Object
Takes all conversion requests and dispatches them appropriately.
- .default_config! ⇒ Object
- .extract_images(file) {|image_files| ... } ⇒ Object
- .extract_text(file) ⇒ Object
-
.logger ⇒ Object
Returns the logger object used to log documentalist operations.
-
.odf_merge(template, options = {}) ⇒ Object
Merge an ODF document with an arbitrary hash of data.
-
.symbolize(hash) ⇒ Object
Returns a new hash with recursively symbolized keys.
-
.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil}) ⇒ Object
Runs a block with a system-enforced timeout, optionally retrying it with an optional sleep between attempts.
Class Method Details
.backend_for_conversion(origin, destination) ⇒ Object
Finds the relevant backend to perform the conversion
44 45 46 47 48 49 50 51 |
# File 'lib/documentalist.rb', line 44
# Looks up the backend module able to convert +origin+ into +destination+.
#
# Both arguments may be bare formats (:pdf) or file names ("report.pdf");
# everything up to and including the last dot is stripped before the lookup.
#
# Returns the first backend in BACKENDS whose source formats include the
# origin and whose destination formats include the destination, or nil when
# no backend matches.
def self.backend_for_conversion(origin, destination)
  from_format = origin.to_s.gsub(/.*\./, "").to_sym
  to_format = destination.to_s.gsub(/.*\./, "").to_sym

  # Resolve every backend name to its module up front, then search.
  candidates = BACKENDS.map { |name, conversions| [const_get(name), conversions] }
  match = candidates.find do |_backend, conversions|
    conversions.keys.flatten.include?(from_format) && conversions.values.flatten.include?(to_format)
  end

  # nil.to_a is [], so a failed search yields nil here.
  match.to_a.first
end
.check_dependencies ⇒ Object
Checks the dependencies for backends
160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/documentalist.rb', line 160
# Walks every constant defined under Documentalist and, for each one that
# responds to +check_dependencies+, announces it on stdout and invokes it.
def self.check_dependencies
  puts "Checking backends system dependencies"

  Documentalist.constants.each do |constant_name|
    candidate = Documentalist.const_get(constant_name.to_sym)
    # Skip constants (e.g. Error) that do not expose a dependency check.
    next unless candidate.respond_to?(:check_dependencies)

    puts "Checking dependencies for #{candidate.to_s}"
    candidate.send(:check_dependencies)
  end
end
.config ⇒ Object
12 13 14 15 |
# File 'lib/documentalist.rb', line 12
# Returns the current configuration hash, loading the default
# configuration first when nothing has been configured yet.
def self.config
  default_config! if !config?
  @@config
end
.config=(hash) ⇒ Object
17 18 19 20 |
# File 'lib/documentalist.rb', line 17
# Replaces the configuration with +hash+ after running it through
# +symbolize+. Keys are symbolized here because the library does not
# depend on Active Support.
def self.config=(hash)
  @@config = symbolize(hash)
end
.config? ⇒ Boolean
22 23 24 |
# File 'lib/documentalist.rb', line 22
# Tells whether a configuration has been set, i.e. whether the stored
# configuration differs from an empty hash.
def self.config?
  !(@@config == {})
end
.config_from_yaml!(file, options = {}) ⇒ Object
30 31 32 33 |
# File 'lib/documentalist.rb', line 30
# Loads the configuration from a YAML file.
#
# file    - path of the YAML file to read.
# options - optional hash; when options[:section] is given, only that
#           top-level section of the file is kept as the configuration.
#
# The file is read inside a block so its handle is closed as soon as the
# YAML has been parsed.
def self.config_from_yaml!(file, options = {})
  self.config = File.open(file) { |io| YAML.load(io) }
  self.config = config[options[:section].to_sym] if options[:section]
end
.convert(file = nil, options = {}) ⇒ Object
Takes all conversion requests and dispatches them appropriately
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/documentalist.rb', line 54
# Takes all conversion requests and dispatches them to the matching backend.
#
# file    - path of the document to convert. May be nil when the content is
#           passed through options[:input] / options[:input_format], in which
#           case it is first written to a temporary file.
# options - hash of conversion options:
#           :input / :input_format - raw content and its format (used when
#                                    +file+ is nil)
#           :to_format             - destination format; the destination path
#                                    is the input path with its extension
#                                    replaced
#           :to                    - destination path; the format is taken
#                                    from its extension
#           :stream                - destination format; the result is read
#                                    back into memory and the temporary
#                                    output file is removed
#
# Yields the converted data (stream mode) or the destination path when a
# block is given, and returns that same value.
#
# Raises Documentalist::Error when no input is available, the input file does
# not exist, no destination was specified, or no backend handles the
# requested conversion.
def self.convert(file = nil, options = {})
  # Remember that the input file is ours so it can be cleaned up later;
  # +file+ itself is no longer nil once the temporary file is created.
  temp_input = false
  if options[:input] && options[:input_format] && file.nil?
    file = File.join(Dir.tmpdir, "#{rand(10**9)}.#{options[:input_format].to_s}")
    File.open(file, 'w') { |f| f.write(options[:input]) }
    temp_input = true
  end

  raise Documentalist::Error.new("No input file or input data was given") if file.nil?
  raise Documentalist::Error.new("#{file} does not exist !") unless File.exist?(file)

  if options[:to_format]
    # Escape the extension so regexp metacharacters in it are matched literally.
    extension = /#{Regexp.escape(File.extname(file))}\z/
    options[:to] = file.sub(extension, ".#{options[:to_format].to_s}")
  elsif options[:to]
    options[:to_format] = File.extname(options[:to]).gsub(/\./, "").to_sym
  elsif options[:stream]
    options[:to_format] = options[:stream]
    options[:to] = File.join(Dir.tmpdir, "#{rand(10**9)}.#{options[:to_format]}")
  else
    raise Documentalist::Error.new("No destination, format, or stream format was given")
  end

  options[:from_format] = File.extname(file).gsub(/\./, "").to_sym

  backend = backend_for_conversion(options[:from_format], options[:to_format])
  if backend.nil?
    raise Documentalist::Error.new(
      "No backend found to convert #{options[:from_format]} to #{options[:to_format]}"
    )
  end

  begin
    backend.convert(file, options)
  ensure
    # Only remove the input when this method created it.
    FileUtils.rm(file) if temp_input && File.exist?(file)
  end

  if options[:stream]
    data = File.read(options[:to])
    FileUtils.rm(options[:to])
    yield(data) if block_given?
    data
  else
    yield(options[:to]) if block_given?
    options[:to]
  end
end
.default_config! ⇒ Object
26 27 28 |
# File 'lib/documentalist.rb', line 26
# Loads the configuration from config/default.yml, located one directory
# above this source file.
def self.default_config!
  default_path = File.join(File.dirname(__FILE__), "..", "config", "default.yml")
  config_from_yaml!(default_path)
end
.extract_images(file) {|image_files| ... } ⇒ Object
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/documentalist.rb', line 104
# Extracts the images embedded in a document.
#
# For a '.pdf' file, the document is copied into a fresh temporary directory,
# `pdfimages` is run on the copy, and every resulting .ppm image is converted
# to JPEG. Any other file is converted to HTML.
#
# Yields the list of image files found in the temporary directory when a
# block is given, and returns that list.
#
# NOTE(review): in the non-PDF branch nothing is written to +temp_dir+ by
# this method, so the returned list looks like it will be empty unless the
# backend places images there -- confirm against the HTML backend.
def self.extract_images(file)
  temp_dir = File.join(Dir.tmpdir, rand(10**9).to_s)

  if File.extname(file) == '.pdf'
    temp_file = File.join(temp_dir, File.basename(file))

    FileUtils.mkdir_p temp_dir
    FileUtils.cp file, temp_file

    # Multi-argument form: no shell is involved, so paths containing spaces
    # or shell metacharacters are passed to pdfimages verbatim.
    system "pdfimages", temp_file, File.join(temp_dir, "img")

    Dir.glob(File.join(temp_dir, "*.ppm")).each do |ppm_image|
      Documentalist.convert(ppm_image, :to_format => :jpeg)
    end
  else
    Documentalist.convert file, :to_format => :html
  end

  image_files = Dir.glob(File.join(temp_dir, "*.{jpg,jpeg,bmp,tif,tiff,gif,png}"))

  yield(image_files) if block_given?

  image_files
end
.extract_text(file) ⇒ Object
93 94 95 96 97 98 99 100 101 102 |
# File 'lib/documentalist.rb', line 93
# Extracts the text of a document by converting it to a .txt file.
#
# The converted file is read, transcoded to UTF-8 and then deleted.
# Yields the text when a block is given and returns it. Returns nil when the
# conversion did not produce a file.
def self.extract_text(file)
  converted = convert(file, :to_format => :txt)
  return unless converted && File.exist?(converted)

  # File.read closes the handle itself, unlike File.open(...).read.
  text = Kconv.toutf8(File.read(converted))
  FileUtils.rm(converted)
  yield(text) if block_given?
  text
end
.logger ⇒ Object
Returns the logger object used to log documentalist operations
149 150 151 152 153 154 155 156 157 |
# File 'lib/documentalist.rb', line 149
# Returns the logger object used to log documentalist operations.
#
# The logger is created on first use and cached. It writes to
# config[:log_file], which defaults to documentalist.log one directory above
# this source file, and its level comes from config[:log_level] (WARN when
# unset).
def self.logger
  unless @@logger
    Documentalist.config[:log_file] ||= File.join(File.dirname(File.expand_path(__FILE__)), %w{.. documentalist.log})
    @@logger = Logger.new(Documentalist.config[:log_file])
    # to_s lets the level be configured either as a string or as a symbol.
    @@logger.level = Logger.const_get(config[:log_level] ? config[:log_level].to_s.upcase : "WARN")
  end
  @@logger
end
.odf_merge(template, options = {}) ⇒ Object
Merge an ODF document with an arbitrary hash of data
7 8 9 |
# File 'lib/backends/odf_merge.rb', line 7
# Merge an ODF document with an arbitrary hash of data.
#
# template - the ODF template, passed through to ODFMerge.merge_template.
# options  - hash forwarded unchanged to ODFMerge.merge_template.
#
# Returns whatever ODFMerge.merge_template returns.
def self.odf_merge(template, options = {})
  ODFMerge.merge_template(template, options)
end
.symbolize(hash) ⇒ Object
Returns a new hash with recursively symbolized keys
174 175 176 177 178 179 |
# File 'lib/documentalist.rb', line 174
# Returns a new hash with recursively symbolized keys.
#
# hash - a hash whose keys respond to +to_sym+. Values that are hashes are
#        symbolized recursively; every other value is kept as is.
#
# The input is left untouched: a fresh hash is built instead of adding and
# deleting keys while iterating, which Ruby rejects with a RuntimeError.
def self.symbolize(hash)
  hash.each_with_object({}) do |(key, value), result|
    result[key.to_sym] = value.is_a?(Hash) ? symbolize(value) : value
  end
end
.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil}) ⇒ Object
Runs a block with a system-enforced timeout, optionally retrying it with an optional sleep between attempts. All times are in seconds.
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/documentalist.rb', line 132
# Runs a block under SystemTimer.timeout, optionally retrying it when it
# times out. All times are in seconds.
#
# time_limit - time limit handed to SystemTimer.timeout.
# options    - hash with:
#              :attempts - total number of times the block may be run
#                          (defaults to 1; values below 1 behave like 1)
#              :sleep    - pause between two attempts, skipped when nil
#
# Returns the value of the block, or nil when no block is given.
# Re-raises Timeout::Error once every attempt has timed out.
def self.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil})
  return unless block_given?

  attempts_left = options[:attempts] || 1
  begin
    SystemTimer.timeout(time_limit) { yield }
  rescue Timeout::Error
    attempts_left -= 1
    # Compare with > 0 rather than zero? so a non-positive :attempts cannot
    # loop forever, and give up before sleeping when no retry will follow.
    raise unless attempts_left > 0
    sleep(options[:sleep]) if options[:sleep]
    retry
  end
end