Module: Documentalist
- Defined in:
- lib/dependencies.rb,
lib/documentalist.rb,
lib/backends/net_pbm.rb,
lib/backends/odf_merge.rb,
lib/backends/pdf_tools.rb,
lib/backends/open_office.rb,
lib/backends/wkhtmltopdf.rb
Defined Under Namespace
Modules: Dependencies, NetPBM, ODFMerge, OpenOffice, PdfTools, WkHtmlToPdf
Classes: Error
Constant Summary collapse
- BACKENDS =
{
  # Find a better pattern to pick backend, this one smells pretty bad
  :WkHtmlToPdf => {[:html, :htm] => :pdf},
  :OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
  :NetPBM => {:ppm => [:jpg, :jpeg]},
  :PdfTools => {:pdf => :txt},
}
- @@config =
{}
- @@logger =
nil
Class Method Summary collapse
-
.backend_for_conversion(origin, destination) ⇒ Object
Finds the relevant backend to perform the conversion.
-
.check_dependencies ⇒ Object
Checks the dependencies for backends.
- .config ⇒ Object
- .config=(hash) ⇒ Object
- .config? ⇒ Boolean
- .config_from_yaml!(file, options = {}) ⇒ Object
-
.convert(file, options = {}) {|| ... } ⇒ Object
Takes all conversion requests and dispatches them appropriately.
- .default_config! ⇒ Object
- .extract_images(file) {|image_files| ... } ⇒ Object
- .extract_text(file) ⇒ Object
-
.logger ⇒ Object
Returns the logger object used to log documentalist operations.
-
.odf_merge(template, options = {}) ⇒ Object
Merge an ODF document with an arbitrary hash of data.
-
.symbolize(hash) ⇒ Object
Returns a new hash with recursively symbolized keys.
-
.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil}) ⇒ Object
Runs a block with a system-enforced timeout, optionally retrying it with an optional sleep between attempts.
Class Method Details
.backend_for_conversion(origin, destination) ⇒ Object
Finds the relevant backend to perform the conversion
44 45 46 47 48 49 50 51 |
# File 'lib/documentalist.rb', line 44
#
# Finds the backend able to convert from one format to another.
#
# Each argument may be a bare format (:pdf, "pdf") or a file name
# ("report.odt"); everything up to and including the last dot is discarded.
#
# @param origin [#to_s] source format or file name
# @param destination [#to_s] target format or file name
# @return [Object, nil] the constant named by the first BACKENDS entry that
#   lists both formats, or nil when no entry does
def self.backend_for_conversion(origin, destination)
  from = origin.to_s.gsub(/.*\./, "").to_sym
  to = destination.to_s.gsub(/.*\./, "").to_sym

  name, _conversions = BACKENDS.find do |_name, conversions|
    conversions.keys.flatten.include?(from) && conversions.values.flatten.include?(to)
  end

  # Resolve the constant only for the matching entry, so a backend that is
  # listed but not loaded cannot break lookups that do not need it.
  name && const_get(name)
end
.check_dependencies ⇒ Object
Checks the dependencies for backends
141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/documentalist.rb', line 141
#
# Walks every constant defined under Documentalist and calls
# +check_dependencies+ on each one that responds to it, announcing every
# check on standard output.
def self.check_dependencies
  puts "Checking backends system dependencies"

  Documentalist.constants.each do |name|
    candidate = Documentalist.const_get(name.to_sym)
    next unless candidate.respond_to?(:check_dependencies)

    puts "Checking dependencies for #{candidate}"
    candidate.send(:check_dependencies)
  end
end
.config ⇒ Object
12 13 14 15 |
# File 'lib/documentalist.rb', line 12
#
# Returns the current configuration hash, loading the default configuration
# first when nothing has been configured yet.
def self.config
  config? || default_config!
  @@config
end
.config=(hash) ⇒ Object
17 18 19 20 |
# File 'lib/documentalist.rb', line 17
#
# Stores the given hash as the configuration.
def self.config=(hash)
  # Keys are symbolized by our own helper because this library does not
  # depend on Active Support.
  @@config = symbolize(hash)
end
.config? ⇒ Boolean
22 23 24 |
# File 'lib/documentalist.rb', line 22
#
# Tells whether a configuration has been set, i.e. whether the stored
# configuration differs from an empty hash.
def self.config?
  !(@@config == {})
end
.config_from_yaml!(file, options = {}) ⇒ Object
30 31 32 33 |
# File 'lib/documentalist.rb', line 30
#
# Replaces the configuration with the contents of a YAML file.
#
# @param file [String] path of the YAML file to load
# @param options [Hash]
# @option options [#to_sym] :section when given, only the value stored under
#   that top-level key of the loaded document is kept as the configuration
def self.config_from_yaml!(file, options = {})
  # The block form closes the file handle as soon as the document is parsed.
  # NOTE(review): YAML::load is kept as in the original; the file is assumed
  # to be a trusted configuration file - confirm before loading external data.
  self.config = File.open(file) { |io| YAML::load(io) }
  self.config = config[options[:section].to_sym] if options[:section]
end
.convert(file, options = {}) {|| ... } ⇒ Object
Takes all conversion requests and dispatches them appropriately
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/documentalist.rb', line 54
#
# Takes all conversion requests and dispatches them to the matching backend.
#
# The options hash is updated in place with :to, :to_format and :from_format
# before being handed to the backend.
#
# @param file [String] path of the document to convert; must exist
# @param options [Hash]
# @option options [#to_s] :to_format target format; the destination becomes
#   +file+ with its extension replaced (takes precedence over :to)
# @option options [String] :to destination path; the target format is taken
#   from its extension
# @yield [destination] the destination path, once the backend has run
# @return [String] the destination path
# @raise [RuntimeError] when +file+ does not exist
# @raise [Documentalist::Error] when neither :to nor :to_format is given, or
#   when no backend supports the requested conversion
def self.convert(file, options = {})
  raise "#{file} does not exist !" unless File.exist?(file)

  if options[:to_format]
    # Escape the extension and anchor at the very end: an unescaped, possibly
    # empty extension used to produce a pattern that never matched, leaving
    # the destination identical to the source for extension-less files.
    extension = Regexp.escape(File.extname(file))
    options[:to] = file.sub(/#{extension}\z/, "") + ".#{options[:to_format]}"
  elsif options[:to]
    options[:to_format] = File.extname(options[:to]).delete(".").to_sym
  else
    raise Documentalist::Error.new("No destination or format was given")
  end

  options[:from_format] = File.extname(file).delete(".").to_sym

  backend = backend_for_conversion(options[:from_format], options[:to_format])
  unless backend
    raise Documentalist::Error.new(
      "No backend can convert #{options[:from_format]} to #{options[:to_format]}"
    )
  end
  backend.convert(file, options)

  yield(options[:to]) if block_given?
  options[:to]
end
.default_config! ⇒ Object
26 27 28 |
# File 'lib/documentalist.rb', line 26
#
# Loads the configuration from config/default.yml, located one directory
# above the directory of this source file.
def self.default_config!
  default_file = File.join(File.dirname(__FILE__), "..", "config", "default.yml")
  config_from_yaml!(default_file)
end
.extract_images(file) {|image_files| ... } ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/documentalist.rb', line 85
#
# Extracts the images of a document into a temporary directory.
#
# PDF files are copied to the temporary directory and processed with the
# external +pdfimages+ command; every resulting PPM file is then converted to
# JPEG. Any other file is converted to HTML.
#
# @param file [String] path of the document
# @yield [image_files] the list of image paths found
# @return [Array<String>] paths of the image files found in the temporary
#   directory
def self.extract_images(file)
  temp_dir = File.join(Dir.tmpdir, rand(10**9).to_s)

  if File.extname(file) == '.pdf'
    temp_file = File.join(temp_dir, File.basename(file))
    FileUtils.mkdir_p temp_dir
    FileUtils.cp file, temp_file

    # Argument-list form: no shell is involved, so paths containing spaces or
    # shell metacharacters are passed to pdfimages verbatim.
    system "pdfimages", temp_file, File.join(temp_dir, "img")

    Dir.glob(File.join(temp_dir, "*.ppm")).each do |ppm_image|
      Documentalist.convert(ppm_image, :to_format => :jpeg)
    end
  else
    # NOTE(review): temp_dir is neither created nor passed to the conversion
    # in this branch, so the glob below looks like it can only come back
    # empty here - confirm where the HTML backend writes its images.
    Documentalist.convert file, :to_format => :html
  end

  image_files = Dir.glob(File.join(temp_dir, "*.{jpg,jpeg,bmp,tif,tiff,gif,png}"))

  yield(image_files) if block_given?
  image_files
end
.extract_text(file) ⇒ Object
74 75 76 77 78 79 80 81 82 83 |
# File 'lib/documentalist.rb', line 74
#
# Extracts the text of a document by converting it to a .txt file, reading
# that file as UTF-8 and deleting it afterwards.
#
# @param file [String] path of the document
# @yield [text] the extracted text
# @return [String, nil] the extracted text, or nil when the conversion
#   returned nothing or the converted file does not exist
def self.extract_text(file)
  converted = convert(file, :to_format => :txt)
  return unless converted && File.exist?(converted)

  # File.read closes the handle before the file is removed; the previous
  # File.open(...).read left it open.
  text = Kconv.toutf8(File.read(converted))
  FileUtils.rm(converted)

  yield(text) if block_given?
  text
end
.logger ⇒ Object
Returns the logger object used to log documentalist operations
130 131 132 133 134 135 136 137 138 |
# File 'lib/documentalist.rb', line 130
#
# Returns the logger object used to log documentalist operations.
#
# The logger is created on first use and cached. It writes to the configured
# :log_file, which defaults to documentalist.log one directory above this
# source file, at the configured :log_level (WARN when none is set).
#
# @return [Logger] the cached logger
def self.logger
  unless @@logger
    # The default path is stored back into the configuration.
    Documentalist.config[:log_file] ||= File.join(File.dirname(File.expand_path(__FILE__)), %w{.. documentalist.log})
    @@logger = Logger.new(Documentalist.config[:log_file])
    # The level name is upcased to match the Logger severity constants.
    @@logger.level = Logger.const_get(config[:log_level] ? config[:log_level].upcase : "WARN")
  end
  @@logger
end
.odf_merge(template, options = {}) ⇒ Object
Merge an ODF document with an arbitrary hash of data
7 8 9 |
# File 'lib/backends/odf_merge.rb', line 7
#
# Merge an ODF document with an arbitrary hash of data.
#
# Thin delegate to ODFMerge.merge_template.
#
# @param template [Object] passed through unchanged as the template
# @param options [Hash] passed through unchanged
# @return [Object] whatever ODFMerge.merge_template returns
def self.odf_merge(template, options = {})
  ODFMerge.merge_template(template, options)
end
.symbolize(hash) ⇒ Object
Returns a new hash with recursively symbolized keys
155 156 157 158 159 160 |
# File 'lib/documentalist.rb', line 155
#
# Returns a new hash with recursively symbolized keys.
#
# The given hash is left untouched: the previous in-place version added keys
# while iterating, which raises for any hash whose keys are not already
# symbols. Keys that do not respond to +to_sym+ are kept as they are, and
# only values that are hashes are recursed into.
#
# @param hash [Hash] hash whose keys should be symbolized
# @return [Hash] a new hash with symbolized keys at every nesting level
def self.symbolize(hash)
  hash.each_with_object({}) do |(key, value), symbolized|
    new_key = key.respond_to?(:to_sym) ? key.to_sym : key
    symbolized[new_key] = value.is_a?(Hash) ? symbolize(value) : value
  end
end
.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil}) ⇒ Object
Runs a block with a system-enforced timeout, optionally retrying it with an optional sleep between attempts. All times are in seconds.
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/documentalist.rb', line 113
#
# Runs a block with a system-enforced timeout, optionally retrying it with an
# optional sleep between attempts. All times are in seconds.
#
# @param time_limit [Numeric] time limit handed to SystemTimer.timeout
# @param options [Hash]
# @option options [Integer] :attempts total number of times the block may be
#   run (1 when missing)
# @option options [Numeric, nil] :sleep pause between two attempts
# @yield the work to run under the timeout
# @return [Object, nil] the block's value, or nil when no block is given
# @raise [Timeout::Error] when the last attempt times out
def self.timeout(time_limit = 0, options = {:attempts => 1, :sleep => nil})
  return unless block_given?

  attempts = options[:attempts] || 1
  begin
    SystemTimer.timeout(time_limit) { yield }
  rescue Timeout::Error
    attempts -= 1
    # Compare with `> 0`: with :attempts of zero or less the counter never
    # equals zero, which used to retry forever.
    raise unless attempts > 0

    # Sleep only when another attempt follows, not before giving up.
    sleep(options[:sleep]) if options[:sleep]
    retry
  end
end