Module: BatchExtract
Instance Method Summary collapse
-
#batch_extract_from_dir(source, destination, options = {}) ⇒ Object
batch_extract_from_dir: batch extract book struct from dir. parameters:
source: source directory; destination: output directory; options: optional parameter. -
#scan_file_from_dir(dir, options = {}) ⇒ Object
scan_file_from_dir 遍历目录下的文件 parameters:
dir: 需遍历的目录; options: 可选参数 :format 指定需要遍历的文件后缀名,例如要遍历所有pdf文件,通过 :format=>'.pdf' 指定. - #walk_dir(path_str, options = {}) ⇒ Object
Instance Method Details
#batch_extract_from_dir(source, destination, options = {}) ⇒ Object
batch_extract_from_dir
batch extract book struct from dir
parameters:
+source+ source directory
+destination+ output directory
+options+ optional parameter.
:format
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/batch_extract.rb', line 14
#
# batch_extract_from_dir
# Extracts the book structure of every supported file found under +source+
# and writes each result as "<basename>.xml" below +destination+, in the same
# sub-directory the input file has relative to +source+.
#
# parameters:
# +source+       source directory
# +destination+  output directory (missing sub-directories are created)
# +options+      optional parameter.
#   :format  handed to scan_file_from_dir to select the files to scan; it is
#            not forwarded to the extractors.
#   Every other entry is forwarded unchanged to ExtractBookStruct.
#
# Only '.html', '.txt' and '.epub' files are extracted; any other extension is
# skipped. Nothing is written when the extractor returns nil/false.
# No rescue is active here, so an error raised by an extractor propagates to
# the caller and stops the batch.
#
# Returns the list of scanned file paths.
def batch_extract_from_dir(source, destination, options = {})
  format = options[:format]
  # Filter into a copy instead of deleting :format from the caller's hash.
  extract_options = options.reject { |key, _value| key == :format }
  source_root = Pathname.new(source)
  destination_root = Pathname.new(destination)

  files = scan_file_from_dir(source, { :format => format })
  files.each do |file|
    extname = File.extname(file)
    basename = File.basename(file, extname)
    # Strip only the leading source directory; a plain gsub(source, '') would
    # also remove later occurrences of the same text inside the path.
    relative_dir = Pathname.new(file).relative_path_from(source_root).dirname
    dest_dir = destination_root + relative_dir
    dest_file = dest_dir + "#{basename}.xml"
    puts "start extract #{file} ..."
    docbook_xml = case extname
                  when '.html'
                    ExtractBookStruct.from_html(file, extract_options)
                  when '.txt'
                    ExtractBookStruct.from_txt(file, extract_options)
                  when '.epub'
                    ExtractBookStruct.from_epub(file, extract_options)
                  end
    next unless docbook_xml

    # Create the mirrored sub-directory on demand so nested inputs can be written.
    dest_dir.mkpath
    File.open(dest_file.to_s, 'wb') { |out| out.write docbook_xml }
    puts "success: extract book struct successfully!"
  end
end
#scan_file_from_dir(dir, options = {}) ⇒ Object
scan_file_from_dir 遍历目录下的文件parameters:
+dir+
51 52 53 54 55 56 57 |
# File 'lib/batch_extract.rb', line 51
#
# scan_file_from_dir
# Walks +dir+ with walk_dir and collects every yielded entry as a String.
#
# parameters:
# +dir+      directory to traverse
# +options+  optional parameter, passed through to walk_dir unchanged
#   :format  file extension to keep, e.g. :format => '.pdf'
#
# Returns an Array of path strings, in the order walk_dir yields them.
def scan_file_from_dir(dir, options = {})
  files = []
  walk_dir(dir, options) { |file| files << file.to_s }
  files
end
#walk_dir(path_str, options = {}) ⇒ Object
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/batch_extract.rb', line 59
#
# walk_dir
# Recursively traverses +path_str+ and yields a Pathname for each regular file.
#
# parameters:
# +path_str+  directory to traverse (String or Pathname)
# +options+   optional parameter.
#   :format  when set, only files whose extension equals this value
#            (e.g. '.pdf') are yielded; when nil/false every file is yielded.
#
# A block is required; entries are yielded in the order Pathname#children
# returns them.
def walk_dir(path_str, options = {})
  format = options[:format]
  Pathname.new(path_str).children.each do |entry|
    if entry.directory?
      # Pass the options down so the :format filter also applies in sub-directories.
      walk_dir(entry, options) { |found| yield found }
    elsif entry.file?
      yield entry if !format || entry.extname == format
    end
  end
end