Module: BatchExtract

Extended by:
BatchExtract
Included in:
BatchExtract
Defined in:
lib/batch_extract.rb

Instance Method Summary collapse

Instance Method Details

#batch_extract_from_dir(source, destination, options = {}) ⇒ Object

batch_extract_from_dir

Batch-extracts the book structure from every file found in a directory.

parameters:

+source+     source directory
+destination+   output directory
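+options+        optional parameters.
   :format     file extension (including the leading dot, e.g. '.txt') used to filter the scanned files; the remaining options are passed on to ExtractBookStruct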


14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/batch_extract.rb', line 14

# Extracts the book structure from every file found under +source+ and writes
# each result to "<basename>.xml" under +destination+, mirroring the file's
# location relative to +source+.
#
# parameters:
#
# +source+        source directory (scanned with scan_file_from_dir)
# +destination+   output directory; missing sub-directories are created
# +options+       optional parameters; the caller's hash is left untouched
#    :format      handed to scan_file_from_dir as its :format option and
#                 withheld from the options given to ExtractBookStruct
#
# Files whose extension is not .html, .txt or .epub are announced but produce
# no output. Errors raised while extracting or writing are not rescued here.
#
# Returns the array of scanned file paths.
def batch_extract_from_dir(source, destination, options = {})
  # Work on a copy so removing :format does not alter the caller's hash.
  extract_options = options.dup
  format = extract_options.delete(:format)
  source_prefix = source.to_s

  scan_file_from_dir(source, { :format => format }).each do |file|
    extname = File.extname(file)
    basename = File.basename(file, extname)
    # Strip +source+ only where it leads the path; removing every occurrence
    # would corrupt paths that repeat the source name further down.
    relative = file.start_with?(source_prefix) ? file[source_prefix.length..-1] : file
    dest_dir = File.dirname(File.join(destination, relative))
    dest_file = File.join(dest_dir, "#{basename}.xml")
    puts "start extract #{file} ..."

    docbook_xml =
      case extname
      when '.html'
        ExtractBookStruct.from_html(file, extract_options)
      when '.txt'
        ExtractBookStruct.from_txt(file, extract_options)
      when '.epub'
        ExtractBookStruct.from_epub(file, extract_options)
      end
    next unless docbook_xml

    # The mirrored sub-directory may not exist yet under +destination+.
    Pathname.new(dest_dir).mkpath
    File.open(dest_file, 'wb') { |out| out.write docbook_xml }
    puts "success: extract book struct  successfully!"
  end
end

#scan_file_from_dir(dir, options = {}) ⇒ Object

scan_file_from_dir

Traverses the files under a directory.

parameters:

+dir+       directory to traverse


51
52
53
54
55
56
57
# File 'lib/batch_extract.rb', line 51

# Collects every entry yielded by walk_dir for +dir+ and returns them as an
# array of strings (each entry converted with +to_s+).
#
# +dir+      directory handed to walk_dir
# +options+  forwarded unchanged to walk_dir
def scan_file_from_dir(dir, options = {})
  [].tap do |collected|
    walk_dir(dir, options) { |entry| collected << entry.to_s }
  end
end

#walk_dir(path_str, options = {}) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/batch_extract.rb', line 59

# Recursively walks the directory +path_str+ and yields each regular file as
# a Pathname.
#
# +path_str+  directory to walk (a String or a Pathname)
# +options+   optional parameters
#    :format  when set, only files whose extname equals this value are
#             yielded (e.g. '.txt'); the filter applies at every depth
#
# A block is required; entries that are neither directories nor regular
# files are skipped.
def walk_dir(path_str, options = {}, &block)
  format = options[:format]
  Pathname.new(path_str).children.each do |entry|
    if entry.directory?
      # Forward the options so the :format filter also applies to nested
      # directories instead of being silently dropped.
      walk_dir(entry, options, &block)
    elsif entry.file?
      yield entry if !format || entry.extname == format
    end
  end
end