Class: Doc2Text::Odt::Document
- Inherits:
  Object
  - Object
    - Doc2Text::Odt::Document
- Defined in:
- lib/doc2text/odt.rb
Constant Summary collapse
- EXTRACT_EXTENSION =
'unpacked_odt'
Class Method Summary collapse
- .parse_and_save(input, output_filename) ⇒ Object
Instance Method Summary collapse
- #clean ⇒ Object
- #extract_path ⇒ Object
- #initialize(document_path) ⇒ Document (constructor)
  Returns a new instance of Document.
- #open(filename) ⇒ Object
  Opens a file from the current ODT.
- #parse(markdown) ⇒ Object
- #unpack ⇒ Object
- #xml_file(filename, rood_node_name) ⇒ Object
  Parses an XML file from the current ODT.
Constructor Details
#initialize(document_path) ⇒ Document
Returns a new instance of Document.
30 31 32 |
# File 'lib/doc2text/odt.rb', line 30
# Builds a Document for the ODT file at the given location.
#
# Only the path is recorded here; nothing is read or unpacked.
#
# @param document_path [String] path of the ODT file (later passed to
#   File.dirname / File.basename by #extract_path)
def initialize(document_path)
  @document_path = document_path
end
Class Method Details
.parse_and_save(input, output_filename) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/doc2text/odt.rb', line 8
# Converts an ODT file to Markdown and writes the result to a file.
#
# The archive is unpacked, its content is parsed into a Markdown::Document
# that writes to +output_filename+, and the extraction directory is cleaned
# up afterwards, even when unpacking or parsing raises.
#
# @param input [String] path of the ODT document to convert
# @param output_filename [String] path of the file to write (opened with mode 'w')
# @return [Object] whatever #parse returns
def self.parse_and_save(input, output_filename)
  odt = new input
  begin
    odt.unpack
    # Block form of File.open: the handle is closed on every exit path,
    # including a failure while constructing the Markdown document, which
    # would otherwise happen before markdown.close could ever run.
    File.open(output_filename, 'w') do |output|
      markdown = Markdown::Document.new output
      begin
        odt.parse markdown
      ensure
        markdown.close
      end
    end
  ensure
    odt.clean
  end
end
Instance Method Details
#clean ⇒ Object
46 47 48 49 50 |
# File 'lib/doc2text/odt.rb', line 46
# Deletes the extraction directory, but only when it looks like an unpacked ODT.
#
# The directory is removed recursively only if it exists and contains both
# 'content.xml' and 'mimetype'; otherwise nothing is touched.
def clean
  expected_paths = [extract_path] + %w[content.xml mimetype].map { |name| File.join(extract_path, name) }
  return unless expected_paths.all? { |path| File.exist?(path) }

  FileUtils.rm_r extract_path
end
#extract_path ⇒ Object
68 69 70 |
# File 'lib/doc2text/odt.rb', line 68
# Directory into which the ODT archive is unpacked.
#
# It sits next to the source document and is named after it: a leading dot,
# the document's file name, an underscore, then EXTRACT_EXTENSION.
#
# @return [String] path of the extraction directory
def extract_path
  parent_dir = File.dirname(@document_path)
  hidden_dir_name = ".#{File.basename(@document_path)}_#{EXTRACT_EXTENSION}"
  File.join(parent_dir, hidden_dir_name)
end
#open(filename) ⇒ Object
Open file from the current odt
53 54 55 |
# File 'lib/doc2text/odt.rb', line 53
# Opens a file from the unpacked ODT for reading.
#
# Note that inside this class the method shadows Kernel#open.
#
# Without a block the caller owns the returned handle and must close it.
# With a block the handle is yielded and closed when the block finishes
# (File.open semantics), and the block's value is returned.
#
# @param filename [String] path relative to #extract_path, e.g. 'content.xml'
# @yield [file] optional; receives the opened read-only handle
# @return [File, Object] the open handle, or the block's result when a block is given
def open(filename, &block)
  File.open(File.join(extract_path, filename), 'r', &block)
end
#parse(markdown) ⇒ Object
24 25 26 27 28 |
# File 'lib/doc2text/odt.rb', line 24
# Streams 'content.xml' from the unpacked ODT through a Nokogiri SAX parser
# whose handler is a Doc2Text::Odt::Content::Document built around +markdown+.
#
# @param markdown [Object] passed to Doc2Text::Odt::Content::Document.new
# @return [Object] the value returned by the SAX parser's #parse
def parse(markdown)
  content = ::Doc2Text::Odt::Content::Document.new markdown
  parser = Nokogiri::XML::SAX::Parser.new(content) # { |config| config.strict}
  content_xml = open 'content.xml'
  begin
    parser.parse content_xml
  ensure
    # #open returns a raw handle; close it here on success and on failure so
    # the descriptor is not left for the garbage collector.
    content_xml.close
  end
end
#unpack ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/doc2text/odt.rb', line 34
# Unpacks every entry of the ODT (zip) archive into #extract_path.
#
# The extraction directory is created with Dir.mkdir, so an already existing
# directory raises instead of being reused. Parent directories of each entry
# are created as needed.
#
# @raise [Zip::Error] when an entry name resolves outside the extraction
#   directory (e.g. contains '../')
def unpack
  Zip::File.open(@document_path) do |zip_file|
    Dir.mkdir(extract_path)
    extraction_root = File.expand_path(extract_path)

    zip_file.each do |entry|
      zipped_file_extract_path = File.join extract_path, entry.name

      # Entry names come from the archive and are untrusted: refuse anything
      # that resolves outside the extraction directory ("zip slip").
      resolved_path = File.expand_path(zipped_file_extract_path)
      inside_root = resolved_path == extraction_root ||
                    resolved_path.start_with?(extraction_root + File::SEPARATOR)
      raise Zip::Error, "Archive entry escapes extraction directory: #{entry.name}" unless inside_root

      FileUtils.mkdir_p File.dirname(zipped_file_extract_path)
      zip_file.extract entry, zipped_file_extract_path
    end
  end
end
#xml_file(filename, rood_node_name) ⇒ Object
Parse xml file from the current odt
58 59 60 61 62 63 64 65 66 |
# File 'lib/doc2text/odt.rb', line 58
# Validates an XML file from the unpacked ODT and returns it opened for reading.
#
# The file is parsed with Nokogiri's strict configuration; its root element
# must be named +rood_node_name+ and carry the 'office' namespace prefix.
#
# @param filename [String] path relative to the extraction directory
# @param rood_node_name [String] expected name of the root element
#   (parameter spelling kept as-is for compatibility)
# @return [Object] a fresh handle from #open; the caller is responsible for closing it
# @raise [XmlError] when the root element is missing, has another name, or
#   does not use the 'office' namespace prefix
def xml_file(filename, rood_node_name)
  validation_io = open(filename)
  begin
    # The parsed document must be kept: the root check below reads it.
    doc = Nokogiri::XML::Document.parse(validation_io) { |config| config.strict }
  ensure
    # This handle is only needed for validation; a new one is returned below.
    validation_io.close
  end

  root_node = doc.root
  # A missing root or a root without any namespace is reported as XmlError
  # rather than surfacing as NoMethodError on nil.
  valid_root = root_node &&
               root_node.name == rood_node_name &&
               root_node.namespace &&
               root_node.namespace.prefix == 'office'
  raise XmlError, 'Document does not have correct root element' unless valid_root

  open(filename)
end