Class: Doc2Text::Odt::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/doc2text/odt.rb

Constant Summary collapse

# Suffix appended to the hidden directory name that extract_path builds for
# the unpacked archive. Frozen so the shared constant cannot be mutated.
EXTRACT_EXTENSION =
'unpacked_odt'.freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(document_path) ⇒ Document

Returns a new instance of Document.



30
31
32
# File 'lib/doc2text/odt.rb', line 30

# Remembers where the ODT file lives. Nothing is read or unpacked here.
#
# @param document_path [String] location of the ODT file (presumably a path
#   string: it is later handed to File.dirname/File.basename and Zip::File.open)
def initialize(document_path)
  @document_path = document_path
end

Class Method Details

.parse_and_save(input, output_filename) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/doc2text/odt.rb', line 8

# Unpacks the ODT at +input+, feeds its content to a Markdown::Document that
# writes to +output_filename+, and always removes the unpacked files afterwards.
#
# @param input [String] path of the ODT document to convert
# @param output_filename [String] path of the file to write (opened with 'w')
# @return [Object] whatever odt.clean returns is discarded; the value of
#   odt.parse is the method's result
def self.parse_and_save(input, output_filename)
  odt = new input
  begin
    odt.unpack
    output = File.open output_filename, 'w'
    begin
      markdown = Markdown::Document.new output
      begin
        odt.parse markdown
      ensure
        markdown.close
      end
    ensure
      # If Markdown::Document.new raised, nothing else owns the handle yet,
      # so close it here; skip when markdown.close already did.
      output.close unless output.closed?
    end
  ensure
    odt.clean
  end
end

Instance Method Details

#clean ⇒ Object



46
47
48
49
50
# File 'lib/doc2text/odt.rb', line 46

# Deletes the unpacked directory, but only when it looks like an unpacked ODT:
# the directory itself plus its content.xml and mimetype entries must exist.
# Does nothing otherwise.
def clean
  markers = %w[content.xml mimetype].map { |name| File.join(extract_path, name) }
  return unless File.exist?(extract_path) && markers.all? { |marker| File.exist?(marker) }

  FileUtils.rm_r extract_path
end

#extract_path ⇒ Object



68
69
70
# File 'lib/doc2text/odt.rb', line 68

# Builds the path of the hidden directory used for the unpacked archive: it
# sits next to the document and is named ".<document file name>_<EXTRACT_EXTENSION>".
#
# @return [String]
def extract_path
  parent_dir = File.dirname(@document_path)
  hidden_name = ".#{File.basename(@document_path)}_#{EXTRACT_EXTENSION}"
  File.join(parent_dir, hidden_name)
end

#open(filename) ⇒ Object

Opens a file from the unpacked ODT for reading.



53
54
55
# File 'lib/doc2text/odt.rb', line 53

# Opens, read-only, a file located under extract_path. No block is passed to
# File.open, so the handle is returned open and the caller must close it.
#
# @param filename [String] name relative to the unpacked directory
# @return [File]
def open(filename)
  unpacked_file = File.join(extract_path, filename)
  File.open(unpacked_file, 'r')
end

#parse(markdown) ⇒ Object



24
25
26
27
28
# File 'lib/doc2text/odt.rb', line 24

# Runs a SAX parse of the unpacked content.xml, using a
# Doc2Text::Odt::Content::Document built around +markdown+ as the handler.
#
# @param markdown [Object] passed straight to Content::Document.new
# @return [Object] the result of the SAX parser's #parse call
def parse(markdown)
  content = ::Doc2Text::Odt::Content::Document.new markdown
  parser = Nokogiri::XML::SAX::Parser.new(content) # { |config| config.strict}
  content_xml = open 'content.xml'
  begin
    parser.parse content_xml
  ensure
    # The handle from #open is ours; release it even when parsing fails.
    content_xml.close
  end
end

#unpack ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/doc2text/odt.rb', line 34

# Extracts every entry of the ODT archive at @document_path into extract_path.
#
# The directory is created with Dir.mkdir, so an already existing
# extract_path raises Errno::EEXIST. Entries whose name would resolve outside
# extract_path (e.g. "../x") are skipped with a warning instead of being
# written: the destination is passed to #extract explicitly, so the archive
# entry name must be validated here.
def unpack
  Zip::File.open(@document_path) do |zip_file|
    Dir.mkdir(extract_path)
    extract_root = File.expand_path(extract_path)
    zip_file.each do |entry|
      zipped_file_extract_path = File.join extract_path, entry.name
      resolved_path = File.expand_path(zipped_file_extract_path)
      # Anything not strictly below the extraction root is a traversal attempt.
      unless resolved_path.start_with?(extract_root + File::SEPARATOR)
        warn "WARNING: skipped '#{entry.name}' as unsafe."
        next
      end
      FileUtils.mkdir_p File.dirname(zipped_file_extract_path)
      zip_file.extract entry, zipped_file_extract_path
    end
  end
end

#xml_file(filename, rood_node_name) ⇒ Object

Parses an XML file from the unpacked ODT and validates its root element.



58
59
60
61
62
63
64
65
66
# File 'lib/doc2text/odt.rb', line 58

# Strictly parses +filename+ from the unpacked ODT, verifies that its root
# element is named +rood_node_name+ in the 'office' namespace prefix, and
# returns a freshly opened handle on the same file.
#
# @param filename [String] name relative to the unpacked directory
# @param rood_node_name [String] expected name of the root element
# @return [File] a new, open read handle (the caller closes it)
# @raise [XmlError] when the root element is missing, has a different name,
#   or is not in the 'office' namespace prefix
def xml_file(filename, rood_node_name)
  source = open(filename)
  doc = begin
    Nokogiri::XML::Document.parse(source) { |config| config.strict }
  ensure
    # This handle is only needed for validation; do not leak it.
    source.close
  end

  root_node = doc.root
  namespace = root_node && root_node.namespace
  valid_root = root_node && root_node.name == rood_node_name &&
               namespace && namespace.prefix == 'office'
  raise XmlError, 'Document does not have correct root element' unless valid_root

  open(filename)
end