Class: Docx::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/docx/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ Parser

Returns a new instance of Parser.



9
10
11
12
13
14
15
16
17
# File 'lib/docx/parser.rb', line 9

# Opens the archive at +path+, reads 'word/document.xml' from it and parses
# that XML with Nokogiri. The archive handle, raw XML and parsed document are
# stored in @zip, @xml and @doc respectively.
#
# When a block is given, the parser is yielded to it and the archive is
# closed once the block finishes -- including when the block raises, so the
# open handle is never leaked. Without a block the archive is left open.
#
# @param path path of the .docx file, handed to Zip::ZipFile.open
# @yield [parser] optional block run while the archive is open
# @yieldparam parser [Parser] this parser instance
def initialize(path)
  @zip = Zip::ZipFile.open(path)
  @xml = @zip.read('word/document.xml')
  @doc = Nokogiri::XML(@xml)
  return unless block_given?

  begin
    yield self
  ensure
    # Runs on both normal and exceptional exit from the block.
    @zip.close
  end
end

Instance Attribute Details

#doc ⇒ Object (readonly)

Returns the value of attribute doc.



8
9
10
# File 'lib/docx/parser.rb', line 8

# Reader for @doc, which the constructor sets to the result of
# Nokogiri::XML(@xml), i.e. the parsed 'word/document.xml'.
#
# @return the parsed document stored in @doc
def doc
  @doc
end

#xml ⇒ Object (readonly)

Returns the value of attribute xml.



8
9
10
# File 'lib/docx/parser.rb', line 8

# Reader for @xml, which the constructor sets to the content of
# 'word/document.xml' as read from the archive (before parsing).
#
# @return the raw document XML stored in @xml
def xml
  @xml
end

#zip ⇒ Object (readonly)

Returns the value of attribute zip.



8
9
10
# File 'lib/docx/parser.rb', line 8

# Reader for @zip, the archive handle returned by Zip::ZipFile.open in the
# constructor. If the constructor was called with a block, the handle has
# already been closed once that block returned.
#
# @return the archive handle stored in @zip
def zip
  @zip
end

Instance Method Details

#bookmarks ⇒ Object



23
24
25
26
27
28
29
30
# File 'lib/docx/parser.rb', line 23

# Builds a name => bookmark lookup from every w:bookmarkStart node in the
# document. Each node is converted with parse_bookmark_from; when two
# bookmarks share a name, the later one in document order wins.
#
# @return [Hash] parsed bookmarks keyed by their name
def bookmarks
  @doc.xpath('//w:bookmarkStart').each_with_object({}) do |start_node, by_name|
    bookmark = parse_bookmark_from(start_node)
    # "_GoBack" is auto-generated by Office 2010, so it is left out.
    next if bookmark.name == "_GoBack"

    by_name[bookmark.name] = bookmark
  end
end

#paragraphs ⇒ Object



19
20
21
# File 'lib/docx/parser.rb', line 19

# Collects every w:p node found under w:document / w:body and converts each
# one with parse_paragraph_from, preserving the order returned by the query.
#
# @return the converted paragraphs, one per matched w:p node
def paragraphs
  paragraph_nodes = @doc.xpath('//w:document//w:body//w:p')
  paragraph_nodes.map { |node| parse_paragraph_from(node) }
end