Class: OoxmlParser::DocumentStructure

Inherits:
CommonDocumentStructure show all
Includes:
DocumentStructureHelpers, DocumentStyleHelper
Defined in:
lib/ooxml_parser/docx_parser/docx_data/document_structure.rb

Class Attribute Summary collapse

Instance Attribute Summary collapse

Attributes inherited from CommonDocumentStructure

#file_path

Attributes inherited from OOXMLDocumentObject

#parent

Class Method Summary collapse

Instance Method Summary collapse

Methods included from DocumentStructureHelpers

#with_data?

Methods included from DocumentStyleHelper

#based_on_style, #document_style_by_id, #document_style_by_name, #style_exist?

Methods inherited from OOXMLDocumentObject

add_to_xmls_stack, copy_file_and_rename_to_zip, current_xml, dir, encrypted_file?, get_link_from_rels, unzip_file, #with_data?

Methods included from OoxmlDocumentObjectHelper

#to_hash

Constructor Details

#initializeDocumentStructure

Returns a new instance of DocumentStructure.



32
33
34
35
36
37
38
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 32

# Creates an empty document structure: no body elements, notes, comments
# or document styles, and a fresh DocumentProperties object.
def initialize
  # Each collection gets its own independent empty array.
  @elements, @notes, @comments, @document_styles = Array.new(4) { [] }
  @document_properties = DocumentProperties.new
end

Class Attribute Details

.default_paragraph_styleObject

Returns the value of attribute default_paragraph_style.



219
220
221
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 219

# Class-level reader for the default paragraph style.
# The value is assigned by .parse and .parse_default_style.
# @return [Object] current value of @default_paragraph_style
def default_paragraph_style
  @default_paragraph_style
end

.default_run_styleObject

Returns the value of attribute default_run_style.



220
221
222
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 220

# Class-level reader for the default run style.
# The value is assigned by .parse and updated by .parse_default_style.
# @return [Object] current value of @default_run_style
def default_run_style
  @default_run_style
end

.default_table_paragraph_styleObject

Returns the value of attribute default_table_paragraph_style.



218
219
220
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 218

# Class-level reader for the default paragraph style used inside tables.
# The value is assigned by .parse_default_style.
# @return [Object] current value of @default_table_paragraph_style
def default_table_paragraph_style
  @default_table_paragraph_style
end

.default_table_run_styleObject

Returns the value of attribute default_table_run_style.



217
218
219
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 217

# Class-level reader for the default run style used inside tables.
# The value is assigned by .parse_default_style.
# @return [Object] current value of @default_table_run_style
def default_table_run_style
  @default_table_run_style
end

Instance Attribute Details

#backgroundObject

Returns the value of attribute background.



18
19
20
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 18

# Reader for the document background.
# .parse sets it from the w:background node of word/document.xml; it stays
# nil when that node is absent.
# @return [Object, nil] current value of @background
def background
  @background
end

#commentsObject

Returns the value of attribute comments.



18
19
20
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 18

# Reader for the document comments.
# Starts as an empty array in #initialize; .parse replaces it with the
# result of Comment.parse_list.
# @return [Object] current value of @comments
def comments
  @comments
end

#comments_extendedCommentsExtended

Returns extended comments.

Returns:

  • (CommentsExtended)

    extended comments



30
31
32
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 30

# Reader for the extended comments data; .parse assigns it from
# CommentsExtended#parse.
# @return [CommentsExtended] extended comments
def comments_extended
  @comments_extended
end

#document_propertiesObject

Returns the value of attribute document_properties.



18
19
20
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 18

# Reader for the document properties.
# Initialised to a blank DocumentProperties in #initialize and replaced by
# .parse with a parsed instance.
# @return [Object] current value of @document_properties
def document_properties
  @document_properties
end

#document_stylesArray, DocumentStyle

Returns array of document styles in current document.

Returns:

  • (Array, DocumentStyle)

    array of document styles in current document



21
22
23
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 21

# Reader for the document styles; empty array until .parse assigns the
# result of DocumentStyle.parse_list.
# @return [Array, DocumentStyle] array of document styles in current document
def document_styles
  @document_styles
end

#elementsObject

Returns the value of attribute elements.



18
19
20
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 18

# Reader for the body elements of the document.
# .parse appends parsed paragraphs, tables and structured document tags in
# the order they occur in the w:body node.
# @return [Array] current value of @elements
def elements
  @elements
end

#notesObject

Returns the value of attribute notes.



18
19
20
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 18

# Reader for the document notes.
# .parse copies page_properties.notes here for compatibility with earlier
# docx models; #note_by_description searches this collection.
# @return [Array] current value of @notes
def notes
  @notes
end

#numberingNumbering

Returns the stored numbering data.

Returns:

  • (Numbering)

    the stored numbering data



23
24
25
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 23

# Reader for the numbering data; .parse assigns it from Numbering#parse.
# @return [Numbering] stored numbering data
def numbering
  @numbering
end

#page_propertiesObject

Returns the value of attribute page_properties.



18
19
20
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 18

# Reader for the page properties.
# .parse sets it from the w:sectPr node directly under w:body; it stays nil
# when no such node exists.
# @return [Object, nil] current value of @page_properties
def page_properties
  @page_properties
end

#settingsDocumentSettings

Returns settings.

Returns:

  • (DocumentSettings)

    settings



28
29
30
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 28

# Reader for the document settings; .parse assigns it from
# DocumentSettings#parse.
# @return [DocumentSettings] settings
def settings
  @settings
end

#stylesStyles

Returns styles of document.

Returns:

  • (Styles)

    styles of document



25
26
27
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 25

# Reader for the styles object; .parse assigns it from Styles#parse.
# @return [Styles] styles of document
def styles
  @styles
end

#theme_colorsObject

Returns the value of attribute theme_colors.



26
27
28
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 26

# Reader for the theme colors; .parse assigns the result of
# PresentationTheme.parse('word/theme/theme1.xml').
# @return [Object] current value of @theme_colors
def theme_colors
  @theme_colors
end

Class Method Details

.parseObject



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 119

# Parses the already unpacked docx package into a DocumentStructure.
#
# Steps, in order:
# 1. resets the shared OOXMLDocumentObject state (root subfolder, xml stack)
#    and the class-level default paragraph/run styles;
# 2. reads theme colors, document defaults and default styles from
#    word/styles.xml, then numbering, document styles and styles;
# 3. walks word/document.xml collecting background, body elements and
#    page properties;
# 4. reads document properties, comments, extended comments and settings.
#
# XML files are opened with the block form of File.open so the file handle
# is closed as soon as Nokogiri has built the DOM.
#
# @return [DocumentStructure] the populated document structure
def self.parse
  doc_structure = DocumentStructure.new
  OOXMLDocumentObject.root_subfolder = 'word/'
  OOXMLDocumentObject.xmls_stack = []
  # NOTE(review): inside a class method this sets an instance variable on the
  # class object, not on doc_structure; kept for backward compatibility.
  @comments = []
  DocumentStructure.default_paragraph_style = DocxParagraph.new
  DocumentStructure.default_run_style = DocxParagraphRun.new
  doc_structure.theme_colors = PresentationTheme.parse('word/theme/theme1.xml')
  OOXMLDocumentObject.add_to_xmls_stack('word/styles.xml')
  doc = File.open(OOXMLDocumentObject.current_xml) { |file| Nokogiri::XML(file) }
  # TODO: Remove this old way parsing in favor of doc_structure.styles.document_defaults
  doc.search('//w:docDefaults').each do |doc_defaults|
    doc_defaults.xpath('w:pPrDefault').each do |p_pr_defaults|
      DocumentStructure.default_paragraph_style = DocxParagraph.new.parse(p_pr_defaults, 0)
    end
    doc_defaults.xpath('w:rPrDefault').each do |r_pr_defaults|
      r_pr_defaults.xpath('w:rPr').each do |r_pr|
        DocumentStructure.default_run_style = DocxParagraphRun.new.parse_properties(r_pr)
      end
    end
  end
  parse_default_style
  doc_structure.numbering = Numbering.new(parent: doc_structure).parse
  doc_structure.document_styles = DocumentStyle.parse_list(doc_structure)
  doc_structure.styles = Styles.new(parent: doc_structure).parse
  # Running index handed to paragraph and table parsers; structured document
  # tags do not advance it.
  number = 0
  OOXMLDocumentObject.add_to_xmls_stack('word/document.xml')
  doc = File.open(OOXMLDocumentObject.current_xml) { |file| Nokogiri::XML(file) }
  doc.search('//w:document').each do |document|
    document.xpath('w:background').each do |background|
      doc_structure.background = DocumentBackground.new(parent: doc_structure).parse(background)
    end
    document.xpath('w:body').each do |body|
      body.xpath('*').each do |element|
        case element.name
        when 'p'
          child = element.child
          # A paragraph without child nodes that directly follows a table is skipped.
          unless child.nil? && doc_structure.elements.last.class == Table
            paragraph_style = DocumentStructure.default_paragraph_style.dup.parse(element, number, DocumentStructure.default_run_style, parent: doc_structure)
            number += 1
            doc_structure.elements << paragraph_style.dup
          end
        when 'tbl'
          table = Table.new(parent: doc_structure).parse(element,
                                                         number,
                                                         TableProperties.new)
          number += 1
          doc_structure.elements << table
        when 'sdt'
          doc_structure.elements << StructuredDocumentTag.new(parent: doc_structure).parse(element)
        end
      end
      body.xpath('w:sectPr').each do |sect_pr|
        doc_structure.page_properties = PageProperties.new(parent: doc_structure).parse(sect_pr,
                                                                                        DocumentStructure.default_paragraph_style,
                                                                                        DocumentStructure.default_run_style)
        doc_structure.notes = doc_structure.page_properties.notes # keep copy of notes to compatibility with previous docx models
      end
    end
  end
  OOXMLDocumentObject.xmls_stack.pop
  doc_structure.document_properties = DocumentProperties.new(parent: doc_structure).parse
  doc_structure.comments = Comment.parse_list(parent: doc_structure)
  doc_structure.comments_extended = CommentsExtended.new(parent: doc_structure).parse
  doc_structure.settings = DocumentSettings.new(parent: doc_structure).parse
  doc_structure
end

.parse_default_styleObject



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 187

# Reads the styles flagged as default in word/styles.xml and stores them in
# the class-level default style attributes.
#
# A style counts as default when its `default` attribute is '1', 'on' or
# 'true'. Default paragraph and character styles update
# default_paragraph_style / default_run_style; afterwards the table defaults
# are derived from copies of those and refined by the default table style.
#
# The styles file is opened with the block form of File.open so the handle is
# closed once Nokogiri has built the DOM.
#
# @return [Object] result of the last iteration (not meaningful to callers)
def self.parse_default_style
  styles_path = OOXMLDocumentObject.path_to_folder + 'word/styles.xml'
  doc = File.open(styles_path) { |file| Nokogiri::XML(file) }
  # Document-ordered list of styles whose `default` attribute is switched on.
  default_styles = doc.search('//w:style').select do |style|
    default_flag = style.attribute('default')
    default_flag && %w[1 on true].include?(default_flag.value)
  end
  default_styles.each do |style|
    case style.attribute('type').value
    when 'paragraph'
      style.xpath('w:pPr').each do |paragraph_pr_tag|
        DocumentStructure.default_paragraph_style = DocxParagraph.new.parse_paragraph_style(paragraph_pr_tag, DocumentStructure.default_run_style)
      end
      style.xpath('w:rPr').each do |character_pr_tag|
        DocumentStructure.default_run_style.parse_properties(character_pr_tag, DocumentStructure.default_run_style)
      end
    when 'character'
      style.xpath('w:rPr').each do |character_pr_tag|
        DocumentStructure.default_run_style.parse_properties(character_pr_tag, DocumentStructure.default_run_style)
      end
    end
  end
  # Table defaults start as copies of the document defaults, with the
  # paragraph spacing replaced by a fixed Spacing value.
  DocumentStructure.default_table_paragraph_style = DocumentStructure.default_paragraph_style.dup
  DocumentStructure.default_table_paragraph_style.spacing = Spacing.new(0, 0, 1, :auto)
  DocumentStructure.default_table_run_style = DocumentStructure.default_run_style.dup
  default_styles.each do |style|
    next unless style.attribute('type').value == 'table'

    style.xpath('w:rPr').each do |table_character_pr_tag|
      DocumentStructure.default_table_run_style.parse_properties(table_character_pr_tag, DocumentStructure.default_run_style)
    end
  end
end

Instance Method Details

#==(other) ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 40

# Compares two document structures by content.
#
# Only elements, page properties, notes, background, document properties and
# comments take part in the comparison.
#
# @param other [Object] object to compare with
# @return [Boolean] false when other is not a DocumentStructure (instead of
#   raising NoMethodError), otherwise whether all compared attributes are equal
def ==(other)
  return false unless other.is_a?(DocumentStructure)

  @elements == other.elements &&
    @page_properties == other.page_properties &&
    @notes == other.notes &&
    @background == other.background &&
    @document_properties == other.document_properties &&
    @comments == other.comments
end

#difference(other) ⇒ Object



49
50
51
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 49

# Builds the difference between this structure and another one by converting
# both to hashes (Hash.object_to_hash) and diffing the results.
#
# @param other [Object] structure to compare with
# @return [Object] whatever Hash#diff returns for the two hashes
def difference(other)
  own_hash = Hash.object_to_hash(self)
  other_hash = Hash.object_to_hash(other)
  own_hash.diff(other_hash)
end

#element_by_description(location: :canvas, type: :docx_paragraph) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 53

# Picks a well-known group of elements out of the document.
#
# @param location [Symbol] :canvas (document body), :footer (note :footer1),
#   :header (note :header1) or :comment (paragraphs of the first comment)
# @param type [Symbol] ignored for :comment; otherwise
#   :table - elements of the first cell of the table stored at index 1,
#   :docx_paragraph / :simple / :paragraph - the element list itself,
#   :shape - text box of the first non-empty run of the element at index 0
# @return [Object] the selected elements
# @raise [RuntimeError] for an unknown location or type
#   (note: the message for an unknown type still says "Wrong location")
def element_by_description(location: :canvas, type: :docx_paragraph)
  return comments[0].paragraphs if location == :comment

  note_types = { footer: :footer1, header: :header1 }
  unless location == :canvas || note_types.key?(location)
    raise 'Wrong global location(Need One of ":canvas", ":footer", ":header", ":comment")'
  end

  # The type is validated before any note lookup, so a wrong type never
  # triggers note_by_description.
  unless %i[table docx_paragraph simple paragraph shape].include?(type)
    raise 'Wrong location(Need One of ":table", ":paragraph", ":shape")' if location == :canvas

    raise 'Wrong location(Need One of ":table", ":simple", ":shape")'
  end

  items = location == :canvas ? elements : note_by_description(note_types[location]).elements
  case type
  when :table
    items[1].rows[0].cells[0].elements
  when :shape
    items[0].nonempty_runs.first.alternate_content.office2007_content.data.text_box
  else
    items
  end
end

#note_by_description(type) ⇒ Object



95
96
97
98
99
100
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 95

# Finds the first note whose type, converted to a symbol, equals the given one.
#
# @param type [Symbol] note type to look for (e.g. :footer1, :header1)
# @return [Object] the first matching note
# @raise [RuntimeError] when no note has that type
def note_by_description(type)
  found = notes.find { |note| note.type.to_sym == type }
  return found if found

  raise 'There isn\'t this type of the note'
end

#outline(location: :canvas, type: :simple, levels_count: 1) ⇒ Object



109
110
111
112
113
114
115
116
117
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 109

# Returns numbering format and level text for an outline.
#
# For every index below levels_count it reads the numbering of the element
# at that index and the level at the same index of its level list.
# NOTE(review): both result slots are overwritten on every pass, so only the
# values of the last index are returned - confirm this is intended.
#
# @param location [Symbol] passed to #element_by_description
# @param type [Symbol] passed to #element_by_description
# @param levels_count [Integer] number of levels to walk through
# @return [Array] [numbering format value, level text value], or an empty
#   array when levels_count is 0
def outline(location: :canvas, type: :simple, levels_count: 1)
  paragraphs = element_by_description(location: location, type: type)
  levels_count.times.each_with_object([]) do |index, result|
    level = paragraphs[index].numbering.abstruct_numbering.level_list[index]
    result[0] = level.numbering_format.value
    result[1] = level.text.value
  end
end

#recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0) ⇒ Object



102
103
104
105
106
107
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure.rb', line 102

# Reads numbering format and level text of the first numbering level of a
# paragraph.
#
# @param location [Symbol] passed to #element_by_description
# @param type [Symbol] passed to #element_by_description
# @param paragraph_number [Integer] index of the paragraph to inspect
# @return [Array] [numbering format value, level text value]
def recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0)
  paragraph = element_by_description(location: location, type: type)[paragraph_number]
  first_level = paragraph.numbering.abstruct_numbering.level_list[0]
  level_text = first_level.text.value
  [first_level.numbering_format.value, level_text]
end