Class: OoxmlParser::DocumentStructure

Inherits:
CommonDocumentStructure show all
Includes:
DefaultStyleHelper, DocumentStructureHelpers, DocumentStyleHelper
Defined in:
lib/ooxml_parser/docx_parser/document_structure.rb

Overview

Basic class for DocumentStructure

Instance Attribute Summary collapse

Attributes inherited from CommonDocumentStructure

#content_types, #default_font_size, #default_font_style, #default_font_typeface, #file_path, #root_subfolder, #unpacked_folder, #xmls_stack

Attributes inherited from OOXMLDocumentObject

#parent

Instance Method Summary collapse

Methods included from DocumentStructureHelpers

#with_data?

Methods included from DocumentStyleHelper

#based_on_style, #document_style_by_id, #document_style_by_name, #style_exist?

Methods included from DefaultStyleHelper

#parse_default_style, #parse_styles

Methods inherited from CommonDocumentStructure

#add_to_xmls_stack, #current_xml, #get_link_from_rels

Methods inherited from OOXMLDocumentObject

#boolean_attribute_value, #parse_xml, #with_data?

Methods included from OoxmlObjectAttributeHelper

#attribute_enabled?, #option_enabled?

Methods included from OoxmlDocumentObjectHelper

#to_hash

Constructor Details

#initialize(params = {}) ⇒ DocumentStructure

Returns a new instance of DocumentStructure.



57
58
59
60
61
62
63
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 57

# Creates an empty document structure.
#
# Comments, notes and elements start as empty arrays and the document
# properties as a fresh DocumentProperties; the received arguments are then
# forwarded unchanged to the parent constructor.
#
# @param params [Hash] options passed through to the superclass
# @return [DocumentStructure] a new instance of DocumentStructure
def initialize(params = {})
  @comments = []
  @document_properties = DocumentProperties.new
  @notes = []
  @elements = []
  super
end

Instance Attribute Details

#backgroundDocumentBackground

Returns background of document.

Returns:



29
30
31
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 29

# Background of the document.
#
# @return [DocumentBackground] current value of @background
def background = @background

#commentsComments

Returns comments of document.

Returns:



33
34
35
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 33

# Comments of the document.
#
# @return [Comments] current value of @comments
def comments = @comments

#comments_documentCommentsDocument

Returns comments of whole document.

Returns:

  • (CommentsDocument)

    comments of whole document



45
46
47
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 45

# Comments of the whole document.
#
# @return [CommentsDocument] current value of @comments_document
def comments_document = @comments_document

#comments_extendedCommentsExtended

Returns extended comments.

Returns:



47
48
49
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 47

# Extended comments.
#
# @return [CommentsExtended] current value of @comments_extended
def comments_extended = @comments_extended

#default_paragraph_styleDocxParagraph

Returns default paragraph style.

Returns:



49
50
51
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 49

# Default paragraph style.
#
# @return [DocxParagraph] current value of @default_paragraph_style
def default_paragraph_style = @default_paragraph_style

#default_run_styleDocxParagraphRun

Returns default run style.

Returns:



51
52
53
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 51

# Default run style.
#
# @return [DocxParagraphRun] current value of @default_run_style
def default_run_style = @default_run_style

#default_table_paragraph_styleDocxParagraph

Returns default table paragraph style.

Returns:



53
54
55
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 53

# Default table paragraph style.
#
# @return [DocxParagraph] current value of @default_table_paragraph_style
def default_table_paragraph_style = @default_table_paragraph_style

#default_table_run_styleDocxParagraphRun

Returns default table run style.

Returns:



55
56
57
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 55

# Default table run style.
#
# @return [DocxParagraphRun] current value of @default_table_run_style
def default_table_run_style = @default_table_run_style

#document_propertiesDocumentProperties

Returns properties of document.

Returns:



31
32
33
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 31

# Properties of the document.
#
# @return [DocumentProperties] current value of @document_properties
def document_properties = @document_properties

#elementsArray<OOXMLDocumentObject>

Returns list of elements.

Returns:



23
24
25
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 23

# List of elements of the document.
#
# @return [Array<OOXMLDocumentObject>] current value of @elements
def elements = @elements

#notesNote

Returns notes of document.

Returns:

  • (Note)

    notes of document



27
28
29
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 27

# Notes of the document.
#
# @return [Array<Note>] current value of @notes
def notes = @notes

#numberingNumbering

Returns stored numbering data.

Returns:



35
36
37
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 35

# Stored numbering data.
#
# @return [Numbering] current value of @numbering
def numbering = @numbering

#page_propertiesPageProperties

Returns page properties of document.

Returns:



25
26
27
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 25

# Page properties of the document.
#
# @return [PageProperties] current value of @page_properties
def page_properties = @page_properties

#relationshipsRelationships

Returns relationships.

Returns:



41
42
43
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 41

# Relationships of the document.
#
# @return [Relationships] current value of @relationships
def relationships = @relationships

#settingsDocumentSettings

Returns settings.

Returns:



43
44
45
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 43

# Settings of the document.
#
# @return [DocumentSettings] current value of @settings
def settings = @settings

#stylesStyles

Returns styles of document.

Returns:

  • (Styles)

    styles of document



37
38
39
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 37

# Styles of the document.
#
# @return [Styles] current value of @styles
def styles = @styles

#themePresentationTheme Also known as: theme_colors

Returns theme of docx.

Returns:



39
40
41
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 39

# Theme of the docx.
#
# @return [PresentationTheme] current value of @theme
def theme = @theme

Instance Method Details

#==(other) ⇒ True, False

Compare this object to other

Parameters:

  • other (Object)

    any other object

Returns:

  • (True, False)

    result of comparison



70
71
72
73
74
75
76
77
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 70

# Compare this object to other
#
# Only parsed content takes part in the comparison: elements, page
# properties, notes, background, document properties and comments.
# An object that does not expose all of these readers (for example +nil+)
# is reported as not equal instead of raising NoMethodError.
#
# @param other [Object] any other object
# @return [true, false] result of comparison
def ==(other)
  readers = %i[elements page_properties notes background document_properties comments]
  # Guard first: the comparison below calls every reader on +other+.
  return false unless readers.all? { |reader| other.respond_to?(reader) }

  @elements == other.elements &&
    @page_properties == other.page_properties &&
    @notes == other.notes &&
    @background == other.background &&
    @document_properties == other.document_properties &&
    @comments == other.comments
end

#document_stylesArray<DocumentStyle>

Returns styles of the document.

Returns:



163
164
165
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 163

# Styles of the document, read from the object returned by #styles.
#
# @return [Array<DocumentStyle>] result of calling +styles+ on #styles
def document_styles = styles.styles

#element_by_description(location: :canvas, type: :docx_paragraph) ⇒ OOXMLDocumentObject

Get element by its type

Parameters:

  • location (Symbol) (defaults to: :canvas)

    location of object

  • type (Symbol) (defaults to: :docx_paragraph)

    type of object

Returns:



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 83

# Get element by its type
#
# Picks a container by +location+ (document body, first footer, first
# header or first comment) and then narrows it down by +type+.
# For +:comment+ the +type+ is ignored and the paragraphs of the first
# comment are returned.
#
# @param location [Symbol] location of object:
#   +:canvas+, +:footer+, +:header+ or +:comment+
# @param type [Symbol] type of object:
#   +:table+, +:shape+, or one of +:docx_paragraph+, +:simple+, +:paragraph+
# @return [OOXMLDocumentObject] requested element(s)
# @raise [RuntimeError] if +location+ or +type+ is not one of the known values
def element_by_description(location: :canvas, type: :docx_paragraph)
  case location
  when :canvas
    # In the document body the table is looked up at index 1, not 0.
    elements_for_type(type, table_index: 1) { elements }
  when :footer
    elements_for_type(type) { note_by_description(:footer1).elements }
  when :header
    elements_for_type(type) { note_by_description(:header1).elements }
  when :comment
    comments[0].paragraphs
  else
    raise 'Wrong global location(Need One of ":canvas", ":footer", ":header", ":comment")'
  end
end

# Narrow a list of container elements down to the requested type.
# The block supplying the container is only called for a known type, so an
# unknown type is reported before any note lookup happens.
#
# @param type [Symbol] type of object
# @param table_index [Integer] index of the table inside the container
# @yieldreturn [Array] elements of the container
# @return [Object] elements matching the type
# @raise [RuntimeError] if +type+ is unknown
private def elements_for_type(type, table_index: 0)
  case type
  when :table
    container = yield
    container[table_index].rows[0].cells[0].elements
  when :docx_paragraph, :simple, :paragraph
    yield
  when :shape
    container = yield
    container[0].nonempty_runs.first.alternate_content.office2007_content.data.text_box
  else
    raise 'Wrong type(Need One of ":table", ":docx_paragraph", ":simple", ":paragraph", ":shape")'
  end
end

#note_by_description(type) ⇒ Note

Get note by its description

Parameters:

  • type (Symbol)

    note type

Returns:



128
129
130
131
132
133
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 128

# Get note by its description
#
# Returns the first entry of #notes whose +type+, converted to a symbol,
# equals the requested one.
#
# @param type [Symbol] note type
# @return [Note] first matching note
# @raise [RuntimeError] if no note has the requested type
def note_by_description(type)
  notes.find { |candidate| candidate.type.to_sym == type } ||
    raise('There isn\'t this type of the note')
end

#outline(location: :canvas, type: :simple, levels_count: 1) ⇒ Array<String,String>

Return outline type

Parameters:

  • location (Symbol) (defaults to: :canvas)

    location of object

  • type (Symbol) (defaults to: :simple)

    type of object

  • levels_count (Integer) (defaults to: 1)

    count of levels to detect

Returns:

  • (Array<String,String>)

    type of outline



152
153
154
155
156
157
158
159
160
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 152

# Return outline type
#
# For every index below +levels_count+ the element at that index is
# inspected at the numbering level with the same index. Each pass overwrites
# the previous one, so the returned pair describes the last inspected level;
# with +levels_count+ of 0 the result is an empty array.
#
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param levels_count [Integer] count of levels to detect
# @return [Array<String,String>] numbering format and level text
def outline(location: :canvas, type: :simple, levels_count: 1)
  paragraphs = element_by_description(location: location, type: type)
  levels_count.times.each_with_object([]) do |index, pair|
    level = paragraphs[index].numbering.abstruct_numbering.level_list[index]
    pair[0] = level.numbering_format.value
    pair[1] = level.text.value
  end
end

#parseDocumentStructure

Parse docx file

Returns:



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 169

# Parse docx file
#
# Reads the parts of the unpacked document in a fixed order: content types,
# theme, relationships and styles first, then `word/document.xml`
# (background, body elements and section properties), and finally document
# properties, comments and settings.
#
# NOTE(review): @elements is only appended to in this method and never
# reset, so parsing twice on the same instance would presumably duplicate
# the body elements — confirm before reusing an instance.
#
# @return [DocumentStructure] self, after the instance variables are filled
def parse
  @content_types = ContentTypes.new(parent: self).parse
  @root_subfolder = 'word/'
  @comments = []
  @default_paragraph_style = DocxParagraph.new
  @default_run_style = DocxParagraphRun.new(parent: self)
  @theme = PresentationTheme.new(parent: self).parse('word/theme/theme1.xml')
  @relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}word/_rels/document.xml.rels")
  parse_styles
  # Running index handed to the paragraph and table parsers; it is
  # incremented for every added 'p' and 'tbl', but not for 'sdt'.
  number = 0
  # Pushed here and popped again once the document xml has been processed.
  root_object.add_to_xmls_stack('word/document.xml')
  doc = parse_xml(root_object.current_xml)
  doc.search('//w:document').each do |document|
    document.xpath('w:background').each do |background|
      @background = DocumentBackground.new(parent: self).parse(background)
    end
    document.xpath('w:body').each do |body|
      body.xpath('*').each do |element|
        case element.name
        when 'p'
          child = element.child
          # Skip a paragraph without children when the previously added
          # element is a Table.
          unless child.nil? && @elements.last.instance_of?(Table)
            # Parsing is done on a copy of the default paragraph style.
            paragraph_style = default_paragraph_style.dup.parse(element, number, default_run_style, parent: self)
            number += 1
            @elements << paragraph_style.dup
          end
        when 'tbl'
          table = Table.new(parent: self).parse(element,
                                                number,
                                                TableProperties.new)
          number += 1
          @elements << table
        when 'sdt'
          @elements << StructuredDocumentTag.new(parent: self).parse(element)
        end
      end
      # Only `w:sectPr` nodes that are direct children of the body are used here.
      body.xpath('w:sectPr').each do |sect_pr|
        @page_properties = PageProperties.new(parent: self).parse(sect_pr,
                                                                  default_paragraph_style,
                                                                  default_run_style)
        @notes = page_properties.notes # keep copy of notes to compatibility with previous docx models
      end
    end
  end
  root_object.xmls_stack.pop
  @document_properties = DocumentProperties.new(parent: self).parse
  @comments = Comments.new(parent: self).parse
  @comments_extended = CommentsExtended.new(parent: self).parse
  # The file path is built from the first relationship target of type
  # 'commentsDocument'.
  @comments_document = Comments.new(parent: self,
                                    file: "#{root_object.unpacked_folder}word/#{relationships.target_by_type('commentsDocument').first}")
                               .parse
  @settings = DocumentSettings.new(parent: self).parse
  self
end

#recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0) ⇒ Array<String,String>

Detect numbering type

Parameters:

  • location (Symbol) (defaults to: :canvas)

    location of object

  • type (Symbol) (defaults to: :simple)

    type of object

  • paragraph_number (Integer) (defaults to: 0)

    number of object

Returns:

  • (Array<String,String>)

    type of numbering



140
141
142
143
144
145
# File 'lib/ooxml_parser/docx_parser/document_structure.rb', line 140

# Detect numbering type
#
# Looks at the first numbering level of the element found at
# +paragraph_number+ within the elements selected by +location+ and +type+.
#
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param paragraph_number [Integer] number of object
# @return [Array<String,String>] numbering format followed by level text
def recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0)
  paragraph = element_by_description(location: location, type: type)[paragraph_number]
  first_level = paragraph.numbering.abstruct_numbering.level_list[0]
  level_text = first_level.text.value
  [first_level.numbering_format.value, level_text]
end