Class: OoxmlParser::DocxParagraph

Inherits:
OOXMLDocumentObject show all
Includes:
DocxParagraphHelper
Defined in:
lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb

Instance Attribute Summary collapse

Attributes inherited from OOXMLDocumentObject

#parent

Class Method Summary collapse

Instance Method Summary collapse

Methods included from DocxParagraphHelper

#comment_extend_data

Methods inherited from OOXMLDocumentObject

add_to_xmls_stack, copy_file_and_rename_to_zip, current_xml, dir, encrypted_file?, get_link_from_rels, unzip_file

Methods included from OoxmlDocumentObjectHelper

#to_hash

Constructor Details

#initialize(parent: nil) ⇒ DocxParagraph

Returns a new instance of DocxParagraph.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 31

# Builds an empty paragraph with default formatting values.
#
# Only the attributes listed here exist after construction; the remaining
# readers (hyperlink, sdt, inserted, ...) return nil until a parse step
# assigns them.
#
# @param parent [Object, nil] object stored in @parent
# @return [DocxParagraph] a new instance of DocxParagraph
def initialize(parent: nil)
  @number = 0
  @bookmark_start = []
  @bookmark_end = []
  # Default alignment is a String; #parse_paragraph_style later stores Symbols
  @align = 'left'
  @spacing = Spacing.new
  @ind = Indents.new
  @kinoku = false
  @character_style_array = []
  @horizontal_line = false
  @page_break = false
  @borders = Borders.new
  @keep_lines = false
  @contextual_spacing = false
  @page_numbering = false
  @keep_next = false
  # The only boolean flag that defaults to true
  @orphan_control = true
  @parent = parent
end

Instance Attribute Details

#alignObject

Returns the value of attribute align.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Paragraph alignment. Starts as the String 'left'; #parse_paragraph_style
# replaces it with a Symbol read from the 'jc' element ('both' becomes :justify).
#
# @return [String, Symbol] current alignment
def align
  @align
end

#background_colorObject

Returns the value of attribute background_color.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Background color assigned by #parse_paragraph_style from the 'fill'
# attribute of a 'shd' element; nil until such an element is parsed.
#
# @return [Object, nil] value of @background_color
def background_color
  @background_color
end

#bookmark_endObject

Returns the value of attribute bookmark_end.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Entries collected by #parse, one per 'bookmarkEnd' child of the paragraph.
# Starts as an empty Array.
#
# @return [Array] value of @bookmark_end
def bookmark_end
  @bookmark_end
end

#bookmark_startObject

Returns the value of attribute bookmark_start.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Entries collected by #parse, one per 'bookmarkStart' child of the paragraph.
# Starts as an empty Array.
#
# @return [Array] value of @bookmark_start
def bookmark_start
  @bookmark_start
end

#bordersObject

Returns the value of attribute borders.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Paragraph borders. Initialized with Borders.new and replaced by the result
# of ParagraphBorders parsing when a 'pBdr' element is encountered.
#
# @return [Object] value of @borders
def borders
  @borders
end

#character_style_arrayObject Also known as: runs

Returns the value of attribute character_style_array.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Runs of the paragraph (also exposed as `runs`). Starts as an empty Array
# and is replaced at the end of #parse with the runs and formulas collected
# there.
#
# @return [Array] value of @character_style_array
def character_style_array
  @character_style_array
end

#contextual_spacingObject

Returns the value of attribute contextual_spacing.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as false and is set to true by #parse_paragraph_style
# whenever a 'contextualSpacing' element is present.
#
# @return [Boolean] value of @contextual_spacing
def contextual_spacing
  @contextual_spacing
end

#frame_propertiesObject

Returns the value of attribute frame_properties.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Result of FrameProperties parsing for a 'framePr' element; nil until
# #parse_paragraph_style encounters one.
#
# @return [Object, nil] value of @frame_properties
def frame_properties
  @frame_properties
end

#horizontal_lineObject

Returns the value of attribute horizontal_line.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as false and is set to true by #parse when the paragraph
# node contains a w:pict child with a v:rect inside it.
#
# @return [Boolean] value of @horizontal_line
def horizontal_line
  @horizontal_line
end

Returns hyperlink in paragraph.

Returns:



19
20
21
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 19

# Hyperlink of the paragraph. #parse assigns it for every 'hyperlink' child,
# so the last one parsed is the one kept; nil when none was parsed.
#
# @return [Object, nil] value of @hyperlink
def hyperlink
  @hyperlink
end

#indObject

Returns the value of attribute ind.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Paragraph indents. Initialized with Indents.new; #parse_paragraph_style
# replaces it when an 'ind' element is parsed.
#
# @return [Object] value of @ind
def ind
  @ind
end

#insertedInserted

Returns data inserted by review.

Returns:

  • (Inserted)

    data inserted by review



23
24
25
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 23

# Data inserted by review, assigned by #parse from an 'ins' child;
# nil when the paragraph has none.
#
# @return [Inserted, nil] data inserted by review
def inserted
  @inserted
end

#keep_linesObject

Returns the value of attribute keep_lines.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as false; #parse_paragraph_style sets it to true for a
# 'keepLines' element unless its 'val' attribute equals 'false'.
#
# @return [Boolean] value of @keep_lines
def keep_lines
  @keep_lines
end

#keep_nextObject

Returns the value of attribute keep_next.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as false and is set to true by #parse_paragraph_style
# whenever a 'keepNext' element is present.
#
# @return [Boolean] value of @keep_next
def keep_next
  @keep_next
end

#kinokuObject

Returns the value of attribute kinoku.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as false and is set to true by #parse_paragraph_style
# whenever a 'kinoku' element is present.
#
# @return [Boolean] value of @kinoku
def kinoku
  @kinoku
end

#numberObject

Returns the value of attribute number.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Paragraph number. Starts at 0 and is overwritten by #parse with its
# par_number argument. Ignored by #==.
#
# @return [Object] value of @number
def number
  @number
end

#numberingObject

Returns the value of attribute numbering.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Result of NumberingProperties parsing for a 'numPr' element; nil until
# #parse_paragraph_style encounters one.
#
# @return [Object, nil] value of @numbering
def numbering
  @numbering
end

#orphan_controlObject

Returns the value of attribute orphan_control.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as true; #parse_paragraph_style overwrites it with the
# result of option_enabled? for a 'widowControl' element.
#
# @return [Object] value of @orphan_control
def orphan_control
  @orphan_control
end

#page_breakObject

Returns the value of attribute page_break.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as false; #parse_paragraph_style sets it to true for a
# 'pageBreakBefore' element unless its 'val' attribute equals 'false'.
#
# @return [Boolean] value of @page_break
def page_break
  @page_break
end

#page_numberingObject

Returns the value of attribute page_numbering.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Flag that starts as false; #parse sets it to true when a field instruction
# (fldSimple 'instr' attribute or w:instrText content) includes 'PAGE'.
#
# @return [Boolean] value of @page_numbering
def page_numbering
  @page_numbering
end

#paragraph_idInteger

Returns id of paragraph (for comment).

Returns:

  • (Integer)

    id of paragraph (for comment)



25
26
27
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 25

# Id of the paragraph (for comment). #parse fills it from the 'paraId'
# attribute converted with to_i; nil when the attribute was absent.
#
# @return [Integer, nil] id of paragraph
def paragraph_id
  @paragraph_id
end

#paragraph_propertiesParagraphProperties

Returns properties of the current paragraph.

Returns:



21
22
23
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 21

# Properties of the current paragraph, assigned by #parse from its 'pPr'
# child; nil when the paragraph has none.
#
# @return [ParagraphProperties, nil] properties of current paragraph
def paragraph_properties
  @paragraph_properties
end

#sdtStructuredDocumentTag

Returns structured document tag data.

Returns:



29
30
31
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 29

# Structured document tag data, assigned by #parse from an 'sdt' child;
# nil when the paragraph has none.
#
# @return [StructuredDocumentTag, nil] structured document tag data
def sdt
  @sdt
end

#section_breakObject

Returns the value of attribute section_break.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Label derived by #parse_paragraph_style from the type of the parsed
# 'sectPr': 'Odd page', 'Even page', 'Current Page' or 'Next Page'.
# nil when no 'sectPr' was parsed.
#
# @return [String, nil] value of @section_break
def section_break
  @section_break
end

#sector_propertiesObject

Returns the value of attribute sector_properties.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Result of PageProperties parsing for a 'sectPr' element; nil until
# #parse_paragraph_style encounters one.
#
# @return [Object, nil] value of @sector_properties
def sector_properties
  @sector_properties
end

#spacingObject

Returns the value of attribute spacing.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Paragraph spacing. Initialized with Spacing.new; #parse_paragraph_style
# updates its before, after, line_rule and line values from a 'spacing'
# element.
#
# @return [Object] value of @spacing
def spacing
  @spacing
end

#styleObject

Returns the value of attribute style.



14
15
16
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Style of the paragraph. Assigned through the writer by
# .parse_paragraph_style_xml when the referenced style has a w:pPr element;
# nil otherwise.
#
# @return [Object, nil] value of @style
def style
  @style
end

#text_idInteger

Returns id of text (for comment).

Returns:

  • (Integer)

    id of text (for comment)



27
28
29
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 27

# Id of the text (for comment). #parse fills it from the 'textId' attribute
# converted with to_i; nil when the attribute was absent.
#
# @return [Integer, nil] id of text
def text_id
  @text_id
end

Class Method Details

.parse_paragraph_style_xml(id, paragraph_style, character_style) ⇒ Object



261
262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 261

# Finds the style whose styleId equals +id+ in word/styles.xml and applies it:
# every w:pPr of that style is parsed into +paragraph_style+ and every w:rPr
# into +character_style+. Only the first matching style is used.
#
# @param id [String] style id compared against each w:style 'styleId' value
# @param paragraph_style [DocxParagraph] object receiving #parse_paragraph_style
#   and the `style=` assignment
# @param character_style [Object] run style receiving #parse_properties
# @return [Object] not meaningful; the method is used for its side effects
def self.parse_paragraph_style_xml(id, paragraph_style, character_style)
  styles_path = OOXMLDocumentObject.path_to_folder + 'word/styles.xml'
  # Block form of File.open closes the handle once the document is parsed,
  # instead of leaking one descriptor per style lookup.
  doc = File.open(styles_path) { |styles_file| Nokogiri::XML(styles_file) }
  doc.search('//w:style').each do |style|
    next unless style.attribute('styleId').value == id
    style.xpath('w:pPr').each do |p_pr|
      paragraph_style.parse_paragraph_style(p_pr, character_style)
      paragraph_style.style = StyleParametres.new(parent: paragraph_style).parse(style)
    end
    style.xpath('w:rPr').each do |r_pr|
      character_style.parse_properties(r_pr, DocumentStructure.default_run_style)
    end
    # Style ids are matched at most once; stop scanning after the first hit
    break
  end
end

Instance Method Details

#==(other) ⇒ Object



90
91
92
93
94
95
96
97
98
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 90

# Compares two paragraphs attribute by attribute, ignoring @number and
# @parent.
#
# The instance variables of BOTH operands are taken into account, so the
# comparison is symmetric: previously only the receiver's variables were
# checked and `a == b` could be true while `b == a` was false (for example
# when only `b` had a hyperlink assigned). A variable missing on one side is
# treated as nil.
#
# @param other [Object] object to compare with
# @return [Boolean] true if all significant attributes are equal
def ==(other)
  ignored_attributes = %i[@number @parent]
  significant_attributes = (instance_variables | other.instance_variables) - ignored_attributes
  significant_attributes.all? do |attribute|
    instance_variable_get(attribute) == other.instance_variable_get(attribute)
  end
end

#initialize_copy(source) ⇒ Object



51
52
53
54
55
56
57
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 51

# Copy hook used by dup/clone: gives the copy its own clones of the bookmark
# lists, the run list and the spacing object so they are not shared with
# +source+. The clones are shallow; every other attribute stays shared.
#
# @param source [DocxParagraph] paragraph being copied
def initialize_copy(source)
  super
  %i[bookmark_start bookmark_end character_style_array].each do |list_name|
    instance_variable_set("@#{list_name}", source.public_send(list_name).clone)
  end
  @spacing = source.spacing.clone
end

#nonempty_runsObject



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 59

# Selects the runs that carry content.
#
# A DocxParagraphRun is kept when it has text or any of: alternate content,
# drawing, object, shape, footnote, endnote. A DocxFormula is always kept.
# Elements of any other kind are dropped.
#
# @return [Array] new array with the non-empty runs, in original order
def nonempty_runs
  @character_style_array.select do |run|
    case run
    when DocxParagraphRun
      # Empty only if every content holder is blank
      !(run.text.empty? &&
        run.alternate_content.nil? &&
        run.drawing.nil? &&
        run.object.nil? &&
        run.shape.nil? &&
        run.footnote.nil? &&
        run.endnote.nil?)
    when DocxFormula
      true
    end
  end
end

#parse(node, par_number = 0, default_character = DocxParagraphRun.new, parent: nil) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 100

# Parses a paragraph node: its id attributes, paragraph properties, runs,
# hyperlinks, fields, formulas, bookmarks, comment ranges, review insertions
# and structured document tags.
#
# @param node [Object] XML node of the paragraph (must respond to
#   `attributes` and `xpath`)
# @param par_number [Object] value stored in @number
# @param default_character [DocxParagraphRun] run style used as the template
#   for every run of this paragraph; it is dup'ed, not modified directly
# @param parent [Object, nil] value stored in @parent
# @return [DocxParagraph] self
def parse(node, par_number = 0, default_character = DocxParagraphRun.new, parent: nil)
  @parent = parent
  default_character_style = default_character.dup
  character_styles_array = []
  # custom_character_style is handed to #parse_paragraph_style for 'pPr' and is
  # the template for plain 'r' runs; fldSimple and hyperlink runs start from
  # default_character_style instead.
  custom_character_style = default_character_style.dup
  char_number = 0
  # Ids of comment ranges that are currently open
  comments = []
  node.attributes.each do |key, value|
    case key
    when 'paraId'
      @paragraph_id = value.value.to_i
    when 'textId'
      @text_id = value.value.to_i
    end
  end
  node.xpath('*').each do |node_child|
    case node_child.name
    when 'bookmarkStart'
      @bookmark_start << Bookmark.new(parent: self).parse(node_child)
    when 'bookmarkEnd'
      @bookmark_end << Bookmark.new(parent: self).parse(node_child)
    when 'pPr'
      parse_paragraph_style(node_child, custom_character_style)
      # Note: w:pict is searched on the paragraph node, not on the pPr child
      node.xpath('w:pict').each do |pict|
        pict.xpath('v:rect').each do
          @horizontal_line = true
        end
      end
      @paragraph_properties = ParagraphProperties.new(parent: self).parse(node_child)
    when 'commentRangeStart'
      comments << node_child.attribute('id').value
    when 'fldSimple'
      instruction = node_child.attribute('instr').to_s
      @page_numbering = true if instruction.include?('PAGE')
      node_child.xpath('w:r').each do |r_tag|
        character_style = default_character_style.dup
        # NOTE(review): passes the paragraph's parent here, while plain runs
        # below pass `self` -- confirm which one is intended.
        character_style.parse(r_tag, char_number, parent: parent)
        character_style.page_number = @page_numbering
        character_style.instruction = instruction
        character_styles_array << character_style.dup
        char_number += 1
      end
    when 'r'
      character_style = custom_character_style.dup
      node_child.xpath('w:instrText').each do |insrt_text|
        @page_numbering = true if insrt_text.text.include?('PAGE')
      end
      character_style.parse(node_child, char_number, parent: self)
      # Each run gets its own snapshot of the comment ranges open at this point
      character_style.comments = comments.dup
      character_styles_array << character_style.dup
      # Re-assign the shape so the stored copy references the same shape object
      character_styles_array.last.shape = character_style.shape unless character_style.shape.nil?
      char_number += 1
    when 'hyperlink'
      @hyperlink = Hyperlink.new(parent: self).parse(node_child)
      # One run-style object is reused for every run inside this hyperlink;
      # each parsed state is stored as a dup.
      character_style = default_character_style.dup
      if !node_child.attribute('id').nil?
        character_style.link = Hyperlink.new(parent: character_style).parse(node_child)
      else
        character_style.link = node_child.attribute('anchor').value unless node_child.attribute('anchor').nil?
      end
      node_child.xpath('w:r').each do |r_tag|
        # NOTE(review): `parent: parent` here vs `parent: self` in the
        # fldSimple loop right below -- confirm which one is intended.
        character_style.parse(r_tag, char_number, parent: parent)
        character_styles_array << character_style.dup
        char_number += 1
      end
      node_child.xpath('w:fldSimple').each do |simple_field|
        instruction = simple_field.attribute('instr').to_s
        @page_numbering = true if instruction.include?('PAGE')
        simple_field.xpath('w:r').each do |r_tag|
          character_style.parse(r_tag, char_number, parent: self)
          character_style.page_number = @page_numbering
          character_style.instruction = instruction
          character_styles_array << character_style.dup
          char_number += 1
        end
      end
    when 'oMathPara'
      node_child.xpath('m:oMath').each do |o_math|
        character_styles_array << DocxFormula.new(parent: self).parse(o_math)
      end
    when 'commentRangeEnd'
      # Close the first open comment range with a matching id
      comments.each_with_index do |comment, index|
        if comment == node_child.attribute('id').value
          comments.delete_at(index)
          break
        end
      end
    when 'ins'
      @inserted = Inserted.new(parent: self).parse(node_child)
    when 'sdt'
      @sdt = StructuredDocumentTag.new(parent: self).parse(node_child)
    end
  end
  @number = par_number
  # Trailing whitespace is stripped from the last element only, and only when
  # it is exactly a DocxParagraphRun
  character_styles_array.last.text = character_styles_array.last.text.rstrip if character_styles_array.last.class == DocxParagraphRun
  @character_style_array = character_styles_array
  @parent = parent
  self
end

#parse_paragraph_style(node, default_char_style = DocxParagraphRun.new) ⇒ Object



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 200

# Reads paragraph-level properties from the direct children of +node+ and
# stores them on this paragraph. Unknown child elements are ignored.
#
# @param node [Object] XML properties node (must respond to `xpath`)
# @param default_char_style [DocxParagraphRun] run style forwarded to the
#   style lookup ('pStyle') and to section parsing ('sectPr')
# @return [DocxParagraph] self
def parse_paragraph_style(node, default_char_style = DocxParagraphRun.new)
  node.xpath('*').each do |node_child|
    case node_child.name
    when 'pageBreakBefore'
      @page_break = true if node_child.attribute('val').nil? || node_child.attribute('val').value != 'false'
    when 'pBdr'
      @borders = ParagraphBorders.new(parent: self).parse(node_child)
    when 'keepLines'
      # Enabled unless explicitly switched off; an explicit 'false' leaves the
      # current value untouched
      @keep_lines = true if node_child.attribute('val').nil? || node_child.attribute('val').value != 'false'
    when 'widowControl'
      @orphan_control = option_enabled?(node_child)
    when 'keepNext'
      @keep_next = true
    when 'contextualSpacing'
      @contextual_spacing = true
    when 'shd'
      background_color_string = node_child.attribute('fill').value
      @background_color = Color.new(parent: self).parse_hex_string(background_color_string)
      @background_color.set_style(node_child.attribute('val').value.to_sym) unless node_child.attribute('val').nil?
    when 'pStyle'
      DocxParagraph.parse_paragraph_style_xml(node_child.attribute('val').value, self, default_char_style)
    when 'ind'
      @ind = DocumentStructure.default_paragraph_style.ind.dup.parse(node_child)
    when 'kinoku'
      @kinoku = true
    when 'framePr'
      @frame_properties = FrameProperties.new(parent: self).parse(node_child)
    when 'numPr'
      @numbering = NumberingProperties.new(parent: self).parse(node_child)
    when 'jc'
      # An element without a 'val' attribute carries no alignment. Both checks
      # are guarded now; the 'both' comparison used to dereference the missing
      # attribute and raise NoMethodError.
      alignment = node_child.attribute('val')
      unless alignment.nil?
        @align = alignment.value == 'both' ? :justify : alignment.value.to_sym
      end
    when 'spacing'
      @spacing.before = (node_child.attribute('before').value.to_f / 566.9).round(2) unless node_child.attribute('before').nil?
      @spacing.after = (node_child.attribute('after').value.to_f / 566.9).round(2) unless node_child.attribute('after').nil?
      @spacing.line_rule = node_child.attribute('lineRule').value.sub('atLeast', 'at_least').to_sym unless node_child.attribute('lineRule').nil?
      # The divisor for 'line' depends on the rule, so line_rule is read first
      unless node_child.attribute('line').nil?
        @spacing.line = (@spacing.line_rule == :auto ? (node_child.attribute('line').value.to_f / 240.0).round(2) : (node_child.attribute('line').value.to_f / 566.9).round(2))
      end
    when 'sectPr'
      @sector_properties = PageProperties.new(parent: self).parse(node_child, self, default_char_style)
      @section_break = case @sector_properties.type
                       when 'oddPage'
                         'Odd page'
                       when 'evenPage'
                         'Even page'
                       when 'continuous'
                         'Current Page'
                       else
                         'Next Page'
                       end
    end
  end
  # `parent` is the attribute reader here, so this keeps the current parent
  @parent = parent
  self
end

#remove_empty_runsObject



83
84
85
86
87
88
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 83

# Removes every run that #nonempty_runs does not report as carrying content.
#
# The run array is updated in place in one step. Deleting elements while
# iterating the same array with `each` skipped the element following each
# deletion, so adjacent empty runs were left behind.
#
# @return [Array] the updated run array (same object as before the call)
def remove_empty_runs
  @character_style_array.replace(nonempty_runs)
end

#with_data?True, false

Returns whether the structure contains any user data.

Returns:

  • (True, false)

    whether the structure contains any user data



79
80
81
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 79

# Tells whether the paragraph holds any user data, i.e. whether
# #nonempty_runs reports at least one run.
#
# @return [true, false] true if at least one non-empty run exists
def with_data?
  return false if nonempty_runs.empty?

  true
end