Class: OoxmlParser::DocxParagraph

Inherits:
OOXMLDocumentObject show all
Defined in:
lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb

Constant Summary

Constants inherited from OOXMLDocumentObject

OOXMLDocumentObject::DEFAULT_DIRECTORY_FOR_MEDIA

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from OOXMLDocumentObject

add_to_xmls_stack, copy_file_and_rename_to_zip, copy_media_file, current_xml, dir, encrypted_file?, get_link_from_rels, media_folder, option_enabled?, unzip_file

Constructor Details

#initialize ⇒ DocxParagraph

Returns a new instance of DocxParagraph.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14

# Sets every paragraph attribute to its default value. The class-level
# parse methods later overwrite these defaults as the matching XML nodes
# are encountered.
def initialize
  # Position of the paragraph; `==` leaves this attribute out of the comparison.
  @number = 0
  @bookmark_start = []
  @bookmark_end = []
  # Stored as a String here; parse_paragraph_style assigns Symbols instead.
  @align = 'left'
  @spacing = Spacing.new
  @background_color = nil
  @ind = Indents.new
  @kinoku = false
  @numbering = nil
  # Holds the parsed runs (and formulas) of the paragraph.
  @character_style_array = []
  @horizontal_line = false
  @page_break = false
  @borders = Borders.new
  @keep_lines = false
  @contextual_spacing = false
  @sector_properties = nil
  @page_numbering = false
  @section_break = nil
  @style = nil
  @keep_next = false
  # The only boolean flag that starts out enabled.
  @orphan_control = true
  @tabs = []
  @frame_properties = nil
end

Instance Attribute Details

#align ⇒ Object

Returns the value of attribute align.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Paragraph alignment: the String 'left' by default, or a Symbol once
# parse_paragraph_style has read a `w:jc` value (:justify for 'both').
def align
  @align
end

#background_color ⇒ Object

Returns the value of attribute background_color.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Background colour built from the `fill` attribute of `w:shd`;
# nil when no shading has been parsed.
def background_color
  @background_color
end

#bookmark_end ⇒ Object

Returns the value of attribute bookmark_end.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Array of Bookmark objects, one per `w:bookmarkEnd` child of the paragraph.
def bookmark_end
  @bookmark_end
end

#bookmark_start ⇒ Object

Returns the value of attribute bookmark_start.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Array of Bookmark objects (id and name), one per `w:bookmarkStart` child
# of the paragraph.
def bookmark_start
  @bookmark_start
end

#borders ⇒ Object

Returns the value of attribute borders.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Borders of the paragraph; replaced by a fresh Borders object whenever a
# `w:pBdr` node is parsed.
def borders
  @borders
end

#character_style_array ⇒ Object

Returns the value of attribute character_style_array.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Array with the content of the paragraph in document order: parsed runs
# and, for `m:oMath` nodes, parsed formulas.
def character_style_array
  @character_style_array
end

#contextual_spacing ⇒ Object

Returns the value of attribute contextual_spacing.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true once a `w:contextualSpacing` node has been seen; false by default.
def contextual_spacing
  @contextual_spacing
end

#frame_properties ⇒ Object

Returns the value of attribute frame_properties.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Result of FrameProperties.parse for the `w:framePr` node; nil when the
# paragraph has none.
def frame_properties
  @frame_properties
end

#horizontal_line ⇒ Object

Returns the value of attribute horizontal_line.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true when parse found a `v:rect` inside a `w:pict` child of the paragraph;
# false by default.
def horizontal_line
  @horizontal_line
end

#ind ⇒ Object

Returns the value of attribute ind.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Indentation of the paragraph: an Indents object, replaced by
# Indents.parse when a `w:ind` node is present.
def ind
  @ind
end

#keep_lines ⇒ Object

Returns the value of attribute keep_lines.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true when a `w:keepLines` node is present and its `val` is not 'false';
# false by default.
def keep_lines
  @keep_lines
end

#keep_next ⇒ Object

Returns the value of attribute keep_next.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true once a `w:keepNext` node has been seen (its `val` is not inspected);
# false by default.
def keep_next
  @keep_next
end

#kinoku ⇒ Object

Returns the value of attribute kinoku.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true once a `w:kinoku` node has been seen; false by default.
# NOTE(review): the OOXML element is spelled `kinsoku` — confirm whether this
# name is intentional.
def kinoku
  @kinoku
end

#number ⇒ Object

Returns the value of attribute number.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Paragraph number handed to parse as `par_number` (0 by default).
# This attribute is ignored by `==`.
def number
  @number
end

#numbering ⇒ Object

Returns the value of attribute numbering.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Numbering object built from a `w:numPr` node; nil when the paragraph is
# not part of a list.
def numbering
  @numbering
end

#orphan_control ⇒ Object

Returns the value of attribute orphan_control.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true by default; overwritten with OOXMLDocumentObject.option_enabled? of
# the `w:widowControl` node when one is present.
def orphan_control
  @orphan_control
end

#page_break ⇒ Object

Returns the value of attribute page_break.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true when a `w:pageBreakBefore` node is present and its `val` is not
# 'false'; false by default.
def page_break
  @page_break
end

#page_numbering ⇒ Object

Returns the value of attribute page_numbering.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# true once parse has met a field instruction (`w:fldSimple` `instr` or
# `w:instrText`) containing 'PAGE'; false by default.
def page_numbering
  @page_numbering
end

#section_break ⇒ Object

Returns the value of attribute section_break.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Label of the section break derived from the `w:sectPr` type: 'Odd page',
# 'Even page', 'Current Page' or 'Next Page'; nil without `w:sectPr`.
def section_break
  @section_break
end

#sector_properties ⇒ Object

Returns the value of attribute sector_properties.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Result of PageProperties.parse for the `w:sectPr` node; nil when the
# paragraph carries no section properties.
def sector_properties
  @sector_properties
end

#spacing ⇒ Object

Returns the value of attribute spacing.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Spacing object; its before/after/line/line_rule fields are filled from
# the `w:spacing` node by parse_paragraph_style.
def spacing
  @spacing
end

#style ⇒ Object

Returns the value of attribute style.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Result of StyleParametres.parse for the style referenced by `w:pStyle`,
# set by parse_paragraph_style_xml; nil by default.
def style
  @style
end

#tabs ⇒ Object

Returns the value of attribute tabs.



9
10
11
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9

# Array of ParagraphTab objects, one per `w:tab` inside `w:tabs`.
def tabs
  @tabs
end

Class Method Details

.parse(p_tag, par_number = 0, default_paragraph = DocxParagraph.new, default_character = DocxParagraphRun.new) ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 103

# Builds a DocxParagraph from a paragraph node.
#
# Bookmarks are collected first, then every child element is visited in
# document order and dispatched on its name: paragraph properties, comment
# range markers, simple fields, runs, hyperlinks and math paragraphs.
#
# @param p_tag paragraph node; must answer `xpath`
# @param par_number value stored as the paragraph's `number`
# @param default_paragraph paragraph whose copy is the starting point
# @param default_character run whose copy seeds field and hyperlink runs
# @return the populated copy of `default_paragraph`
def self.parse(p_tag, par_number = 0, default_paragraph = DocxParagraph.new, default_character = DocxParagraphRun.new)
  paragraph = default_paragraph.copy
  base_run = default_character.copy
  runs = []
  # Seed for plain `w:r` runs; it is also handed to parse_paragraph_style.
  run_template = DocxParagraphRun.new
  run_index = 0
  # Ids of the comment ranges that are open at the current position.
  open_comments = []

  p_tag.xpath('w:bookmarkStart').each do |node|
    paragraph.bookmark_start << Bookmark.new(node.attribute('id').value, node.attribute('name').value)
  end
  p_tag.xpath('w:bookmarkEnd').each do |node|
    paragraph.bookmark_end << Bookmark.new(node.attribute('id').value)
  end

  p_tag.xpath('*').each do |child|
    case child.name
    when 'pPr'
      DocxParagraph.parse_paragraph_style(p_tag.xpath('w:pPr'), paragraph, run_template)
      p_tag.xpath('w:pict').each do |pict|
        pict.xpath('v:rect').each { paragraph.horizontal_line = true }
      end
    when 'commentRangeStart'
      open_comments << child.attribute('id').value
    when 'fldSimple'
      field_code = child.attribute('instr').to_s
      paragraph.page_numbering = true if field_code.include?('PAGE')
      child.xpath('w:r').each do |r_tag|
        field_run = DocxParagraphRun.parse_character(r_tag, base_run.copy, run_index)
        field_run.page_number = paragraph.page_numbering
        field_run.instruction = field_code
        runs << field_run.copy
        run_index += 1
      end
    when 'r'
      run = run_template.copy
      child.xpath('w:instrText').each do |instr_text|
        paragraph.page_numbering = true if instr_text.text.include?('PAGE')
      end
      run = DocxParagraphRun.parse_character(child, run, run_index)
      run.comments = open_comments.dup
      runs << run.copy
      # Re-attach the shape to the stored copy.
      runs.last.shape = run.shape unless run.shape.nil?
      run_index += 1
    when 'hyperlink'
      link_run = base_run.copy
      if child.attribute('id')
        link_run.link = Hyperlink.parse(child)
      elsif child.attribute('anchor')
        link_run.link = child.attribute('anchor').value
      end
      # NOTE(review): the same run object is threaded through every run of the
      # hyperlink, so each run is parsed on top of the previous result.
      child.xpath('w:r').each do |r_tag|
        link_run = DocxParagraphRun.parse_character(r_tag, link_run, run_index)
        runs << link_run.copy
        run_index += 1
      end
      child.xpath('w:fldSimple').each do |simple_field|
        field_code = simple_field.attribute('instr').to_s
        paragraph.page_numbering = true if field_code.include?('PAGE')
        simple_field.xpath('w:r').each do |r_tag|
          link_run = DocxParagraphRun.parse_character(r_tag, link_run.copy, run_index)
          link_run.page_number = paragraph.page_numbering
          link_run.instruction = field_code
          runs << link_run.copy
          run_index += 1
        end
      end
    when 'oMathPara'
      child.xpath('m:oMath').each { |o_math| runs << DocxFormula.parse(o_math) }
    when 'commentRangeEnd'
      # Close only the first open range carrying this id.
      closed_at = open_comments.index { |comment| comment == child.attribute('id').value }
      open_comments.delete_at(closed_at) if closed_at
    end
  end

  paragraph.number = par_number
  # Trailing whitespace is stripped from the last run only, and only when it
  # is exactly a DocxParagraphRun (not a formula).
  final_run = runs.last
  final_run.text = final_run.text.rstrip if final_run.instance_of?(DocxParagraphRun)
  paragraph.character_style_array = runs
  paragraph
end

.parse_paragraph_style(paragraph_pr_tag, paragraph_style = DocxParagraph.new, default_char_style = DocxParagraphRun.new) ⇒ Object



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 197

# Applies the properties found in a paragraph-properties node to
# `paragraph_style` and returns it.
#
# Fixes compared with the previous version:
# * a `w:jc` node without a `val` attribute no longer raises NoMethodError;
# * a `w:shd` node without a `fill` attribute is skipped, not a crash;
# * `w:framePr` is parsed once (it used to be parsed twice).
#
# @param paragraph_pr_tag node (or node set) answering `xpath` for `w:*` queries
# @param paragraph_style paragraph that receives the parsed values
# @param default_char_style run style forwarded to the style and section parsers
# @return `paragraph_style`
def self.parse_paragraph_style(paragraph_pr_tag, paragraph_style = DocxParagraph.new, default_char_style = DocxParagraphRun.new)
  # 566.9 is 1440 / 2.54 rounded — presumably twips per centimetre, so
  # positions and spacing end up in centimetres. TODO confirm the unit.
  twips_per_cm = 566.9

  paragraph_pr_tag.xpath('w:tabs').each do |tabs_node|
    tabs_node.xpath('w:tab').each do |tab_node|
      alignment = tab_node.attribute('val').value.to_sym
      position = (tab_node.attribute('pos').value.to_f / twips_per_cm).round(2)
      paragraph_style.tabs << ParagraphTab.new(alignment, position)
    end
  end

  paragraph_pr_tag.xpath('w:pageBreakBefore').each do |page_break_before|
    flag = page_break_before.attribute('val')
    paragraph_style.page_break = true if flag.nil? || flag.value != 'false'
  end

  paragraph_pr_tag.xpath('w:pBdr').each do |paragraph_br|
    paragraph_style.borders = Borders.new
    %w[bottom left top right between bar].each do |side|
      paragraph_br.xpath("w:#{side}").each do |border_node|
        paragraph_style.borders.public_send("#{side}=", BordersProperties.parse(border_node))
      end
    end
  end

  paragraph_pr_tag.xpath('w:keepLines').each do |keep_lines|
    flag = keep_lines.attribute('val')
    paragraph_style.keep_lines = true if flag.nil? || flag.value != 'false'
  end

  paragraph_pr_tag.xpath('w:widowControl').each do |widow_control_node|
    paragraph_style.orphan_control = OOXMLDocumentObject.option_enabled?(widow_control_node)
  end

  # NOTE(review): unlike keepLines, the `val` attribute of keepNext and
  # contextualSpacing is not inspected, so val="false" still enables them.
  paragraph_pr_tag.xpath('w:keepNext').each do |_|
    paragraph_style.keep_next = true
  end
  paragraph_style.contextual_spacing = true unless paragraph_pr_tag.xpath('w:contextualSpacing').empty?

  paragraph_pr_tag.xpath('w:shd').each do |shd|
    fill = shd.attribute('fill')
    # Without a fill colour there is nothing to build the colour from.
    next if fill.nil?

    paragraph_style.background_color = Color.from_int16(fill.value)
    pattern = shd.attribute('val')
    paragraph_style.background_color.set_style(pattern.value) unless pattern.nil?
  end

  paragraph_pr_tag.xpath('w:pStyle').each do |p_style|
    parse_paragraph_style_xml(p_style.attribute('val').value, paragraph_style, default_char_style)
  end

  paragraph_pr_tag.xpath('w:ind').each do |ind|
    paragraph_style.ind = Indents.parse(ind)
  end

  paragraph_pr_tag.xpath('w:kinoku').each do
    paragraph_style.kinoku = true
  end

  paragraph_pr_tag.xpath('w:framePr').each do |frame_pr_node|
    paragraph_style.frame_properties = FrameProperties.parse(frame_pr_node)
  end

  paragraph_pr_tag.xpath('w:numPr').each do |num_pr|
    numbering = Numbering.new
    num_pr.xpath('w:ilvl').each do |ilvl|
      numbering.ilvl = ilvl.attribute('val').value
    end
    num_pr.xpath('w:numId').each do |num_id|
      # Numbering definitions can only be resolved when the package has them.
      next unless File.exist?(OOXMLDocumentObject.path_to_folder + 'word/numbering.xml')

      numbering.numbering_properties = NumberingProperties.parse(num_id.attribute('val').value)
    end
    paragraph_style.numbering = numbering
  end

  paragraph_pr_tag.xpath('w:jc').each do |jc|
    justification = jc.attribute('val')
    # Keep the current alignment when the node carries no value.
    next if justification.nil?

    paragraph_style.align = justification.value == 'both' ? :justify : justification.value.to_sym
  end

  paragraph_pr_tag.xpath('w:spacing').each do |spacing|
    before = spacing.attribute('before')
    paragraph_style.spacing.before = (before.value.to_f / twips_per_cm).round(2) unless before.nil?

    after = spacing.attribute('after')
    paragraph_style.spacing.after = (after.value.to_f / twips_per_cm).round(2) unless after.nil?

    line_rule = spacing.attribute('lineRule')
    paragraph_style.spacing.line_rule = line_rule.value.sub('atLeast', 'at_least').to_sym unless line_rule.nil?

    line = spacing.attribute('line')
    next if line.nil?

    # The rule is read after lineRule was applied above. With :auto the raw
    # value is divided by 240 (presumably 240ths of a line — confirm against
    # the OOXML spec); otherwise it is converted like the other lengths.
    divisor = paragraph_style.spacing.line_rule == :auto ? 240.0 : twips_per_cm
    paragraph_style.spacing.line = (line.value.to_f / divisor).round(2)
  end

  paragraph_pr_tag.xpath('w:sectPr').each do |sect_pr|
    paragraph_style.sector_properties = PageProperties.parse(sect_pr, paragraph_style, default_char_style)
    paragraph_style.section_break = case paragraph_style.sector_properties.type
                                    when 'oddPage' then 'Odd page'
                                    when 'evenPage' then 'Even page'
                                    when 'continuous' then 'Current Page'
                                    else 'Next Page'
                                    end
  end

  paragraph_style
end

.parse_paragraph_style_xml(id, paragraph_style, character_style) ⇒ Object



309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 309

# Looks up the style with the given id in `word/styles.xml` of the unpacked
# document and applies its paragraph and run properties.
#
# The styles file is opened with a block so the handle is closed as soon as
# the document is parsed; it used to stay open.
#
# @param id value of `styleId` to look for
# @param paragraph_style paragraph that receives the `w:pPr` values and `style`
# @param character_style run style that receives the `w:rPr` values
def self.parse_paragraph_style_xml(id, paragraph_style, character_style)
  styles_path = OOXMLDocumentObject.path_to_folder + 'word/styles.xml'
  doc = File.open(styles_path) { |styles_file| Nokogiri::XML(styles_file) }
  doc.search('//w:style').each do |style|
    next unless style.attribute('styleId').value == id

    style.xpath('w:pPr').each do |p_pr|
      parse_paragraph_style(p_pr, paragraph_style, character_style)
      paragraph_style.style = StyleParametres.parse(style)
    end
    style.xpath('w:rPr').each do |r_pr|
      # NOTE(review): @default_character is a class-level instance variable
      # that nothing in this file section assigns — confirm it is not nil.
      DocxParagraphRun.parse(r_pr, character_style, @default_character)
    end
    # Only the first style with a matching id is applied.
    break
  end
end

Instance Method Details

#==(other) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 85

# Compares two paragraphs attribute by attribute, ignoring `@number` and
# runs whose text is empty.
#
# Empty runs are pruned in place from BOTH operands before comparing (kept
# from the previous behaviour). The pruning now uses `reject!`; it used to
# delete from the array while iterating it, which skipped the element after
# every deletion and left consecutive empty runs behind.
#
# @param other object to compare with
# @return [Boolean] false when `other` has no `character_style_array`
def ==(other)
  return false unless other.respond_to?(:character_style_array)

  character_style_array.reject! { |run| run.text.empty? }
  other.character_style_array.reject! { |run| run.text.empty? }

  significant_attributes = instance_variables - [:@number]
  significant_attributes.all? do |attribute|
    instance_variable_get(attribute) == other.instance_variable_get(attribute)
  end
end

#copy ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 40

# Returns a new DocxParagraph carrying the same attribute values.
#
# The bookmark, tab and run arrays are duplicated, and spacing and indents
# go through their own `copy`. The run array used to be shared with the
# source paragraph, so in-place pruning on the copy (see `==` and
# `remove_empty_runs`) also changed the original. All remaining attributes
# are still shared by reference.
#
# @return [DocxParagraph]
def copy
  DocxParagraph.new.tap do |paragraph|
    paragraph.number = number
    paragraph.bookmark_start = @bookmark_start.dup
    paragraph.bookmark_end = @bookmark_end.dup
    paragraph.align = @align
    paragraph.spacing = @spacing.copy
    paragraph.background_color = @background_color
    paragraph.ind = @ind.copy
    paragraph.numbering = @numbering
    # Shallow duplicate: a separate array holding the same run objects.
    paragraph.character_style_array = @character_style_array.dup
    paragraph.horizontal_line = @horizontal_line
    paragraph.page_break = @page_break
    paragraph.kinoku = @kinoku
    paragraph.borders = @borders
    paragraph.keep_lines = @keep_lines
    paragraph.contextual_spacing = @contextual_spacing
    paragraph.sector_properties = @sector_properties
    paragraph.page_numbering = @page_numbering
    paragraph.section_break = @section_break
    paragraph.style = @style
    paragraph.keep_next = @keep_next
    paragraph.orphan_control = @orphan_control
    paragraph.tabs = @tabs.dup
    paragraph.frame_properties = @frame_properties
  end
end

#nonempty_runs ⇒ Object



68
69
70
71
72
73
74
75
76
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 68

# Returns the entries of the run array that carry content: every
# DocxFormula, and every DocxParagraphRun that has text, alternate content
# or a drawing. Entries of any other class are left out.
#
# @return [Array] new array; the run array itself is not modified
def nonempty_runs
  @character_style_array.select do |entry|
    case entry
    when DocxParagraphRun
      # A run is empty only when all three parts are missing.
      !(entry.text.empty? && entry.alternate_content.nil? && entry.drawing.nil?)
    when DocxFormula
      true
    end
  end
end

#remove_empty_runs ⇒ Object



78
79
80
81
82
83
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 78

# Drops, in place, every entry of the run array that `nonempty_runs` does
# not return.
#
# The previous version deleted from the array while iterating it, which
# skipped the entry after each deletion and could leave empty runs behind.
#
# @return [Array] the (same) run array after pruning
def remove_empty_runs
  @character_style_array.replace(nonempty_runs)
end