Class: OoxmlParser::DocxParagraph
- Inherits: OOXMLDocumentObject
- Inheritance chain: Object → OOXMLDocumentObject → OoxmlParser::DocxParagraph
- Defined in:
- lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb
Constant Summary
Constants inherited from OOXMLDocumentObject
OOXMLDocumentObject::DEFAULT_DIRECTORY_FOR_MEDIA
Instance Attribute Summary collapse
-
#align ⇒ Object
Returns the value of attribute align.
-
#background_color ⇒ Object
Returns the value of attribute background_color.
-
#bookmark_end ⇒ Object
Returns the value of attribute bookmark_end.
-
#bookmark_start ⇒ Object
Returns the value of attribute bookmark_start.
-
#borders ⇒ Object
Returns the value of attribute borders.
-
#character_style_array ⇒ Object
Returns the value of attribute character_style_array.
-
#contextual_spacing ⇒ Object
Returns the value of attribute contextual_spacing.
-
#frame_properties ⇒ Object
Returns the value of attribute frame_properties.
-
#horizontal_line ⇒ Object
Returns the value of attribute horizontal_line.
-
#ind ⇒ Object
Returns the value of attribute ind.
-
#keep_lines ⇒ Object
Returns the value of attribute keep_lines.
-
#keep_next ⇒ Object
Returns the value of attribute keep_next.
-
#kinoku ⇒ Object
Returns the value of attribute kinoku.
-
#number ⇒ Object
Returns the value of attribute number.
-
#numbering ⇒ Object
Returns the value of attribute numbering.
-
#orphan_control ⇒ Object
Returns the value of attribute orphan_control.
-
#page_break ⇒ Object
Returns the value of attribute page_break.
-
#page_numbering ⇒ Object
Returns the value of attribute page_numbering.
-
#section_break ⇒ Object
Returns the value of attribute section_break.
-
#sector_properties ⇒ Object
Returns the value of attribute sector_properties.
-
#spacing ⇒ Object
Returns the value of attribute spacing.
-
#style ⇒ Object
Returns the value of attribute style.
-
#tabs ⇒ Object
Returns the value of attribute tabs.
Class Method Summary collapse
- .parse(p_tag, par_number = 0, default_paragraph = DocxParagraph.new, default_character = DocxParagraphRun.new) ⇒ Object
- .parse_paragraph_style(paragraph_pr_tag, paragraph_style = DocxParagraph.new, default_char_style = DocxParagraphRun.new) ⇒ Object
- .parse_paragraph_style_xml(id, paragraph_style, character_style) ⇒ Object
Instance Method Summary collapse
- #==(other) ⇒ Object
- #copy ⇒ Object
-
#initialize ⇒ DocxParagraph
constructor
A new instance of DocxParagraph.
- #nonempty_runs ⇒ Object
- #remove_empty_runs ⇒ Object
Methods inherited from OOXMLDocumentObject
add_to_xmls_stack, copy_file_and_rename_to_zip, copy_media_file, current_xml, dir, encrypted_file?, get_link_from_rels, media_folder, option_enabled?, unzip_file
Constructor Details
#initialize ⇒ DocxParagraph
Returns a new instance of DocxParagraph.
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 14 def initialize @number = 0 @bookmark_start = [] @bookmark_end = [] @align = 'left' @spacing = Spacing.new @background_color = nil @ind = Indents.new @kinoku = false @numbering = nil @character_style_array = [] @horizontal_line = false @page_break = false @borders = Borders.new @keep_lines = false @contextual_spacing = false @sector_properties = nil @page_numbering = false @section_break = nil @style = nil @keep_next = false @orphan_control = true @tabs = [] @frame_properties = nil end |
Instance Attribute Details
#align ⇒ Object
Returns the value of attribute align.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def align @align end |
#background_color ⇒ Object
Returns the value of attribute background_color.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def background_color @background_color end |
#bookmark_end ⇒ Object
Returns the value of attribute bookmark_end.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def bookmark_end @bookmark_end end |
#bookmark_start ⇒ Object
Returns the value of attribute bookmark_start.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def bookmark_start @bookmark_start end |
#borders ⇒ Object
Returns the value of attribute borders.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def borders @borders end |
#character_style_array ⇒ Object
Returns the value of attribute character_style_array.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def character_style_array @character_style_array end |
#contextual_spacing ⇒ Object
Returns the value of attribute contextual_spacing.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def contextual_spacing @contextual_spacing end |
#frame_properties ⇒ Object
Returns the value of attribute frame_properties.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def frame_properties @frame_properties end |
#horizontal_line ⇒ Object
Returns the value of attribute horizontal_line.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def horizontal_line @horizontal_line end |
#ind ⇒ Object
Returns the value of attribute ind.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def ind @ind end |
#keep_lines ⇒ Object
Returns the value of attribute keep_lines.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def keep_lines @keep_lines end |
#keep_next ⇒ Object
Returns the value of attribute keep_next.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def keep_next @keep_next end |
#kinoku ⇒ Object
Returns the value of attribute kinoku.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def kinoku @kinoku end |
#number ⇒ Object
Returns the value of attribute number.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def number @number end |
#numbering ⇒ Object
Returns the value of attribute numbering.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def numbering @numbering end |
#orphan_control ⇒ Object
Returns the value of attribute orphan_control.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def orphan_control @orphan_control end |
#page_break ⇒ Object
Returns the value of attribute page_break.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def page_break @page_break end |
#page_numbering ⇒ Object
Returns the value of attribute page_numbering.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def page_numbering @page_numbering end |
#section_break ⇒ Object
Returns the value of attribute section_break.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def section_break @section_break end |
#sector_properties ⇒ Object
Returns the value of attribute sector_properties.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def sector_properties @sector_properties end |
#spacing ⇒ Object
Returns the value of attribute spacing.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def spacing @spacing end |
#style ⇒ Object
Returns the value of attribute style.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def style @style end |
#tabs ⇒ Object
Returns the value of attribute tabs.
9 10 11 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 9 def tabs @tabs end |
Class Method Details
.parse(p_tag, par_number = 0, default_paragraph = DocxParagraph.new, default_character = DocxParagraphRun.new) ⇒ Object
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 103
# Builds a DocxParagraph from a `w:p` node.
#
# Bookmarks are gathered first; afterwards every direct child of the node is
# visited in document order and dispatched on its element name. Runs, hyperlink
# runs, simple-field runs and formulas are appended to the resulting
# character_style_array in the order they are met.
#
# @param p_tag node of the paragraph (must respond to #xpath)
# @param par_number value stored in the paragraph's `number`
# @param default_paragraph paragraph whose #copy is the starting point
# @param default_character run whose #copy seeds field and hyperlink runs
# @return [DocxParagraph] the populated copy of default_paragraph
def self.parse(p_tag, par_number = 0, default_paragraph = DocxParagraph.new, default_character = DocxParagraphRun.new)
  paragraph_style = default_paragraph.copy
  default_character_style = default_character.copy
  character_styles_array = []
  # Plain runs start from this style; it is also handed to the pPr parser.
  custom_character_style = DocxParagraphRun.new
  char_number = 0
  # Ids of comment ranges that are currently open.
  comments = []

  p_tag.xpath('w:bookmarkStart').each do |start_node|
    paragraph_style.bookmark_start << Bookmark.new(start_node.attribute('id').value, start_node.attribute('name').value)
  end
  p_tag.xpath('w:bookmarkEnd').each do |end_node|
    paragraph_style.bookmark_end << Bookmark.new(end_node.attribute('id').value)
  end

  p_tag.xpath('*').each do |element|
    case element.name
    when 'pPr'
      DocxParagraph.parse_paragraph_style(p_tag.xpath('w:pPr'), paragraph_style, custom_character_style)
      p_tag.xpath('w:pict').each do |pict|
        pict.xpath('v:rect').each { paragraph_style.horizontal_line = true }
      end
    when 'commentRangeStart'
      comments << element.attribute('id').value
    when 'fldSimple'
      field_instruction = element.attribute('instr').to_s
      paragraph_style.page_numbering = true if field_instruction.include?('PAGE')
      element.xpath('w:r').each do |r_tag|
        field_run = DocxParagraphRun.parse_character(r_tag, default_character_style.copy, char_number)
        field_run.page_number = paragraph_style.page_numbering
        field_run.instruction = field_instruction
        character_styles_array << field_run.copy
        char_number += 1
      end
    when 'r'
      run_style = custom_character_style.copy
      element.xpath('w:instrText').each do |instr_text|
        paragraph_style.page_numbering = true if instr_text.text.include?('PAGE')
      end
      run_style = DocxParagraphRun.parse_character(element, run_style, char_number)
      run_style.comments = comments.dup
      character_styles_array << run_style.copy
      # The shape is re-assigned on the stored copy after copying.
      character_styles_array.last.shape = run_style.shape unless run_style.shape.nil?
      char_number += 1
    when 'hyperlink'
      link_style = default_character_style.copy
      if element.attribute('id').nil?
        anchor = element.attribute('anchor')
        link_style.link = anchor.value unless anchor.nil?
      else
        link_style.link = Hyperlink.parse(element)
      end
      element.xpath('w:r').each do |r_tag|
        # The same style object is threaded through consecutive runs of the link.
        link_style = DocxParagraphRun.parse_character(r_tag, link_style, char_number)
        character_styles_array << link_style.copy
        char_number += 1
      end
      element.xpath('w:fldSimple').each do |simple_field|
        field_instruction = simple_field.attribute('instr').to_s
        paragraph_style.page_numbering = true if field_instruction.include?('PAGE')
        simple_field.xpath('w:r').each do |r_tag|
          link_style = DocxParagraphRun.parse_character(r_tag, link_style.copy, char_number)
          link_style.page_number = paragraph_style.page_numbering
          link_style.instruction = field_instruction
          character_styles_array << link_style.copy
          char_number += 1
        end
      end
    when 'oMathPara'
      element.xpath('m:oMath').each do |o_math|
        character_styles_array << DocxFormula.parse(o_math)
      end
    when 'commentRangeEnd'
      # Close only the first open range carrying this id.
      closed_at = comments.index { |comment_id| comment_id == element.attribute('id').value }
      comments.delete_at(closed_at) unless closed_at.nil?
    end
  end

  paragraph_style.number = par_number
  # Trailing whitespace is stripped from the last item only when it is exactly a DocxParagraphRun.
  last_item = character_styles_array.last
  last_item.text = last_item.text.rstrip if last_item.instance_of?(DocxParagraphRun)
  paragraph_style.character_style_array = character_styles_array
  paragraph_style
end
.parse_paragraph_style(paragraph_pr_tag, paragraph_style = DocxParagraph.new, default_char_style = DocxParagraphRun.new) ⇒ Object
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 197
# Applies the formatting described by a `w:pPr` node (or node set) to
# +paragraph_style+, mutating and returning it.
#
# Child elements are processed in a fixed order; `w:pStyle` is resolved part-way
# through via .parse_paragraph_style_xml, so values read before it may be
# overwritten by the referenced style and values read after it win over the style.
# NOTE(review): confirm that ordering is intended.
#
# Lengths are divided by 566.9 (1440 / 2.54, i.e. twentieths of a point per
# centimetre) and rounded to two decimals - presumably converting to cm.
#
# @param paragraph_pr_tag node or node set responding to #xpath
# @param paragraph_style [DocxParagraph] object that receives the values
# @param default_char_style [DocxParagraphRun] forwarded to style and section parsing
# @return [DocxParagraph] the same +paragraph_style+ object
def self.parse_paragraph_style(paragraph_pr_tag, paragraph_style = DocxParagraph.new, default_char_style = DocxParagraphRun.new)
  paragraph_pr_tag.xpath('w:tabs').each do |tabs_node|
    tabs_node.xpath('w:tab').each do |tab_node|
      tab_kind = tab_node.attribute('val').value.to_sym
      tab_position = (tab_node.attribute('pos').value.to_f / 566.9).round(2)
      paragraph_style.tabs << ParagraphTab.new(tab_kind, tab_position)
    end
  end

  paragraph_pr_tag.xpath('w:pageBreakBefore').each do |page_break_before|
    val = page_break_before.attribute('val')
    paragraph_style.page_break = true if val.nil? || val.value != 'false'
  end

  paragraph_pr_tag.xpath('w:pBdr').each do |paragraph_br|
    paragraph_style.borders = Borders.new
    paragraph_br.xpath('w:bottom').each do |bottom|
      paragraph_style.borders.bottom = BordersProperties.parse(bottom)
    end
    paragraph_br.xpath('w:left').each do |left|
      paragraph_style.borders.left = BordersProperties.parse(left)
    end
    paragraph_br.xpath('w:top').each do |top|
      paragraph_style.borders.top = BordersProperties.parse(top)
    end
    paragraph_br.xpath('w:right').each do |right|
      paragraph_style.borders.right = BordersProperties.parse(right)
    end
    paragraph_br.xpath('w:between').each do |between|
      paragraph_style.borders.between = BordersProperties.parse(between)
    end
    # The block variable had been lost from this handler, leaving invalid syntax;
    # it is restored following the pattern of the sibling border handlers.
    paragraph_br.xpath('w:bar').each do |bar|
      paragraph_style.borders.bar = BordersProperties.parse(bar)
    end
  end

  paragraph_pr_tag.xpath('w:keepLines').each do |keep_lines|
    val = keep_lines.attribute('val')
    paragraph_style.keep_lines = true if val.nil? || val.value != 'false'
  end

  paragraph_pr_tag.xpath('w:widowControl').each do |widow_control_node|
    paragraph_style.orphan_control = OOXMLDocumentObject.option_enabled?(widow_control_node)
  end

  # NOTE(review): unlike keepLines, the val attribute is not consulted here.
  paragraph_pr_tag.xpath('w:keepNext').each do |_|
    paragraph_style.keep_next = true
  end

  paragraph_style.contextual_spacing = true unless paragraph_pr_tag.xpath('w:contextualSpacing').empty?

  # NOTE(review): a w:shd element without a fill attribute raises NoMethodError here.
  paragraph_pr_tag.xpath('w:shd').each do |shd|
    background_color_string = shd.attribute('fill').value
    paragraph_style.background_color = Color.from_int16(background_color_string)
    unless shd.attribute('val').nil?
      paragraph_style.background_color.set_style(shd.attribute('val').value)
    end
  end

  paragraph_pr_tag.xpath('w:pStyle').each do |p_style|
    parse_paragraph_style_xml(p_style.attribute('val').value, paragraph_style, default_char_style)
  end

  paragraph_pr_tag.xpath('w:ind').each do |ind|
    paragraph_style.ind = Indents.parse(ind)
  end

  # NOTE(review): the WordprocessingML element is spelled `w:kinsoku`; confirm this lookup ever matches.
  paragraph_pr_tag.xpath('w:kinoku').each do
    paragraph_style.kinoku = true
  end

  # The frame properties used to be parsed by two identical loops; one is enough.
  paragraph_pr_tag.xpath('w:framePr').each do |frame_pr_node|
    paragraph_style.frame_properties = FrameProperties.parse(frame_pr_node)
  end

  paragraph_pr_tag.xpath('w:numPr').each do |num_pr|
    numbering = Numbering.new
    num_pr.xpath('w:ilvl').each do |ilvl|
      numbering.ilvl = ilvl.attribute('val').value
    end
    num_pr.xpath('w:numId').each do |num_id|
      numbering.numbering_properties = NumberingProperties.parse(num_id.attribute('val').value) if File.exist?(OOXMLDocumentObject.path_to_folder + 'word/numbering.xml')
    end
    paragraph_style.numbering = numbering
  end

  paragraph_pr_tag.xpath('w:jc').each do |jc|
    jc_val = jc.attribute('val')
    # A w:jc without val previously crashed on the 'both' check; now it is skipped.
    next if jc_val.nil?

    paragraph_style.align = jc_val.value == 'both' ? :justify : jc_val.value.to_sym
  end

  paragraph_pr_tag.xpath('w:spacing').each do |spacing|
    unless spacing.attribute('before').nil?
      paragraph_style.spacing.before = (spacing.attribute('before').value.to_f / 566.9).round(2)
    end
    unless spacing.attribute('after').nil?
      paragraph_style.spacing.after = (spacing.attribute('after').value.to_f / 566.9).round(2)
    end
    # lineRule must be read before line: it selects the divisor used below.
    unless spacing.attribute('lineRule').nil?
      paragraph_style.spacing.line_rule = spacing.attribute('lineRule').value.sub('atLeast', 'at_least').to_sym
    end
    next if spacing.attribute('line').nil?

    raw_line = spacing.attribute('line').value.to_f
    line_divisor = paragraph_style.spacing.line_rule == :auto ? 240.0 : 566.9
    paragraph_style.spacing.line = (raw_line / line_divisor).round(2)
  end

  paragraph_pr_tag.xpath('w:sectPr').each do |sect_pr|
    paragraph_style.sector_properties = PageProperties.parse(sect_pr, paragraph_style, default_char_style)
    paragraph_style.section_break = case paragraph_style.sector_properties.type
                                    when 'oddPage'
                                      'Odd page'
                                    when 'evenPage'
                                      'Even page'
                                    when 'continuous'
                                      'Current Page'
                                    else
                                      'Next Page'
                                    end
  end
  paragraph_style
end
.parse_paragraph_style_xml(id, paragraph_style, character_style) ⇒ Object
309 310 311 312 313 314 315 316 317 318 319 320 321 322 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 309
# Looks up the style whose `styleId` equals +id+ in `word/styles.xml` and applies
# its paragraph properties to +paragraph_style+ and its run properties to
# +character_style+. Only the first matching style is used.
#
# The styles file is opened in block form so the handle is closed as soon as the
# document has been parsed (previously it stayed open until garbage collection).
#
# NOTE(review): @default_character is read in class-method context and is not
# assigned anywhere in the code shown here, so it is presumably nil - confirm.
#
# @param id [String] value of the `w:pStyle` val attribute
# @param paragraph_style [DocxParagraph] paragraph to update in place
# @param character_style [DocxParagraphRun] run style to update in place
# @return [Object, nil] nil when a style matched, otherwise the searched node set
def self.parse_paragraph_style_xml(id, paragraph_style, character_style)
  styles_path = OOXMLDocumentObject.path_to_folder + 'word/styles.xml'
  doc = File.open(styles_path) { |styles_file| Nokogiri::XML(styles_file) }
  doc.search('//w:style').each do |style|
    next unless style.attribute('styleId').value == id

    style.xpath('w:pPr').each do |p_pr|
      parse_paragraph_style(p_pr, paragraph_style, character_style)
      paragraph_style.style = StyleParametres.parse(style)
    end
    style.xpath('w:rPr').each do |r_pr|
      DocxParagraphRun.parse(r_pr, character_style, @default_character)
    end
    break
  end
end
Instance Method Details
#==(other) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 85
# Compares two paragraphs attribute by attribute, ignoring @number.
#
# Side effect (kept from the previous implementation): runs whose text is empty
# are removed from BOTH paragraphs' character_style_array before comparing.
# Removal now uses delete_if; deleting inside `each` skipped the element that
# followed every deletion, so consecutive empty runs could survive and make
# otherwise equal paragraphs compare as different.
#
# @param other [Object] object to compare with
# @return [Boolean] false for anything that is not a DocxParagraph, otherwise
#   whether every instance variable except @number is equal
def ==(other)
  # Foreign objects used to raise NoMethodError; they are simply not equal.
  return false unless other.is_a?(DocxParagraph)

  character_style_array.delete_if { |current_run| current_run.text.empty? }
  other.character_style_array.delete_if { |current_run| current_run.text.empty? }

  ignored_attributes = [:@number]
  (instance_variables - ignored_attributes).all? do |attribute|
    instance_variable_get(attribute) == other.instance_variable_get(attribute)
  end
end
#copy ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 40
# Creates a new DocxParagraph carrying the same attribute values.
#
# Arrays (bookmarks, runs, tabs) are duplicated and spacing/indents are copied
# through their own #copy; every other attribute is shared by reference.
# character_style_array used to be shared as well, which let the in-place
# clean-up done by #== and #remove_empty_runs on a copy silently change the
# paragraph it was copied from.
#
# @return [DocxParagraph] the new paragraph
def copy
  paragraph = DocxParagraph.new
  paragraph.number = @number
  paragraph.bookmark_start = @bookmark_start.dup
  paragraph.bookmark_end = @bookmark_end.dup
  paragraph.align = @align
  paragraph.spacing = @spacing.copy
  paragraph.background_color = @background_color
  paragraph.ind = @ind.copy
  paragraph.numbering = @numbering
  # Shallow duplicate: the run objects themselves are still shared.
  paragraph.character_style_array = @character_style_array.dup
  paragraph.horizontal_line = @horizontal_line
  paragraph.page_break = @page_break
  paragraph.kinoku = @kinoku
  paragraph.borders = @borders
  paragraph.keep_lines = @keep_lines
  paragraph.contextual_spacing = @contextual_spacing
  paragraph.sector_properties = @sector_properties
  paragraph.page_numbering = @page_numbering
  paragraph.section_break = @section_break
  paragraph.style = @style
  paragraph.keep_next = @keep_next
  paragraph.orphan_control = @orphan_control
  paragraph.tabs = @tabs.dup
  paragraph.frame_properties = @frame_properties
  paragraph
end
#nonempty_runs ⇒ Object
68 69 70 71 72 73 74 75 76 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 68
# Selects the items of @character_style_array that carry content.
#
# A DocxParagraphRun counts as content unless its text is empty AND it has
# neither alternate content nor a drawing. A DocxFormula always counts.
# Items of any other class are left out.
#
# @return [Array] new array with the selected items, in original order
def nonempty_runs
  @character_style_array.select do |item|
    case item
    when DocxParagraphRun
      !(item.text.empty? && item.alternate_content.nil? && item.drawing.nil?)
    when DocxFormula
      true
    end
  end
end
#remove_empty_runs ⇒ Object
78 79 80 81 82 83 |
# File 'lib/ooxml_parser/docx_parser/docx_data/document_structure/docx_paragraph.rb', line 78
# Removes, in place, every item of @character_style_array that is not reported
# by #nonempty_runs.
#
# delete_if is used instead of deleting inside `each`: removing an element
# while `each` walks the same array makes the iteration skip the element that
# follows it, so every second item of a run of empty items used to be kept.
#
# @return [Array] the (mutated) @character_style_array
def remove_empty_runs
  nonempty = nonempty_runs
  @character_style_array.delete_if { |cur_run| !nonempty.include?(cur_run) }
end