Class: Avv2word::Document

Inherits:
Object
  • Object
show all
Extended by:
TemplatesHelper
Includes:
XSLTHelper
Defined in:
lib/avv2word/document.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods included from TemplatesHelper

template_file

Methods included from XSLTHelper

#document_xslt, #xslt, #xslt_path

Constructor Details

#initialize(template_path) ⇒ Document

Returns a new instance of Document.



105
106
107
108
109
# File 'lib/avv2word/document.rb', line 105

# Sets up a document for one template archive.
#
# @param template_path [String] path of the template; #generate opens it
#   with Zip::File.open
def initialize(template_path)
  @template_path = template_path
  # Images to append to the archive; #generate reads :filename and :url from
  # each element.
  @image_files = []
  # Replacement contents keyed by archive entry name (looked up in #generate).
  @replaceable_files = {}
end

Class Method Details

.content_types_xml_file ⇒ Object



96
97
98
# File 'lib/avv2word/document.rb', line 96

# Name of the content-types entry; #generate compares template entry names
# against it.
#
# @return [String]
def content_types_xml_file
  "[Content_Types].xml"
end

.create(content, template_name = nil, extras = false) ⇒ Object



49
50
51
52
53
54
55
56
# File 'lib/avv2word/document.rb', line 49

# Renders +content+ into a docx using the named template.
#
# @param content [String] HTML; inline images are extracted and footnotes
#   escaped before rendering
# @param template_name [String, nil] template name; the docx extension is
#   appended when missing, nil is passed through to template_file unchanged
# @param extras [Boolean] forwarded to Document#replace_files
# @return [Object] whatever Document#generate returns
def create(content, template_name = nil, extras = false)
  html = escape_footnotes(extract_images_html(content))
  needs_extension = template_name && !template_name.end_with?(extension)
  # Builds a new string so the caller's template name is not modified.
  template_name = template_name + extension if needs_extension
  docx = new(template_file(template_name))
  docx.replace_files(html, extras)
  docx.generate
end

.create_and_save(content, file_path, template_name = nil, extras = false) ⇒ Object



58
59
60
61
62
# File 'lib/avv2word/document.rb', line 58

# Renders a document with .create and writes it to +file_path+ in binary mode.
#
# The document is rendered before the destination is opened, so an error
# raised by .create leaves an existing file at +file_path+ untouched instead
# of truncating it.
#
# @param content [String] HTML passed to .create
# @param file_path [String] destination path; overwritten on success
# @param template_name [String, nil] passed to .create
# @param extras [Boolean] passed to .create
# @return [File] the (closed) file the document was written to
def create_and_save(content, file_path, template_name = nil, extras = false)
  docx = create(content, template_name, extras)
  File.open(file_path, 'wb') do |out|
    out << docx
  end
end

.create_with_content(template, content, extras = false) ⇒ Object



64
65
66
67
68
69
70
# File 'lib/avv2word/document.rb', line 64

# Renders +content+ into a docx using +template+.
#
# Unlike .create, the template is mandatory and footnotes are not escaped.
#
# @param template [String] template name; the docx extension is appended
#   when missing
# @param content [String] HTML; inline images are extracted before rendering
# @param extras [Boolean] forwarded to Document#replace_files
# @return [Object] whatever Document#generate returns
def create_with_content(template, content, extras = false)
  html = extract_images_html(content)
  template_name = template.end_with?(extension) ? template : template + extension
  docx = new(template_file(template_name))
  docx.replace_files(html, extras)
  docx.generate
end

.doc_xml_file ⇒ Object



76
77
78
# File 'lib/avv2word/document.rb', line 76

# Entry name of the main document part; #generate rewrites the body of the
# template entry with this name.
#
# @return [String]
def doc_xml_file
  "word/document.xml"
end

.escape_footnotes(html) ⇒ Object



38
39
40
41
42
43
44
45
46
47
# File 'lib/avv2word/document.rb', line 38

# Normalizes <footnote> elements in an HTML fragment.
#
# The content of every <footnote> element is replaced by the value of its
# data-value attribute (empty when the attribute is absent), and all of its
# attributes are removed.
#
# @param html [String] HTML fragment
# @return [String] the fragment serialized back to HTML
def escape_footnotes(html)
  doc = Nokogiri::HTML.fragment(html)
  doc.css("footnote").each do |node|
    node.content = node["data-value"].to_s
    # Read the text above first: this strips data-value along with every
    # other attribute.
    node.attribute_nodes.each(&:remove)
  end
  doc.to_html
end

.extension ⇒ Object



72
73
74
# File 'lib/avv2word/document.rb', line 72

# File extension that .create and .create_with_content append to template
# names lacking it.
#
# @return [String]
def extension
  ".docx"
end

.extract_images_html(html) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/avv2word/document.rb', line 8

# Moves inline base64 images of an HTML fragment into files on disk.
#
# Every <img> whose src matches "image/<ext>;base64" followed by ",<payload>"
# is decoded into tmp_imgs/image<N>.<ext>, where N is the 1-based position of
# the <img> in the fragment. Its src is pointed at that file and its style is
# set to the pixel size printed by the `identify` command (100x100 when no
# size can be parsed from the output).
#
# An <img> without a src, or with a src that is not such a data URI (for
# example a plain URL), is left untouched instead of raising NoMethodError.
#
# @param html [String] HTML fragment
# @return [String] the fragment serialized back to HTML
def extract_images_html(html)
  require "base64"
  require "fileutils"

  # create working dir
  FileUtils.mkdir_p("tmp_imgs")
  resize = 1 # future feature; half size: 0.5

  doc = Nokogiri::HTML.fragment(html)
  doc.css("img").each_with_index do |img_elm, i|
    # src is expected to look like "data:image/jpeg;base64,<payload>"
    mime_type, img_data = img_elm["src"].to_s.split(",")
    ext = mime_type.to_s[/image\/(\w+?);base64/, 1]
    next unless ext && img_data

    img_path = "tmp_imgs/image#{i + 1}.#{ext}"
    File.binwrite(img_path, Base64.decode64(img_data))
    img_elm["src"] = img_path

    # img_path contains only the index and a \w+ extension, so it is safe to
    # interpolate into the command line.
    match_dimensions = %x( identify #{img_path} ).match(/(?<x>\d+)x(?<y>\d+)/)
    img_elm["style"] =
      if match_dimensions
        "width:#{match_dimensions[:x].to_i * resize}px;height:#{match_dimensions[:y].to_i * resize}px"
      else
        "width:100px;height:100px"
      end
  end
  doc.to_html
end


.footer_xml_file ⇒ Object



88
89
90
# File 'lib/avv2word/document.rb', line 88

# Entry name of the footer part; #replace_files stores the transformed
# footer under it.
#
# @return [String]
def footer_xml_file
  "word/footer.xml"
end

.footnotes_xml_file ⇒ Object



100
101
102
# File 'lib/avv2word/document.rb', line 100

# Entry name of the footnotes part.
#
# @return [String]
def footnotes_xml_file
  "word/footnotes.xml"
end

.header_xml_file ⇒ Object



92
93
94
# File 'lib/avv2word/document.rb', line 92

# Entry name of the header part; #replace_files stores the transformed
# header under it.
#
# @return [String]
def header_xml_file
  "word/header.xml"
end

.numbering_xml_file ⇒ Object



80
81
82
# File 'lib/avv2word/document.rb', line 80

# Entry name of the numbering part; #replace_files stores the result of the
# numbering stylesheet under it.
#
# @return [String]
def numbering_xml_file
  "word/numbering.xml"
end

.relations_xml_file ⇒ Object



84
85
86
# File 'lib/avv2word/document.rb', line 84

# Entry name of the document relationships part; #replace_files stores the
# result of the relations stylesheet under it.
#
# @return [String]
def relations_xml_file
  "word/_rels/document.xml.rels"
end

Instance Method Details

#generate ⇒ Object

Generate a string representing the contents of a docx file.



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/avv2word/document.rb', line 114

# Generate a string representing the contents of a docx file.
#
# Copies every file entry of the template archive at @template_path into an
# in-memory zip, substituting the contents registered in @replaceable_files.
# It then appends the images listed in @image_files and, when present, the
# header/footer relationship parts.
#
# @return [String] the contents of the generated archive
def generate
  Zip::File.open(@template_path) do |template_zip|
    buffer = Zip::OutputStream.write_buffer do |out|
      template_zip.each do |entry|
        next if entry.name =~ /\/$/ # names ending in "/" are skipped
        out.put_next_entry entry.name
        replacement = @replaceable_files[entry.name]
        if replacement && entry.name == Document.doc_xml_file
          source = entry.get_input_stream.read
          # Change only the body of document. TODO: Improve this...
          # The block form inserts the replacement verbatim. Passed as a
          # replacement string, backslash sequences such as "\1" or "\\" in
          # the generated XML would be expanded by String#sub and corrupt
          # the document.
          source = source.sub(/<w:body>.*?<w:sectPr/m) { "<w:body>#{replacement}<w:sectPr" }
          # add header and footer only if they really exist
          source.sub!('<!--<w:headerReference w:type="default" r:id="rId8"/>-->','<w:headerReference w:type="default" r:id="rId8"/>') if @header
          source.sub!('<!--<w:footerReference w:type="default" r:id="rId9"/>-->','<w:footerReference w:type="default" r:id="rId9"/>') if @footer
          out.write(source)
        elsif replacement
          out.write(replacement)
        elsif entry.name == Document.content_types_xml_file
          raw_file = entry.get_input_stream.read
          content_types = @image_files.empty? ? raw_file : inject_image_content_types(raw_file)
          out.write(content_types)
        else
          out.write(template_zip.read(entry.name))
        end
      end

      # stream the image files into the media folder using open-uri
      # NOTE(review): Kernel#open fetches URLs only where open-uri patches it
      # (Ruby < 3.0) and spawns a command for values starting with "|".
      # Consider URI.open / File.open once the shape of :url is confirmed.
      @image_files.each do |image|
        out.put_next_entry("word/media/#{image[:filename]}")
        open(image[:url], 'rb') do |f|
          out.write(f.read)
        end
      end

      # These parts are added only when a replacement was registered for
      # them; they are not copied from the template loop above.
      %w( word/_rels/header.xml.rels
          word/_rels/footer.xml.rels
      ).each do |f|
        if @replaceable_files[f]
          out.put_next_entry f
          out.write(@replaceable_files[f])
        end
      end

    end
    buffer.string
  end
end

#replace_files(html, extras = false) ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/avv2word/document.rb', line 164

# Transforms +html+ into the replacement parts of the document.
#
# The first <header>...</header> and <footer>...</footer> sections are cut
# out of the markup and transformed separately; @header and @footer record
# whether each was present. The full markup then goes through the relations,
# cleanup, numbering and document transformations, followed by image,
# footnote and comment handling.
#
# @param html [String, nil] source HTML; nil or empty is treated as an empty
#   body
# @param extras [Boolean] forwarded to #transform_doc_xml
def replace_files(html, extras = false)
  html = '<body></body>' if html.nil? || html.empty?

  header_html = html[/(<header>.*?<\/header>)/m, 1] || ''
  footer_html = html[/(<footer>.*?<\/footer>)/m, 1] || ''

  # whitespace characters without space; nokogiri changes &nbsp; to \u00a0
  original_source = Nokogiri::HTML(html.gsub(/>[\t\n\r\f]+</, '><'))
  header = Nokogiri::HTML(header_html.gsub(/>\s+</, '><'))
  footer = Nokogiri::HTML(footer_html.gsub(/>\s+</, '><'))
  @header = !header_html.empty?
  @footer = !footer_html.empty?

  transform_and_replace(header, xslt_path('header'), Document.header_xml_file)
  transform_and_replace(footer, xslt_path('footer'), Document.footer_xml_file)
  transform_and_replace(original_source, xslt_path('relations'), Document.relations_xml_file)

  source = xslt(stylesheet_name: 'cleanup').transform(original_source)
  transform_and_replace(source, xslt_path('numbering'), Document.numbering_xml_file)
  transform_doc_xml(source, extras)

  local_images(source)
  local_images(footer, :footer) if @footer
  local_images(header, :header) if @header

  add_footnotes(source.css("footnote").map(&:text))
  comment_nodes = source.css("comment")
  add_comments(comment_nodes) unless comment_nodes.empty?
end

#transform_doc_xml(source, extras = false) ⇒ Object



190
191
192
193
194
# File 'lib/avv2word/document.rb', line 190

# Runs +source+ through the cleanup and inline_elements stylesheets, in that
# order, then hands the result to transform_and_replace for the main
# document part.
#
# @param source [Object] document accepted by the stylesheets' #transform
# @param extras [Boolean] selects the document stylesheet via document_xslt
#   and is also forwarded to transform_and_replace
def transform_doc_xml(source, extras = false)
  prepared = %w[cleanup inline_elements].reduce(source) do |doc, stylesheet|
    xslt(stylesheet_name: stylesheet).transform(doc)
  end
  transform_and_replace(prepared, document_xslt(extras), Document.doc_xml_file, extras)
end