Class: Epuber::Compiler::XHTMLProcessor
- Inherits:
-
Object
- Object
- Epuber::Compiler::XHTMLProcessor
- Defined in:
- lib/epuber/compiler/xhtml_processor.rb
Defined Under Namespace
Classes: UnparseableLinkError
Class Method Summary collapse
-
.add_missing_root_elements(xhtml_doc, title, epub_version) ⇒ Object
Method to add all missing items in XML root.
-
.add_styles(xhtml_doc, styles) ⇒ Object
Method for adding style sheets with links, method will not add duplicate items.
-
.add_viewport(xhtml_doc, viewport_size) ⇒ Object
Adds a viewport meta tag to the head of the document, but only if no viewport meta tag already exists.
-
.resolve_images(xhtml_doc, file_path, file_resolver) ⇒ Object
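Resolves the `src` attribute of every `img` element in the XHTML document to its resolved file path. Documented return value: nil.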
-
.resolve_links(xhtml_doc, file_path, file_finder) ⇒ Array<URI>
Resolves all links to files in XHTML document and returns the valid and resolved versions.
-
.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder) ⇒ Array<URI>
Resolves all links to files in XHTML document and returns the valid and resolved versions.
-
.resolved_link_to_file(path, groups, file_path, file_finder) ⇒ URI
Method which will resolve path to file from pattern.
- .using_javascript?(xhtml_doc) ⇒ Bool
-
.xml_document_from_string(text, file_path = nil) ⇒ Nokogiri::XML::Document
Method for parsing incomplete XML, supports multiple root elements.
Class Method Details
.add_missing_root_elements(xhtml_doc, title, epub_version) ⇒ Object
Method to add all missing items in XML root
Required items:
- html (with all namespaces and other attributes)
- body
- head (with title)
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 65
#
# Makes sure the document has the `body`, `html` and `head` elements,
# creating whichever of them is missing.
#
# - `body` and `html` are created by wrapping the current root element
#   (via `surround_with_element`, a helper defined outside this listing)
# - `html` gets the XHTML namespace and, for EPUB 3+, the `epub` namespace
# - `head` gets a `title` and, for EPUB 3+, a `meta charset` element, and is
#   inserted before the first child of `html`
#
# @param xhtml_doc [Nokogiri::XML::Document] document to modify in place
# @param title [String] text for the generated `title` element
# @param epub_version [Numeric] target EPUB version
def self.add_missing_root_elements(xhtml_doc, title, epub_version)
  # wrap the current root into body when the document has none
  xhtml_doc.root.surround_with_element('body') if xhtml_doc.at_css('body').nil?

  # wrap the current root into html when the document has none
  if xhtml_doc.at_css('html').nil?
    namespaces = { 'xmlns' => 'http://www.w3.org/1999/xhtml' }
    namespaces['xmlns:epub'] = 'http://www.idpf.org/2007/ops' if epub_version >= 3
    xhtml_doc.root.surround_with_element('html', namespaces)
  end

  # nothing more to do when html already contains head
  return unless xhtml_doc.at_css('html > head').nil?

  head = xhtml_doc.create_element('head')
  head << xhtml_doc.create_element('title', title)
  head << xhtml_doc.create_element('meta', charset: 'utf-8') if epub_version >= 3.0

  xhtml_doc.css('html').first.children.first.before(head)
end
.add_styles(xhtml_doc, styles) ⇒ Object
Method for adding style sheets with links, method will not add duplicate items
97 98 99 100 101 102 103 104 105 106 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 97
#
# Appends a stylesheet `link` element to `html > head` for every path in
# `styles` that is not already linked from the head.
#
# @param xhtml_doc [Nokogiri::XML::Document] document to modify in place
# @param styles [Array<String>] stylesheet paths that should be linked
def self.add_styles(xhtml_doc, styles)
  head = xhtml_doc.at_css('html > head')
  already_linked = head.css('link[rel="stylesheet"]').map { |link| link['href'] }

  # array difference keeps only the paths the head doesn't reference yet
  (styles - already_linked).each do |href|
    head << xhtml_doc.create_element('link', href: href, rel: 'stylesheet', type: 'text/css')
  end
end
.add_viewport(xhtml_doc, viewport_size) ⇒ Object
Adds a viewport meta tag to the head of the document, but only if no viewport meta tag already exists
113 114 115 116 117 118 119 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 113
#
# Adds a viewport meta tag to `html > head`, but only when the head doesn't
# contain a `meta[name='viewport']` element yet.
#
# @param xhtml_doc [#at_css, #create_element] XHTML document to modify in place
# @param viewport_size [#width, #height] size written into the `content` attribute
def self.add_viewport(xhtml_doc, viewport_size)
  head = xhtml_doc.at_css('html > head')

  # keep an existing viewport definition untouched
  return unless head.at_css("meta[name='viewport']").nil?

  s = viewport_size
  head << xhtml_doc.create_element('meta', name: 'viewport', content: "width=#{s.width},height=#{s.height}")
end
.resolve_images(xhtml_doc, file_path, file_resolver) ⇒ Object
Returns nil.
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 222
#
# Rewrites the `src` attribute of every `img` element to the resolved path of
# the image.
#
# The image is first looked up through `file_resolver.dest_finder`. When that
# fails, it is looked up through `file_resolver.source_finder`, registered in
# the resolver as a new image file (path relative to
# `file_resolver.source_path`, path type `:manifest`) and the `src` is set to
# the path found in sources. Images which can't be resolved either way are
# reported with `UI.warning` and left untouched.
#
# @param xhtml_doc [#css] XHTML document to modify in place
# @param file_path [String] path of the document; its directory is used as
#   context for the lookups
# @param file_resolver [#dest_finder, #source_finder, #source_path, #add_file]
#
# @return [Object] result of iterating the `img` nodes; not meaningful
def self.resolve_images(xhtml_doc, file_path, file_resolver)
  dirname = File.dirname(file_path)

  xhtml_doc.css('img').each do |img|
    path = img['src']
    next if path.nil?

    begin
      new_path = file_resolver.dest_finder.find_file(path, groups: :image, context_path: dirname)
    rescue UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError
      begin
        # not known among destination files, fall back to the source files
        new_path = resolved_link_to_file(path, :image, dirname, file_resolver.source_finder).to_s

        # absolute path of the image, made relative to the sources root
        pkg_abs_path = File.expand_path(new_path, dirname).unicode_normalize
        pkg_new_path = Pathname.new(pkg_abs_path).relative_path_from(Pathname.new(file_resolver.source_path)).to_s

        file = FileTypes::ImageFile.new(pkg_new_path)
        file.path_type = :manifest
        file_resolver.add_file(file)
      rescue UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError => e
        UI.warning(e.to_s, location: img)

        # keep the original src of images that can't be resolved
        next
      end
    end

    img['src'] = new_path
  end
end
.resolve_links(xhtml_doc, file_path, file_finder) ⇒ Array<URI>
Resolves all links to files in XHTML document and returns the valid and resolved versions
201 202 203 204 205 206 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 201
#
# Resolves the `href` of all `a` and `map > area` elements in the document
# (as `:text` group files) and returns the successfully resolved links.
#
# @param xhtml_doc [Nokogiri::XML::Document] document to modify in place
# @param file_path [String] context path handed over to the finder
# @param file_finder [#find_file] object used to look the files up
#
# @return [Array<URI>] resolved links, those of `a` elements first
def self.resolve_links(xhtml_doc, file_path, file_finder)
  ['a', 'map > area'].flat_map do |selector|
    resolve_links_for(xhtml_doc, selector, 'href', :text, file_path, file_finder)
  end
end
.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder) ⇒ Array<URI>
Resolves all links to files in XHTML document and returns the valid and resolved versions
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 168
#
# Resolves the given attribute of all matching elements to files and writes
# the resolved value back into the attribute. Links which can't be parsed or
# resolved are reported with `UI.warning` and skipped.
#
# @param xhtml_doc [Nokogiri::XML::Document] document to modify in place
# @param tag_name [String] CSS selector of the elements to process
# @param attribute_name [String] name of the attribute holding the link
# @param groups [Symbol] file group(s) handed over to the finder
# @param file_path [String] context path handed over to the finder
# @param file_finder [#find_file] object used to look the files up
#
# @return [Array<URI>] links which were resolved successfully
def self.resolve_links_for(xhtml_doc, tag_name, attribute_name, groups, file_path, file_finder)
  selector = "#{tag_name}[#{attribute_name}]"

  xhtml_doc.css(selector).each_with_object([]) do |node, resolved|
    begin
      link = node[attribute_name]
      next if link.nil?

      target = resolved_link_to_file(link, groups, file_path, file_finder)
      resolved << target

      node[attribute_name] = target.to_s
    rescue UnparseableLinkError, FileFinders::FileNotFoundError, FileFinders::MultipleFilesFoundError => e
      # report the broken link and carry on with the next node
      UI.warning(e.to_s, location: node)
    end
  end
end
.resolved_link_to_file(path, groups, file_path, file_finder) ⇒ URI
Method which will resolve path to file from pattern
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 132
#
# Resolves a link found in a document to the file it points to.
#
# Links with a scheme (e.g. web pages) and links consisting of a fragment
# only are returned as parsed. For all other links the path component is
# replaced with the value returned by `file_finder.find_file`.
#
# @param path [String] raw link value
# @param groups [Symbol] file group(s) handed over to the finder
# @param file_path [String] context path handed over to the finder
# @param file_finder [#find_file] object used to look the file up
#
# @return [URI] parsed (and possibly rewritten) link
#
# @raise [FileFinders::FileNotFoundError] when `path` is empty
# @raise [UnparseableLinkError] when `path` can't be parsed even after escaping
def self.resolved_link_to_file(path, groups, file_path, file_finder)
  raise FileFinders::FileNotFoundError.new(path, file_path) if path.empty?

  begin
    uri = URI(path)
  rescue URI::InvalidURIError
    begin
      # Second attempt with unsafe characters (spaces, ...) percent-escaped.
      # `URI.encode` is gone since Ruby 3.0, the RFC 2396 parser still
      # provides the escaping.
      uri = URI(URI::RFC2396_Parser.new.escape(path))
    rescue URI::InvalidURIError
      # skip not valid uri
      raise UnparseableLinkError, "Unparseable link `#{path}`"
    end
  end

  # skip uri with scheme (links to web pages)
  return uri unless uri.scheme.nil?

  # skip links made of a fragment only
  return uri if uri.path.empty? && !uri.fragment.nil? && !uri.fragment.empty?

  uri.path = file_finder.find_file(uri.path, groups: groups, context_path: file_path)

  uri
end
.using_javascript?(xhtml_doc) ⇒ Bool
212 213 214 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 212
#
# Tells whether the document contains at least one `script` element.
#
# @param xhtml_doc [Nokogiri::XML::Document] document to inspect
#
# @return [Bool] true when a `script` element was found
def self.using_javascript?(xhtml_doc)
  first_script = xhtml_doc.at_css('script')
  !first_script.nil?
end
.xml_document_from_string(text, file_path = nil) ⇒ Nokogiri::XML::Document
Method for parsing incomplete XML, supports multiple root elements
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/epuber/compiler/xhtml_processor.rb', line 22
#
# Parses possibly incomplete XML, including text with multiple root elements.
#
# The text is parsed both as a document and as a fragment:
# - a fragment with exactly one element becomes the root of the document
# - otherwise, when the fragment contains no `body`, a new `body` root is
#   created and all element, comment and text nodes are moved into it
# - in the remaining case the document is returned as parsed
#
# Whitespace in front of the XML header is reported with `UI.warning` and
# stripped before parsing.
#
# @param text [String] XML source
# @param file_path [String, nil] path stored into the document and used as
#   location of the warning
#
# @return [Nokogiri::XML::Document] parsed document
def self.xml_document_from_string(text, file_path = nil)
  if /\A[\n\r ]+(<\?xml)/.match?(text)
    UI.warning('XML header must be at the beginning of document', location: UI::Location.new(file_path, 1))

    text = text.lstrip
  end

  Nokogiri::XML(text).tap do |doc|
    doc.encoding = 'UTF-8'
    doc.file_path = file_path

    fragment = Nokogiri::XML.fragment(text)
    top_elements = fragment.children.select(&:element?)

    if top_elements.count == 1
      doc.root = top_elements.first
    elsif fragment.at_css('body').nil?
      doc.root = doc.create_element('body')

      # collect the nodes first, adding them to the new root moves them
      movable = fragment.children.select { |node| node.element? || node.comment? || node.text? }
      movable.each { |node| doc.root.add_child(node) }
    end
  end
end