Class: CanvasLinkMigrator::LinkParser
- Inherits:
-
Object
- Object
- CanvasLinkMigrator::LinkParser
- Defined in:
- lib/canvas_link_migrator/link_parser.rb
Constant Summary collapse
- REFERENCE_KEYWORDS =
%w[CANVAS_COURSE_REFERENCE CANVAS_OBJECT_REFERENCE WIKI_REFERENCE IMS_CC_FILEBASE IMS-CC-FILEBASE].freeze
- LINK_PLACEHOLDER =
"LINK.PLACEHOLDER"- KNOWN_REFERENCE_TYPES =
%w[ announcements appointment_participants assignment_groups assignments attachments calendar_events context_external_tools context_module_tags context_modules course_paces created_learning_outcomes discussion_entries discussion_topics external_feeds grading_standards groups learning_outcome_groups learning_outcome_links learning_outcomes linked_learning_outcomes media_attachments_iframe modules pages quizzes rubrics wiki wiki_pages ].freeze
- CONTAINER_TYPES =
%w[div p body].freeze
- LINK_ATTRS =
%w[rel href src srcset data value longdesc data-download-url].freeze
- RCE_MEDIA_TYPES =
%w[audio video].freeze
Instance Attribute Summary collapse
-
#migration_query_service ⇒ Object
readonly
Returns the value of attribute migration_query_service.
-
#unresolved_link_map ⇒ Object
readonly
Returns the value of attribute unresolved_link_map.
Instance Method Summary collapse
- #add_unresolved_link(link, item_type, mig_id, field) ⇒ Object
- #convert(html, item_type, mig_id, field, remove_outer_nodes_if_one_child: nil) ⇒ Object
- #convert_link(node, attr, item_type, mig_id, field) ⇒ Object
-
#initialize(migration_query_service) ⇒ LinkParser
constructor
A new instance of LinkParser.
-
#parse_url(url, node, attr) ⇒ Object
Returns a hash with the resolution status and, for unresolved links, the data to hold onto.
- #placeholder(old_value) ⇒ Object
- #reset! ⇒ Object
- #resolved(new_url = nil) ⇒ Object
- #unresolved(type, data = {}) ⇒ Object
Constructor Details
#initialize(migration_query_service) ⇒ LinkParser
Returns a new instance of LinkParser.
63 64 65 66 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 63
#
# Builds a parser around the given query service and clears the
# unresolved-link bookkeeping via #reset!.
#
# @param migration_query_service [Object] collaborator that the conversion
#   methods call for domain substitutions, context hosts/path and warnings
def initialize(migration_query_service)
  @migration_query_service = migration_query_service

  reset!
end
Instance Attribute Details
#migration_query_service ⇒ Object (readonly)
Returns the value of attribute migration_query_service.
61 62 63 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 61
#
# Read-only accessor.
#
# @return [Object] the value of the +@migration_query_service+ instance variable
def migration_query_service
  @migration_query_service
end
#unresolved_link_map ⇒ Object (readonly)
Returns the value of attribute unresolved_link_map.
61 62 63 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 61
#
# Read-only accessor.
#
# @return [Object] the value of the +@unresolved_link_map+ instance variable
def unresolved_link_map
  @unresolved_link_map
end
Instance Method Details
#add_unresolved_link(link, item_type, mig_id, field) ⇒ Object
72 73 74 75 76 77 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 72
#
# Records +link+ in +@unresolved_link_map+, grouped first by the owning item
# (+item_type+ plus +mig_id+) and then by +field+.
#
# @param link [Object] the entry to append
# @param item_type [Object] stored under the +:type+ part of the map key
# @param mig_id [Object] stored under the +:migration_id+ part of the map key
# @param field [Object] second-level key within the item's entry
# @return [Array] the list the link was appended to
def add_unresolved_link(link, item_type, mig_id, field)
  owner = { type: item_type, migration_id: mig_id }
  links_by_field = (@unresolved_link_map[owner] ||= {})
  (links_by_field[field] ||= []) << link
end
#convert(html, item_type, mig_id, field, remove_outer_nodes_if_one_child: nil) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 83
#
# Parses +html+ as HTML5, runs #convert_link over every attribute named in
# LINK_ATTRS on every node, and returns the inner HTML of the resulting body.
#
# @param html [String, nil] markup to convert; nil is parsed as ""
# @param item_type [Object] passed through to #convert_link
# @param mig_id [#to_s] converted to a string and passed through to #convert_link
# @param field [Object] passed through to #convert_link
# @param remove_outer_nodes_if_one_child [Boolean, nil] when truthy, descend
#   through attribute-less single-child wrappers whose tag is in CONTAINER_TYPES
#   before serialising
# @return [String] converted markup, or "" when there is no body node or
#   Nokogiri::SyntaxError is raised
def convert(html, item_type, mig_id, field, remove_outer_nodes_if_one_child: nil)
  mig_id = mig_id.to_s
  doc = Nokogiri::HTML5(html || "")

  # Replace source tags with iframes
  doc.search("source[data-media-id]").each do |source|
    media_node = source.parent
    next unless RCE_MEDIA_TYPES.include?(media_node.name)

    media_node.name = "iframe"
    media_node["src"] = source["src"]
    source.remove
  end

  doc.search("*").each do |node|
    LINK_ATTRS.each { |attr| convert_link(node, attr, item_type, mig_id, field) }
  end

  node = doc.at_css("body")
  return "" unless node

  if remove_outer_nodes_if_one_child
    # Stop at the first wrapper that has siblings, has no content of its own,
    # is not a plain container tag, or carries attributes.
    while node.children.size == 1 && node.child.child
      wrapper = node.child
      break unless CONTAINER_TYPES.member?(wrapper.name) && wrapper.attributes.blank?

      node = wrapper
    end
  end

  node.inner_html
rescue Nokogiri::SyntaxError
  ""
end
#convert_link(node, attr, item_type, mig_id, field) ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
# Rewrites a single attribute (+attr+) of +node+ in place.
#
# Flow, as visible in the listing below:
# * Returns early when the attribute is blank, or when +attr+ is "value" and the
#   value contains neither an IMS CC FILEBASE token nor "CANVAS_COURSE_REFERENCE".
# * Works on a copy of the value in which each "%24<KEYWORD>%24" (KEYWORD taken
#   from REFERENCE_KEYWORDS) is turned back into "$<KEYWORD>$".
# * Calls #parse_url; an Addressable::URI::InvalidURIError makes the method
#   return without touching the node.
# * Resolved result: the attribute is set to the new URL (or the unescaped
#   original). Non-relative URLs first go through
#   +process_domain_substitutions+, and scheme/host/port are dropped when the
#   host is one of the query service's +context_hosts+ (port suffix ignored).
#   URI parse errors in that step are swallowed and the URL is used as-is.
# * Unresolved result: a placeholder from #placeholder replaces either the whole
#   node (link type +:media_object+) or the attribute value (and the anchor text
#   when it equals the href); the result hash is then recorded through
#   #add_unresolved_link.
#
# @param node [Object] node being rewritten (read and written with +node[attr]+)
# @param attr [String] attribute name to inspect
# @param item_type [Object] forwarded to #add_unresolved_link
# @param mig_id [Object] forwarded to #add_unresolved_link
# @param field [Object] forwarded to #add_unresolved_link
# File 'lib/canvas_link_migrator/link_parser.rb', line 119 def convert_link(node, attr, item_type, mig_id, field) return unless node[attr].present? if attr == "value" && !(node[attr] =~ /IMS(?:-|_)CC(?:-|_)FILEBASE/ || node[attr].include?("CANVAS_COURSE_REFERENCE")) return end url = node[attr].dup REFERENCE_KEYWORDS.each do |ref| url.gsub!("%24#{ref}%24", "$#{ref}$") end begin result = parse_url(url, node, attr) rescue Addressable::URI::InvalidURIError return end if result[:resolved] # resolved, just replace and carry on new_url = result[:new_url] || url unless CanvasLinkMigrator.relative_url?(new_url) # perform configured substitutions if (processed_url = @migration_query_service.process_domain_substitutions(new_url)) new_url = processed_url end # relative-ize absolute links outside the course but inside our domain # (analogous to what is done in Api#process_incoming_html_content) begin uri = URI.parse(new_url) account_hosts = @migration_query_service.context_hosts.map { |h| h.split(":").first } if account_hosts.include?(uri.host) uri.scheme = uri.host = uri.port = nil new_url = uri.to_s end rescue URI::InvalidURIError, URI::InvalidComponentError nil end end node[attr] = new_url else result.delete(:resolved) if result[:link_type] == :media_object # because we may actually change the media comment node itself # (rather than just replacing a value), we're going to # replace the entire node with a placeholder result[:old_value] = node.to_xml result[:placeholder] = placeholder(result[:old_value]) placeholder_node = Nokogiri::HTML5.fragment(result[:placeholder]) node.replace(placeholder_node) else result[:old_value] = node[attr] result[:placeholder] = placeholder(result[:old_value]) # replace the inner html of an anchor tag if it matches the href if node.name == "a" && attr == "href" && node["href"] == node.inner_html.delete("\n").strip node.inner_html = result[:placeholder] end node[attr] = result[:placeholder] end add_unresolved_link(result, item_type, mig_id, field) 
end end |
#parse_url(url, node, attr) ⇒ Object
Returns a hash with the resolution status and, for unresolved links, the data to hold onto.
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# Classifies +url+ (the value of +attr+ on +node+) and returns either a
# #resolved or an #unresolved hash.
#
# Branches visible in the listing below, in order:
# * "wiki_page_migration_id=" / "discussion_topic_migration_id=" markers
#   -> unresolved :wiki_page / :discussion_topic
# * "$CANVAS_COURSE_REFERENCE$/modules/items/..." -> unresolved :module_item
# * "$CANVAS_COURSE_REFERENCE$/file_ref/..." -> unresolved :file_ref
# * "$CANVAS_OBJECT_REFERENCE$/..." or "$WIKI_REFERENCE$/..." -> unresolved
#   :object when the type segment is in KNOWN_REFERENCE_TYPES; otherwise a
#   warning is reported to the query service and the URL is returned resolved
# * any other "$CANVAS_COURSE_REFERENCE$/..." -> resolved under +context_path+
# * "$IMS-CC-FILEBASE$/..." (or the underscore spelling) -> unresolved
#   :media_object for media-comment links and media iframes/sources, else :file
# * media-comment links and media iframes/sources without a token -> resolved
# * base64 "data:" URIs in +src+ -> handed to the query service, then resolved
#   or unresolved :file depending on its answer
# * equation images, assessment-question file URLs, course file URLs, "#"
#   anchors, or +fix_relative_urls?+ being false -> resolved unchanged
# * remaining relative URLs -> unresolved :file; everything else -> resolved
#
# NOTE(review): this rendered listing appears to have lost some identifiers
# (a bare "@migration_query_service." with no method name, a call written as
# "@migration_query_service.(info_match)", and the left-hand side of the
# "media_attachment" assignment) — confirm against
# lib/canvas_link_migrator/link_parser.rb before relying on it.
#
# @param url [String] URL with reference tokens already unescaped
# @param node [Object] node the attribute belongs to
# @param attr [String] attribute name the URL came from
# @return [Hash] result of #resolved or #unresolved
# File 'lib/canvas_link_migrator/link_parser.rb', line 193 def parse_url(url, node, attr) parsed_url = Addressable::URI.parse(url) query_values = parsed_url.query_values = query_values.try(:delete, "media_attachment") == "true" if parsed_url.query_values = query_values.presence || nil url = Addressable::URI.unencode(parsed_url) end if url =~ /wiki_page_migration_id=(.*)/ unresolved(:wiki_page, migration_id: $1) elsif url =~ /discussion_topic_migration_id=(.*)/ unresolved(:discussion_topic, migration_id: $1) elsif url =~ %r{\$CANVAS_COURSE_REFERENCE\$/modules/items/([^?]*)(\?.*)?} unresolved(:module_item, migration_id: $1, query: $2) elsif url =~ %r{\$CANVAS_COURSE_REFERENCE\$/file_ref/([^/?#]+)(.*)} unresolved(:file_ref, migration_id: $1, rest: $2, in_media_iframe: attr == "src" && ["iframe", "source"].include?(node.name) && node["data-media-id"], media_attachment: ) elsif url =~ %r{(?:\$CANVAS_OBJECT_REFERENCE\$|\$WIKI_REFERENCE\$)/([^/]*)/([^?]*)(\?.*)?} if KNOWN_REFERENCE_TYPES.include?($1) unresolved(:object, type: $1, migration_id: $2, query: $3) else # If the `type` is not known, there's something amiss... @migration_query_service.report_link_parse_warning($1) resolved(url) end elsif url =~ %r{\$CANVAS_COURSE_REFERENCE\$/(.*)} resolved("#{@migration_query_service.context_path}/#{$1}") elsif url =~ %r{\$IMS(?:-|_)CC(?:-|_)FILEBASE\$/(.*)} rel_path = URI::DEFAULT_PARSER.unescape($1) if (attr == "href" && node["class"]&.include?("instructure_inline_media_comment")) || (attr == "src" && ["iframe", "source"].include?(node.name) && node["data-media-id"]) unresolved(:media_object, rel_path: rel_path, media_attachment: ) else unresolved(:file, rel_path: rel_path) end elsif (attr == "href" && node["class"]&.include?("instructure_inline_media_comment")) || (attr == "src" && ["iframe", "source"].include?(node.name) && node["data-media-id"]) # Course copy media reference, leave it alone resolved elsif @migration_query_service. 
&& attr == "src" && (info_match = url.match(%r{\Adata:(?<mime_type>[-\w]+/[-\w+.]+)?;base64,(?<image>.*)}m)) result = @migration_query_service.(info_match) if result[:resolved] resolved(result[:url]) else unresolved(:file, rel_path: result[:url]) end elsif # rubocop:disable Lint/DuplicateBranch # Equation image, leave it alone (attr == "src" && node["class"] && node["class"].include?("equation_image")) || # rubocop:disable Layout/ConditionPosition # The file is in the context of an AQ, leave the link alone url =~ %r{\A/assessment_questions/\d+/files/\d+} || # This points to a specific file already, leave it alone url =~ %r{\A/courses/\d+/files/\d+} || !@migration_query_service.fix_relative_urls? || # It's just a link to an anchor, leave it alone url.start_with?("#") resolved elsif CanvasLinkMigrator.relative_url?(url) unresolved(:file, rel_path: URI::DEFAULT_PARSER.unescape(url)) else # rubocop:disable Lint/DuplicateBranch resolved end end |
#placeholder(old_value) ⇒ Object
79 80 81 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 79
#
# Builds the stand-in token written into the document for an unresolved link:
# LINK_PLACEHOLDER, an underscore, then the MD5 hex digest of +old_value+.
#
# @param old_value [String] original attribute value or serialised node
# @return [String] the placeholder token
def placeholder(old_value)
  digest = Digest::MD5.hexdigest(old_value)
  "#{LINK_PLACEHOLDER}_#{digest}"
end
#reset! ⇒ Object
68 69 70 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 68
#
# Discards every recorded unresolved link by replacing
# +@unresolved_link_map+ with a fresh empty hash.
#
# @return [Hash] the new, empty map
def reset!
  @unresolved_link_map = {}
end
#resolved(new_url = nil) ⇒ Object
188 189 190 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 188
#
# Builds the result hash for a link that needs no later resolution.
#
# @param new_url [String, nil] replacement URL, or nil when none is supplied
# @return [Hash] +{ resolved: true, new_url: new_url }+
def resolved(new_url = nil)
  {
    resolved: true,
    new_url: new_url
  }
end
#unresolved(type, data = {}) ⇒ Object
184 185 186 |
# File 'lib/canvas_link_migrator/link_parser.rb', line 184
#
# Builds the result hash for a link that still has to be resolved.
#
# @param type [Symbol] link type stored under +:link_type+
# @param data [Hash] extra entries merged over the base hash
# @return [Hash] +{ resolved: false, link_type: type }+ merged with +data+
def unresolved(type, data = {})
  base = { resolved: false, link_type: type }
  base.merge(data)
end