Class: ChupaText::Decomposers::WebKit
- Inherits:
-
Decomposer
- Object
- Decomposer
- ChupaText::Decomposers::WebKit
show all
- Includes:
- LogTag, Loggable
- Defined in:
- lib/chupa-text/decomposers/webkit.rb
Defined Under Namespace
Modules: LogTag
Classes: ExternalScreenshoter
Constant Summary
collapse
- TARGET_EXTENSIONS =
["htm", "html", "xhtml"]
- TARGET_MIME_TYPES =
[
"text/html",
"application/xhtml+xml",
]
- AVAILABLE_ATTRIBUTE_NAME =
"decomposer-webkit-screenshot-available"
- IN_PROCESS =
ENV["CHUPA_TEXT_DECOMPOSER_WEBKIT_IN_PROCESS"] == "yes"
Instance Method Summary
collapse
Instance Method Details
#decompose(data) {|data| ... } ⇒ Object
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# File 'lib/chupa-text/decomposers/webkit.rb', line 66
# Takes a screenshot of the HTML source behind +data+, attaches it to
# +data+ as a base64-encoded PNG when one was produced, records whether a
# screenshot is available, and yields +data+.
#
# @param data the data object to decorate; this method reads its
#   +source+ (+body+, +uri+, +path+) and +expected_screenshot_size+, and
#   writes +screenshot+ and the AVAILABLE_ATTRIBUTE_NAME attribute.
# @yield [data] the same data object after the attribute has been set.
# @return [Object] whatever the given block returns.
def decompose(data)
  body = data.source.body
  uri = data.source.uri.to_s
  output = Tempfile.new(["chupa-text-decomposer-webkit", ".png"])
  begin
    width, height = data.expected_screenshot_size
    if IN_PROCESS
      screenshoter = ChupaTextDecomposerWebKit::Screenshoter.new(logger)
      screenshoter.run(body, uri, output.path, width, height)
    else
      screenshoter = ExternalScreenshoter.new
      screenshoter.run(data.source.path, uri, output.path, width, height)
    end
    # Read by path in binary mode: the screenshoter is handed only the
    # path, so this sees whatever now lives there, and the PNG bytes are
    # never subject to text-mode translation.
    png = File.binread(output.path)
    unless png.empty?
      data.screenshot = Screenshot.new("image/png",
                                       [png].pack("m*"),
                                       "base64")
    end
  ensure
    # The encoded image is already held in memory, so release the
    # descriptor and delete the temporary file right away instead of
    # waiting for garbage collection.
    output.close!
  end
  data[AVAILABLE_ATTRIBUTE_NAME] = !data.screenshot.nil?
  yield(data)
end
|
#target?(data) ⇒ Boolean
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
# File 'lib/chupa-text/decomposers/webkit.rb', line 41
# Decides whether this decomposer should handle +data+.
#
# Returns false when no screenshot is needed, when one is already
# attached, when the AVAILABLE_ATTRIBUTE_NAME attribute has already been
# set, or when there is no source. Otherwise the source is accepted if its
# extension or MIME type is listed in TARGET_EXTENSIONS /
# TARGET_MIME_TYPES, or if its body begins with an HTML doctype or an
# <html tag.
#
# @param data the candidate data object.
# @return [Boolean] true if +data+ looks like HTML that still needs a
#   screenshot.
def target?(data)
  return false unless data.need_screenshot?
  return false if data.screenshot
  return false unless data[AVAILABLE_ATTRIBUTE_NAME].nil?

  source = data.source
  return false if source.nil?
  return true if TARGET_EXTENSIONS.include?(source.extension)
  return true if TARGET_MIME_TYPES.include?(source.mime_type)

  source_body = source.body
  return false if source_body.nil?

  # Sniff only the leading bytes. Working on a binary copy keeps this safe
  # for bodies in any (or invalid) encoding, and ASCII-lowercasing makes
  # the check case-insensitive, as HTML doctype and tag names are.
  # 15 bytes is the length of the longest prefix tested below.
  head = source_body.byteslice(0, 15).b.downcase
  # "<!doctype html>" covers the HTML5 doctype; "<!doctype html " covers
  # the legacy forms that carry a public identifier.
  head.start_with?("<!doctype html ", "<!doctype html>", "<html")
end
|