Class: Datahen::Scraper::RubyParserExecutor
- Inherits:
-
Executor
- Object
- Executor
- Datahen::Scraper::RubyParserExecutor
show all
- Defined in:
- lib/datahen/scraper/ruby_parser_executor.rb
Constant Summary
Constants inherited
from Executor
Executor::MAX_FIND_OUTPUTS_PER_PAGE
Instance Attribute Summary collapse
Attributes inherited from Executor
#filename, #gid, #job_id, #page
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Executor
#clean_backtrace, #eval_with_context, #find_output, #find_outputs, #finish, #finisher_update, #get_content, #get_failed_content, #get_job_id, #init_global_page, #init_job_page, #init_page, #parsing_update, #remove_old_dups!, #remove_old_output_dups!, #remove_old_page_dups!, #save_outputs, #save_pages, #save_pages_and_outputs, #seeding_update
#create_context, #expose_to, #exposed_env, #exposed_methods, #isolated_binding, #var_or_proc
Constructor Details
Returns a new instance of RubyParserExecutor.
17
18
19
20
21
22
23
24
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 17
# Builds the executor from an options hash.
#
# @param options [Hash] recognised keys:
#   - :filename     required; stored as the script filename.
#   - :page         optional page hash; when it carries a 'gid', that value wins.
#   - :gid          used only when the page supplies no 'gid'; required in that case.
#   - :job_id       fetched without a fallback, so a missing key raises KeyError.
#   - :vars         optional (default {}); kept as the page vars.
#   - :keep_outputs optional (default false); coerced to true/false.
# @raise [RuntimeError] when :filename is missing, or when neither the page
#   nor the options provide a GID.
# @raise [KeyError] when :job_id is missing.
def initialize(options={})
  @filename = options.fetch(:filename) { raise "Filename is required" }
  @page = options.fetch(:page, nil)

  # A GID found on the page takes precedence over an explicit :gid option.
  gid_from_page = (self.page || {})['gid']
  @gid = gid_from_page || options.fetch(:gid) { raise "GID or a page with a GID is required" }

  @job_id = options.fetch(:job_id)
  @page_vars = options.fetch(:vars) { {} }
  @keep_outputs = options.fetch(:keep_outputs, false) ? true : false
end
|
Instance Attribute Details
#limbo_self ⇒ Boolean
15
16
17
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 15
# Limbo self page flag.
#
# Set to true by #limbo when it is called with this executor's own GID, and
# reset to false at the start of #eval_parser_script.
#
# @return [Boolean, nil] nil until the flag has been assigned.
def limbo_self
  @limbo_self
end
|
#refetch_self ⇒ Boolean
Note:
It is stronger than #reparse_self flag.
Refetch self page flag.
8
9
10
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 8
# Refetch self page flag.
#
# Set to true by #refetch when it is called with this executor's own GID.
# #eval_parser_script checks it before the reparse and limbo flags and skips
# saving pages/outputs while it is set.
#
# @return [Boolean, nil] nil until the flag has been assigned.
def refetch_self
  @refetch_self
end
|
#reparse_self ⇒ Boolean
Note:
It is stronger than #limbo_self flag.
Reparse self page flag.
12
13
14
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 12
# Reparse self page flag.
#
# Set to true by #reparse when it is called with this executor's own GID.
# #eval_parser_script checks it after the refetch flag and before the limbo flag.
#
# @return [Boolean, nil] nil until the flag has been assigned.
def reparse_self
  @reparse_self
end
|
#save ⇒ Object
Returns the value of attribute save.
4
5
6
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 4
# Whether results should actually be persisted.
#
# Assigned by #exec_parser; the *_page helpers and the status updaters only
# talk to the server when this is truthy.
#
# @return [Object, nil] whatever was last passed to #exec_parser, nil before that.
def save
  @save
end
|
Class Method Details
.exposed_methods ⇒ Object
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 26
# Method names this executor declares as exposed.
#
# NOTE(review): presumably consumed by the inherited exposure helpers
# (#expose_to / #isolated_binding) to decide what a parser script may call —
# confirm against Executor.
#
# @return [Array<Symbol>] a frozen list, rebuilt on every call.
def self.exposed_methods
  %i[
    content
    failed_content
    outputs
    pages
    page
    save_pages
    save_outputs
    find_output
    find_outputs
    refetch
    reparse
    limbo
  ].freeze
end
|
Instance Method Details
#content ⇒ Object
214
215
216
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 214
# Content for this executor's job/GID, obtained through the inherited
# #get_content and memoized once a truthy value has been returned.
#
# A nil/false result is not cached, so the next call asks again.
#
# @return [Object] whatever #get_content returns.
def content
  return @content if @content

  @content = get_content(job_id, gid)
end
|
#eval_parser_script(save = false) ⇒ Object
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 164
# Runs the parser script for the current page and persists what it produced.
#
# Steps, in order:
# 1. report the "starting" status (#update_parsing_starting_status),
# 2. build the page (#init_page + #init_page_vars) and clear the self flags,
# 3. evaluate +filename+ in an isolated binding exposing +outputs+, +pages+
#    and +page+,
# 4. save the collected pages/outputs, unless the script asked to refetch
#    its own page,
# 5. refetch / reparse / limbo the page if requested (in that priority),
#    otherwise report the "done" status.
#
# Note that the +save+ parameter only gates error reporting here; the status
# and *_page helpers consult the #save attribute instead.
#
# @param save [Boolean] when truthy, failures are sent through #handle_error
#   before being re-raised.
# @return [Object] the result of the final refetch/reparse/limbo/done call.
# @raise [SyntaxError, StandardError] any failure from the script or from
#   saving is re-raised after optional reporting.
def eval_parser_script(save=false)
  update_parsing_starting_status

  page = init_page_vars(init_page)
  outputs = []
  pages = []

  # Start every run with the self-directed flags cleared.
  self.refetch_self = false
  self.reparse_self = false
  self.limbo_self = false

  begin
    context = isolated_binding({
      outputs: outputs,
      pages: pages,
      page: page
    })
    eval_with_context filename, context
  rescue Error::SafeTerminateError
    # Deliberately swallowed: #refetch, #reparse and #limbo raise this to stop
    # the script early once the matching flag has been set.
  rescue SyntaxError, StandardError => e
    # SyntaxError is listed explicitly because a bare rescue would miss it.
    handle_error(e) if save
    raise e
  end

  puts "=========== Parsing Executed ==========="

  begin
    save_pages_and_outputs(pages, outputs, :parsing) unless refetch_self
  rescue => e
    handle_error(e) if save
    raise e
  end

  if refetch_self
    refetch_page gid
  elsif reparse_self
    reparse_page gid
  elsif limbo_self
    limbo_page gid
  else
    update_parsing_done_status
  end
end
|
#exec_parser(save = false) ⇒ Object
43
44
45
46
47
48
49
50
51
52
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 43
# Entry point: remembers the save mode, announces it, and runs the script.
#
# @param save [Boolean] truthy to persist results, falsy for a trial run.
#   The value is stored in @save (read back through #save) and also passed
#   on to #eval_parser_script.
# @return [Object] whatever #eval_parser_script returns.
def exec_parser(save=false)
  @save = save
  puts(save ? "Executing parser script" : "Trying parser script")
  eval_parser_script(save)
end
|
#failed_content ⇒ Object
218
219
220
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 218
# Failed content for this executor's job/GID, obtained through the inherited
# #get_failed_content and memoized once a truthy value has been returned.
#
# A nil/false result is not cached, so the next call asks again.
#
# @return [Object] whatever #get_failed_content returns.
def failed_content
  return @failed_content if @failed_content

  @failed_content = get_failed_content(job_id, gid)
end
|
#handle_error(e) ⇒ Object
222
223
224
225
226
227
228
229
230
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 222
# Reports a parsing failure through #parsing_update.
#
# The logged text is a headline (error class, message, job id and GID)
# followed by the cleaned backtrace, joined with newlines.
#
# @param e [Exception] the error to report.
# @return [Object] whatever #parsing_update returns.
def handle_error(e)
  headline = "Parsing #{e.class}: #{e.to_s} (Job:#{job_id} GID:#{gid})"
  trace = clean_backtrace(e.backtrace)

  parsing_update(
    job_id: job_id,
    gid: gid,
    parsing_status: :failed,
    log_error: [headline, trace].join("\n")
  )
end
|
#init_page_vars(page) ⇒ Object
54
55
56
57
58
59
60
61
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 54
# Picks the page hash a parser run should use.
#
# A page supplied to the executor (self.page) wins and is returned untouched.
# Otherwise the given page is returned, with its 'vars' key set to the
# executor's page vars when those are present and non-empty.
#
# @param page [Hash] freshly initialised page; mutated in place when vars apply.
# @return [Hash] self.page when set, else the given page.
def init_page_vars(page)
  preset = self.page
  return preset unless preset.nil?

  page['vars'] = @page_vars unless @page_vars.nil? || @page_vars.empty?
  page
end
|
#limbo(page_gid) ⇒ Object
155
156
157
158
159
160
161
162
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 155
# Sends a page to limbo.
#
# For any other page this delegates to #limbo_page. When the GID is this
# executor's own, it only raises the limbo_self flag and aborts the script
# with Error::SafeTerminateError.
#
# @param page_gid [String] GID of the page to limbo.
# @return [Object] result of #limbo_page for a foreign page.
# @raise [ArgumentError] unless page_gid is a String.
# @raise [Error::SafeTerminateError] when page_gid equals this executor's gid.
def limbo(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)
  return limbo_page(page_gid) unless page_gid == gid

  self.limbo_self = true
  raise Error::SafeTerminateError
end
|
#limbo_page(gid) ⇒ Object
146
147
148
149
150
151
152
153
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 146
# Limbos the page with the given GID on the server, or just announces what
# would have happened when not in save mode.
#
# @param gid [String] GID of the page to limbo.
# @return [nil]
def limbo_page(gid)
  unless save
    puts "Would have limbo page #{gid}"
    return
  end

  Client::JobPage.new({gid: gid}).limbo(self.job_id)
  puts "Limbo page #{gid}"
end
|
#refetch(page_gid) ⇒ Object
119
120
121
122
123
124
125
126
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 119
# Requests a refetch of a page.
#
# For any other page this delegates to #refetch_page. When the GID is this
# executor's own, it only raises the refetch_self flag and aborts the script
# with Error::SafeTerminateError.
#
# @param page_gid [String] GID of the page to refetch.
# @return [Object] result of #refetch_page for a foreign page.
# @raise [ArgumentError] unless page_gid is a String.
# @raise [Error::SafeTerminateError] when page_gid equals this executor's gid.
def refetch(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)
  return refetch_page(page_gid) unless page_gid == gid

  self.refetch_self = true
  raise Error::SafeTerminateError
end
|
#refetch_page(gid) ⇒ Object
110
111
112
113
114
115
116
117
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 110
# Refetches the page with the given GID on the server, or just announces
# what would have happened when not in save mode.
#
# @param gid [String] GID of the page to refetch.
# @return [nil]
def refetch_page(gid)
  unless save
    puts "Would have refetch page #{gid}"
    return
  end

  Client::JobPage.new({gid: gid}).refetch(self.job_id)
  puts "Refetch page #{gid}"
end
|
#reparse(page_gid) ⇒ Object
137
138
139
140
141
142
143
144
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 137
# Requests a reparse of a page.
#
# For any other page this delegates to #reparse_page. When the GID is this
# executor's own, it only raises the reparse_self flag and aborts the script
# with Error::SafeTerminateError.
#
# @param page_gid [String] GID of the page to reparse.
# @return [Object] result of #reparse_page for a foreign page.
# @raise [ArgumentError] unless page_gid is a String.
# @raise [Error::SafeTerminateError] when page_gid equals this executor's gid.
def reparse(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)
  return reparse_page(page_gid) unless page_gid == gid

  self.reparse_self = true
  raise Error::SafeTerminateError
end
|
#reparse_page(gid) ⇒ Object
128
129
130
131
132
133
134
135
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 128
# Reparses the page with the given GID on the server, or just announces
# what would have happened when not in save mode.
#
# @param gid [String] GID of the page to reparse.
# @return [nil]
def reparse_page(gid)
  unless save
    puts "Would have reparse page #{gid}"
    return
  end

  Client::JobPage.new({gid: gid}).reparse(self.job_id)
  puts "Reparse page #{gid}"
end
|
#save_type ⇒ Object
106
107
108
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 106
# Identifies the kind of save this executor performs.
#
# @return [Symbol] always :parsing.
def save_type
  :parsing
end
|
#update_parsing_done_status ⇒ Object
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 90
# Tells the server that parsing of this page is done.
#
# Does nothing unless #save is truthy.
#
# @return [nil]
# @raise [RuntimeError] when the server answers with a code other than 200.
def update_parsing_done_status
  return unless save

  response = parsing_update(job_id: job_id, gid: gid, parsing_status: :done)

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Done Status to server: #{response.body}"
    raise "Unable to save Page Parsing Done Status to server: #{response.body}"
  end

  puts "Page Parsing Done."
end
|
#update_parsing_starting_status ⇒ Object
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 72
# Tells the server that parsing of this page is starting, passing along the
# keep_outputs setting captured at construction time.
#
# Does nothing unless #save is truthy.
#
# @return [nil]
# @raise [RuntimeError] when the server answers with a code other than 200.
def update_parsing_starting_status
  return unless save

  response = parsing_update(
    job_id: job_id,
    gid: gid,
    parsing_status: :starting,
    keep_outputs: @keep_outputs
  )

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Status to server: #{response.body}"
    raise "Unable to save Page Parsing Status to server: #{response.body}"
  end

  puts "Page Parsing Status Updated."
end
|
#update_to_server(opts = {}) ⇒ Object
63
64
65
66
67
68
69
70
|
# File 'lib/datahen/scraper/ruby_parser_executor.rb', line 63
# Forwards a batch of pages/outputs and a status to #parsing_update.
#
# @param opts [Hash] reads :job_id, :gid, :pages, :outputs and :status;
#   :status is sent under the :parsing_status key. Missing keys are sent as nil.
# @return [Object] whatever #parsing_update returns.
def update_to_server(opts = {})
  job = opts[:job_id]
  page_gid = opts[:gid]
  new_pages = opts[:pages]
  new_outputs = opts[:outputs]
  status = opts[:status]

  parsing_update(
    job_id: job,
    gid: page_gid,
    pages: new_pages,
    outputs: new_outputs,
    parsing_status: status
  )
end
|