Class: AtCoderFriends::ScrapingAgent
- Inherits:
-
Object
- Object
- AtCoderFriends::ScrapingAgent
show all
- Includes:
- PathUtil
- Defined in:
- lib/at_coder_friends/scraping_agent.rb
Overview
scrapes AtCoder contest site and
-
fetches problems
-
submits sources
Constant Summary
collapse
- BASE_URL =
'https://atcoder.jp/'
- XPATH_SECTION =
'//h3[.="%<title>s"]/following-sibling::section'
Constants included
from PathUtil
PathUtil::CASES_DIR, PathUtil::SMP_DIR
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods included from PathUtil
cases_dir, contest_name, smp_dir, split_prg_path
Constructor Details
#initialize(contest, config) ⇒ ScrapingAgent
Returns a new instance of ScrapingAgent.
21
22
23
24
25
26
27
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 21
# Sets up the agent for a single contest.
#
# @param contest [Object] contest identifier; joined into contest URLs by contest_url
# @param config [Object] settings object; read with string keys by other methods
def initialize(contest, config)
  @config = config
  @contest = contest
  # The hook makes every connection wait 0.1 second before it is opened.
  @agent = Mechanize.new.tap do |mech|
    mech.pre_connect_hooks << proc { sleep 0.1 }
  end
end
|
Instance Attribute Details
#agent ⇒ Object
Returns the value of attribute agent.
19
20
21
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 19
# Reader for @agent, which the constructor sets to a Mechanize instance.
#
# @return [Object] the current value of @agent
def agent; @agent; end
|
#config ⇒ Object
Returns the value of attribute config.
19
20
21
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 19
# Reader for @config, the settings object handed to the constructor.
#
# @return [Object] the current value of @config
def config; @config; end
|
#contest ⇒ Object
Returns the value of attribute contest.
19
20
21
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 19
# Reader for @contest, the contest identifier handed to the constructor.
#
# @return [Object] the current value of @contest
def contest; @contest; end
|
Instance Method Details
#code_test(path, infile) ⇒ Object
71
72
73
74
75
76
77
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 71
# Reads a program and an input file, logs in, and hands both to
# code_test_loop.
#
# @param path [String] program path; decomposed with split_prg_path
# @param infile [String] path of the file whose content becomes the test input
# @return [Object] whatever code_test_loop returns
def code_test(path, infile)
  src_path, _dir, _prg, _base, lang_ext, _q = split_prg_path(path)
  # Both files are read before logging in, so a missing file fails early.
  source_text = File.read(src_path, encoding: Encoding::UTF_8)
  input_text = File.read(infile)
  login
  code_test_loop(lang_ext, source_text, input_text)
end
|
#code_test_loop(ext, src, data) ⇒ Object
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 151
# Posts source code and input to the contest's custom test endpoint, then
# polls the result endpoint until the run has finished.
#
# @param ext [String] extension passed to lang_id to pick the language
# @param src [String] source code to execute
# @param data [String] text sent as the 'input' field
# @return [Hash, nil] the parsed JSON once Result.Status equals 3 or no
#   'Interval' is given; nil when the response has no 'Result' or the run
#   did not finish within 100 polls
# @raise [AppError] when the CSRF token cannot be found on the page, or the
#   submit endpoint answers with a non-empty body
def code_test_loop(ext, src, data)
  page = agent.get(contest_url('custom_test'))
  script = page.search('script').text
  # Non-greedy capture: stop at the first closing quote so that a later
  # quote on the same line is not swallowed into the token.
  csrf_token = script[/var csrfToken = "(.*?)"/, 1]
  raise AppError, 'csrf token not found in custom test page.' unless csrf_token

  payload = {
    'data.LanguageId' => lang_id(ext),
    'sourceCode' => src,
    'input' => data,
    'csrf_token' => csrf_token
  }
  page = agent.post(contest_url('custom_test/submit/json'), payload)
  msg = page.body
  # A non-empty body from the submit endpoint is raised as the error text.
  raise AppError, msg unless msg.empty?

  100.times do
    page = agent.get(contest_url('custom_test/json?reload=true'))
    status = JSON.parse(page.body)
    return nil unless status.is_a?(Hash) && status['Result']
    return status if status.dig('Result', 'Status') == 3
    return status unless status['Interval']

    # 'Interval' is divided by 1000 before sleeping (milliseconds to seconds).
    sleep 1.0 * status['Interval'] / 1000
  end
  nil
end
|
#common_url(path) ⇒ Object
29
30
31
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 29
# Builds a site-wide URL by joining BASE_URL and the given path.
#
# @param path [String] path below the site root
# @return [String] the joined URL
def common_url(path)
  segments = [BASE_URL, path]
  File.join(*segments)
end
|
#constraints_pat ⇒ Object
37
38
39
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 37
# Pattern source matched against section titles in parse_section to find
# the constraints section.
#
# @return [Object] config['constraints_pat'] when truthy, else the default
def constraints_pat
  custom = config['constraints_pat']
  return custom if custom

  '^制約$'
end
|
#contest_url(path) ⇒ Object
33
34
35
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 33
# Builds a URL under this contest: BASE_URL, 'contests', the contest
# identifier and the given path, joined in that order.
#
# @param path [String] path below the contest root
# @return [String] the joined URL
def contest_url(path)
  segments = [BASE_URL, 'contests', contest, path]
  File.join(*segments)
end
|
#fetch_all ⇒ Object
53
54
55
56
57
58
59
60
61
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 53
# Logs in, lists the contest's assignments, and fetches each problem.
#
# @yield [pbm] each fetched problem, right after it is fetched (optional)
# @return [Array] the fetch_problem results, in fetch_assignments order
def fetch_all
  puts "***** fetch_all #{@contest} *****"
  login
  fetch_assignments.map do |q, url|
    # tap hands the problem to the caller's block yet still returns it.
    fetch_problem(q, url).tap { |pbm| yield pbm if block_given? }
  end
end
|
#fetch_assignments ⇒ Object
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 90
# Fetches the contest's task list page and collects its task links.
#
# @return [Hash] link text => href, for every anchor in the first column
#   of the first table
def fetch_assignments
  url = contest_url('tasks')
  puts "fetch list from #{url} ..."
  links = agent.get(url).search('//table[1]//td[1]//a')
  links.map { |a| [a.text, a[:href]] }.to_h
end
|
#fetch_problem(q, url) ⇒ Object
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 101
# Downloads one problem page and feeds each titled section to parse_section.
#
# @param q [Object] problem identifier passed to Problem.new
# @param url [String] URL of the problem page
# @return [Problem] the object built by Problem.new
def fetch_problem(q, url)
  puts "fetch problem from #{url} ..."
  page = agent.get(url)
  Problem.new(q) do |pbm|
    pbm.html = page.body
    if contest == 'arc001'
      # For this contest each h3 is paired with the section that follows it
      # as a sibling, looked up via XPATH_SECTION.
      page.search('//h3').each do |h3|
        section = page.search(format(XPATH_SECTION, title: h3.content))[0]
        parse_section(pbm, h3, section) if section
      end
    else
      # Otherwise every element with an h3 child is treated as a section.
      page.search('//*[./h3]').each do |section|
        parse_section(pbm, section.search('h3')[0], section)
      end
    end
  end
end
|
#input_fmt_pat ⇒ Object
41
42
43
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 41
# Pattern source matched against section titles in parse_section to find
# the input format section.
#
# @return [Object] config['input_fmt_pat'] when truthy, else the default
def input_fmt_pat
  custom = config['input_fmt_pat']
  return custom if custom

  '^入出?力$'
end
|
#input_smp_pat ⇒ Object
45
46
47
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 45
# Pattern source matched against section titles in parse_section to find
# input samples; parse_section reads the sample number from the named
# group `no`.
#
# @return [Object] config['input_smp_pat'] when truthy, else the default
def input_smp_pat
  custom = config['input_smp_pat']
  return custom if custom

  '^入力例\s*(?<no>[\d0-9]+)$'
end
|
#lang_id(ext) ⇒ Object
199
200
201
202
203
204
205
206
207
208
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 199
# Looks up the submit language id configured for a file extension.
#
# @param ext [String] key looked up under config['ext_settings']
# @return [Object] the value of config['ext_settings'][ext]['submit_lang']
# @raise [AppError] when no language is configured for ext; the message
#   lists lang_list_txt, or '(failed to fetch)' when that is nil
def lang_id(ext)
  configured = config.dig('ext_settings', ext, 'submit_lang')
  return configured if configured

  msg = <<~MSG
    submit_lang for .#{ext} is not specified.
    Available languages:
    #{lang_list_txt || '(failed to fetch)'}
  MSG
  raise AppError, msg
end
|
#lang_list ⇒ Object
179
180
181
182
183
184
185
186
187
188
189
190
191
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 179
# Fetches the language choices offered on the contest's custom test page.
# A non-nil result is memoized in @lang_list.
#
# @return [Array<Hash>, nil] one { v: value, t: text } hash per option with
#   a non-empty value; nil when the page has no second form or that form
#   has no 'data.LanguageId' field
def lang_list
  @lang_list ||= begin
    page = agent.get(contest_url('custom_test'))
    # forms[1] is nil when the page has fewer than two forms; answer nil
    # instead of crashing so lang_id can report "(failed to fetch)".
    select_box = page.forms[1]&.field_with(name: 'data.LanguageId')
    select_box&.options
              &.reject { |opt| opt.value.empty? }
              &.map { |opt| { v: opt.value, t: opt.text } }
  end
end
|
#lang_list_txt ⇒ Object
193
194
195
196
197
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 193
# Formats lang_list as text, one "value - text" line per language.
#
# @return [String, nil] the lines joined with newlines; nil when lang_list
#   is nil
def lang_list_txt
  langs = lang_list
  return if langs.nil?

  lines = langs.map { |lang| "#{lang[:v]} - #{lang[:t]}" }
  lines.join("\n")
end
|
#login ⇒ Object
79
80
81
82
83
84
85
86
87
88
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 79
# Submits the site's login form with the configured credentials.
# Does nothing when config['user'] or config['password'] is missing or empty.
#
# @return [Object, nil] the result of submitting the form; nil when skipped
def login
  user = config['user']
  return if !user || user.empty?

  password = config['password']
  return if !password || password.empty?

  # The second form on the login page is the one that gets filled in.
  login_form = agent.get(common_url('login')).forms[1]
  login_form.field_with(name: 'username').value = user
  login_form.field_with(name: 'password').value = password
  login_form.submit
end
|
#open_contest ⇒ Object
210
211
212
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 210
# Opens the contest's top page through Launchy.
#
# @return [Object] whatever Launchy.open returns
def open_contest
  top_page = contest_url('')
  Launchy.open(top_page)
end
|
#output_smp_pat ⇒ Object
49
50
51
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 49
# Pattern source matched against section titles in parse_section to find
# output samples; parse_section reads the sample number from the named
# group `no`.
#
# @return [Object] config['output_smp_pat'] when truthy, else the default
def output_smp_pat
  custom = config['output_smp_pat']
  return custom if custom

  '^出力例\s*(?<no>[\d0-9]+)$'
end
|
#parse_section(pbm, h3, section) ⇒ Object
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 121
# Classifies one section by its heading and stores its content on the
# problem object.
#
# @param pbm [Object] problem being filled in (desc, fmt, add_smp are used)
# @param h3 [Object] heading node; its content is the section title
# @param section [Object] section node; its content and first <pre> are used
# @return [Object, nil] the value of the matching branch; nil when the
#   title matches none of the patterns
def parse_section(pbm, h3, section)
  # Drop U+008F / U+0090 from the trimmed title before matching it.
  heading = h3.content.strip.delete("\u008f\u0090")
  body_text = section.content
  code = section.search('pre')[0]&.content || ''

  if heading.match(/#{constraints_pat}/)
    pbm.desc += body_text
  elsif heading.match(/#{input_fmt_pat}/)
    pbm.desc += body_text
    pbm.fmt = code
  elsif (found = heading.match(/#{input_smp_pat}/))
    pbm.add_smp(found[:no], :in, code)
  elsif (found = heading.match(/#{output_smp_pat}/))
    pbm.add_smp(found[:no], :exp, code)
  end
end
|
#post_src(q, ext, src) ⇒ Object
139
140
141
142
143
144
145
146
147
148
149
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 139
# Fills in and submits the contest's submit form.
#
# @param q [String] problem label; the task option whose text starts with
#   it gets selected
# @param ext [String] extension passed to lang_id to pick the language
# @param src [String] source code placed in the 'sourceCode' field
# @return [Object] the result of submitting the form
# @raise [AppError] when no task option matches q (or selecting it yields
#   a falsy value)
def post_src(q, ext, src)
  # The second form on the submit page is the one that gets filled in.
  submit_form = agent.get(contest_url('submit')).forms[1]
  submit_form.field_with(name: 'data.TaskScreenName') do |task_select|
    target = task_select.options.find { |candidate| candidate.text.start_with?(q) }
    raise AppError, "unknown problem:#{q}." unless target&.select
  end
  submit_form.add_field!('data.LanguageId', lang_id(ext))
  submit_form.field_with(name: 'sourceCode').value = src
  submit_form.submit
end
|
#submit(path) ⇒ Object
63
64
65
66
67
68
69
|
# File 'lib/at_coder_friends/scraping_agent.rb', line 63
# Reads a program file, logs in, and posts the source via post_src.
#
# @param path [String] program path; decomposed with split_prg_path
# @return [Object] whatever post_src returns
def submit(path)
  src_path, _dir, prg, _base, lang_ext, q = split_prg_path(path)
  puts "***** submit #{prg} *****"
  # The file is read before logging in, so a missing file fails early.
  source_text = File.read(src_path, encoding: Encoding::UTF_8)
  login
  post_src(q, lang_ext, source_text)
end
|