Class: Server
- Inherits:
-
Object
- Object
- Server
- Defined in:
- lib/w3m-autopagerize-server.rb
Overview
Entry Points #
Constant Summary collapse
- HTML_OUTPUT_FILE =
"/tmp/w3m-autopagerize-tmp.html"
Instance Method Summary collapse
- #crop_this_page(url, srcfile, charset, client) ⇒ Object
- #load_config_file(config_file) ⇒ Object
-
#load_siteinfo ⇒ Object
(executable-interpret “ruby19 -r w3m-autopagerize-server -e ‘$logger=Logger.new(); load_siteinfo’”) (executable-interpret “ruby18 -r w3m-autopagerize-server -e ‘$logger=Logger.new(); load_siteinfo’”).
- #nextpage(url, srcfile, charset, client) ⇒ Object
- #prepare(url, srcfile, charset, client, method) ⇒ Object
- #restart ⇒ Object
Instance Method Details
#crop_this_page(url, srcfile, charset, client) ⇒ Object
349 350 351 352 353 354 355 356 357 358 |
# File 'lib/w3m-autopagerize-server.rb', line 349
# Entry point: crops the page the client is currently viewing.
#
# The arguments are handed to #prepare unchanged; the URL that #prepare
# returns replaces the one passed in. Returns whatever crop_html returns.
# prefetch_next_location is invoked afterwards even when crop_html raises.
def crop_this_page(url, srcfile, charset, client)
  # The page source returned by #prepare is not needed in this method.
  _page_source, url = prepare(url, srcfile, charset, client, __method__)
  site = sitedata(url)
  begin
    crop_html(url, nil, site)
  ensure
    upcoming = site.next_url(url)
    prefetch_next_location(upcoming, site)
  end
end
#load_config_file(config_file) ⇒ Object
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 |
# File 'lib/w3m-autopagerize-server.rb', line 429
# Loads a Ruby configuration file and reports the outcome on $stderr.
#
# config_file - the symbol :ignore to skip loading, or a path. A relative
#               path is resolved against the directory of this source file.
#
# Never raises for a missing or broken configuration: load failures
# (including syntax errors in the config) are reported on $stderr instead.
# Signals and SystemExit are deliberately NOT swallowed, so Ctrl-C or an
# explicit exit still terminate the process.
def load_config_file(config_file)
  if config_file == :ignore
    $stderr.puts "load_config_file: config file is ignored!"
  else
    config_file = File.expand_path(config_file, File.dirname(__FILE__))
    if File.file?(config_file)
      load(config_file)
      $stderr.puts "load_config_file: loaded #{config_file}"
    else
      $stderr.puts "load_config_file: config file #{config_file} not found!"
    end
  end
rescue StandardError, ScriptError => e
  # ScriptError covers SyntaxError/LoadError raised by Kernel#load, which
  # are not StandardError subclasses.
  $stderr.puts "load_config_file: error loading #{config_file}!"
  $stderr.puts "load_config_file: #{e.class}: #{e.message}"
end
#load_siteinfo ⇒ Object
(executable-interpret “ruby19 -r w3m-autopagerize-server -e ‘$logger=Logger.new(); load_siteinfo’”) (executable-interpret “ruby18 -r w3m-autopagerize-server -e ‘$logger=Logger.new(); load_siteinfo’”)
410 411 412 413 414 415 416 417 418 419 420 421 422 423 |
# File 'lib/w3m-autopagerize-server.rb', line 410
# Fetches every URL in $SITEINFO_IMPORT_URLS, parses the response as JSON
# and appends one [Regexp, SiteData] pair to $SITEINFO per usable entry.
#
# An entry is used when its "data" hash has a "url" that is not listed in
# $EXCLUDE_URLS. An entry whose "url" is not a valid Ruby regular
# expression is logged and skipped, so one malformed entry does not stop
# the remaining entries from loading.
#
# Returns the result of the final $logger.info call.
def load_siteinfo
  $SITEINFO_IMPORT_URLS.each do |siteinfo_url|
    JSON.parse(get_content(siteinfo_url).toutf8).each do |entry|
      data = entry["data"]
      url = data["url"]
      next if !url || $EXCLUDE_URLS.include?(url)

      begin
        pattern = Regexp.new(url)
      rescue RegexpError => e
        $logger.error "#{__method__}: skipped invalid url pattern #{url.inspect}: #{e.message}"
        next
      end

      sd = SiteData.new(data["nextLink"], data["insertBefore"],
                        data["exampleUrl"], data["pageElement"])
      $SITEINFO << [pattern, sd]
    end
  end
  $logger.info "#{__method__}: loaded"
end
#nextpage(url, srcfile, charset, client) ⇒ Object
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 |
# File 'lib/w3m-autopagerize-server.rb', line 361
# Entry point: crops the page that follows +url+.
#
# The arguments are handed to #prepare unchanged; the URL it returns
# replaces the one passed in. The next location is taken from the site data
# for that URL; when it has none, each entry of SiteData.fallbacks is tried
# in order and the first one that yields a next URL is used.
#
# Returns whatever crop_html returns. On any StandardError (including the
# "no location!" error raised when neither the site data nor a fallback
# yields a next URL) the error is logged and {:html => "<pre>...</pre>"}
# describing it is returned instead of raising.
def nextpage(url, srcfile, charset, client)
  src, url = prepare(url, srcfile, charset, client, __method__)
  site = sitedata(url)
  location = site.next_url(url)
  if location
    if site.pageElement
      $logger.debug "#{__method__}: location and pageElement found."
    else
      $logger.debug "#{__method__}: location found."
    end
    begin
      crop_html location, url, site
    ensure
      prefetch_next_location location, site
    end
  else
    # An explicit search is required here: a `for`/`each` expression that
    # finishes without `break` evaluates to the collection itself, which is
    # truthy and would be mistaken for a URL.
    fallback = nil
    fallback_nexturl = nil
    SiteData.fallbacks.each do |candidate|
      candidate_url = candidate.next_url(url)
      next unless candidate_url
      fallback = candidate
      fallback_nexturl = candidate_url
      break
    end
    if fallback_nexturl
      $logger.info "#{__method__}: fallback"
      begin
        crop_html fallback_nexturl, url, fallback
      ensure
        prefetch_next_location fallback_nexturl, fallback
      end
    else
      $logger.debug "#{__method__}: no location."
      raise "no location!"
    end
  end
rescue => e
  # The failure may have happened before the site data or page source were
  # available, so everything used in the report is nil-checked.
  xpath = (site && site.nextLink) || 'nextLink not found'
  source = src
  unless source
    cached = $content_cache && $content_cache[url]
    source = cached.first if cached
  end
  src_encoding = source ? Kconv.guess(source) : 'unknown'
  backtrace = e.backtrace.pretty_inspect
  html = %{<pre>Error!
xpath = #{xpath}
#{e}
#{backtrace}
src_encoding=#{src_encoding}
</pre>
}
  $logger.error "#{__method__}: error!"
  $logger.error "#{__method__}: #{e}"
  $logger.error "#{__method__}: #{backtrace}"
  {:html => html}
end
#prepare(url, srcfile, charset, client, method) ⇒ Object
331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 |
# File 'lib/w3m-autopagerize-server.rb', line 331
# Shared setup for the entry points: logs the request, stores +client+ in
# $client and reads the page source from +srcfile+ when one is given.
#
# url     - page URL; for a URL matching file:.../cgi-bin/ the real URL is
#           taken from the first <base href=...> found in the source.
# srcfile - path handed to File.zread, or nil/false for "no source file".
# charset - passed through normalize_charset before being cached.
# client  - stored in the global $client.
# method  - caller name, used only as a log prefix.
#
# Returns [src, url]; src is nil when no srcfile was given.
def prepare(url, srcfile, charset, client, method)
  binary = "ASCII-8BIT"

  $logger.info "=================================================="
  $logger.info "#{method}: entered url=#{url} charset=#{charset}"
  $logger.debug "#{method}: W3M_SOURCEFILE = #{srcfile}" if srcfile
  $client = client

  src = srcfile ? File.zread(srcfile).force_encoding(binary) : nil

  if url =~ %r{^file:.*/cgi-bin/}
    # Request came from Local CGI: recover the page URL from the source.
    url = src.force_encoding(binary)[%r!<base href=['"](.+?)['"]!, 1]
    $logger.info "#{method}: base url=#{url}"
  else
    # Request came with W3M_SOURCEFILE: cache the source under its URL.
    $logger.debug "#{method}: set $content_cache[#{url.inspect}] from W3M_SOURCEFILE"
    $logger.debug "#{method}: source is html? = #{(src =~ /<body/i) && true}"
    if src
      $content_cache[url] = [src.force_encoding(binary), normalize_charset(charset)]
    end
  end

  [src, url]
end
#restart ⇒ Object
425 426 427 |
# File 'lib/w3m-autopagerize-server.rb', line 425
# Replaces the running process by executing the current script ($0) again.
# Does not return when exec succeeds.
def restart
  exec($PROGRAM_NAME)
end