Class: MultiprocessQueueClient

Inherits:
GHTorrent::Command show all
Includes:
GHTorrent::Logging, GHTorrent::Settings
Defined in:
lib/ghtorrent/multiprocess_queue_client.rb

Overview

A generic base class for running multiple client processes that all listen to the same queue for incoming messages.

Client creation is driven by an input file whose format is described in #options.banner.

MultiprocessQueueClient expects subclasses to override the #clazz method so that it returns a class that respects the protocol shown in the example below.

MultiprocessQueueClient does not do any error handling/respawning of dead children. Client/Sub classes are expected to handle errors by themselves.

Examples:

class ExampleQueueClient
  # Builds a client from a GHTorrent::Settings configuration and a String
  # naming the queue to connect to
  def initialize(config, queue)
  end

  # Begins processing, using the given GHTorrent::Command as input
  def run(command)
  end

  # Halts processing of queue messages
  def stop
  end
end

Direct Known Subclasses

GHTRetrieveRepos, GHTRetrieveUsers

Constant Summary

Constants included from GHTorrent::Settings

GHTorrent::Settings::CONFIGKEYS, GHTorrent::Settings::DEFAULTS

Instance Method Summary collapse

Methods included from GHTorrent::Logging

#debug, #error, #info, #loggerr, #warn

Methods included from GHTorrent::Settings

#config, #merge, #merge_config_values, #override_config, #settings

Methods included from GHTorrent::Utils

included, #read_value, #user_type, #write_value

Methods inherited from GHTorrent::Command

#command_name, #override_config, #process_options, #queue_client, run, #version

Instance Method Details

#clazzObject



31
32
33
# File 'lib/ghtorrent/multiprocess_queue_client.rb', line 31

# Hook for subclasses: supplies the client class that #go instantiates
# (via clazz.new) in every forked process. This base version always raises
# a RuntimeError with the message 'Unimplemented'.
def clazz
  raise RuntimeError, 'Unimplemented'
end

#goObject



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/ghtorrent/multiprocess_queue_client.rb', line 66

# Spawns the client processes described by the mapping file and waits for
# them to finish.
#
# The mapping file path is taken from the first command line argument
# (ARGV[0]). Lines starting with '#' are skipped. Every other line is split
# into whitespace-separated fields and, depending on its first field, read as
#
#   U IP UNAME PASSWD NUM_PROCS
#   T IP TOKEN NUM_PROCS
#
# For each line, NUM_PROCS copies of the current settings are built, with
# :attach_ip, the credentials and :mirror_history_pages_back (100000)
# overridden. Lines whose first field is neither 'U' nor 'T' (blank lines
# included) yield no configuration.
#
# One child process is forked per configuration. Each child instantiates
# #clazz with its configuration and options[:queue], installs a TERM handler
# that calls the client's #stop, runs the client and exits. The parent waits
# for every child in fork order; an Interrupt raised while waiting ends the
# wait. Children that die are not respawned.
def go
  # File.readlines closes the mapping file before returning, so no open
  # descriptor is left in the parent or inherited by the forked children.
  configs = File.readlines(ARGV[0]).flat_map do |line|
    next [] if line.start_with?('#')

    # Split on runs of whitespace: columns aligned with several spaces or
    # tabs must not yield empty fields that shift the remaining values.
    fields = line.split
    case fields[0]
      when 'U'
        type, ip, name, passwd, instances = fields
      when 'T'
        type, ip, token, instances = fields
    end

    # For unrecognised lines `instances` is nil; nil.to_i is 0, so no
    # configuration is produced for them.
    instances.to_i.times.map do
      newcfg = override_config(settings.clone, :attach_ip, ip)

      case type
        when 'U'
          newcfg = override_config(newcfg, :github_username, name)
          newcfg = override_config(newcfg, :github_passwd, passwd)
        when 'T'
          newcfg = override_config(newcfg, :github_token, token)
      end

      override_config(newcfg, :mirror_history_pages_back, 100000)
    end
  end.compact

  children = configs.map do |config|
    pid = Process.fork

    if pid.nil?
      # Child process: run a single client until it finishes or is told to
      # stop via SIGTERM.
      retriever = clazz.new(config, options[:queue])

      Signal.trap('TERM') { retriever.stop }

      retriever.run(self)
      exit
    else
      debug "Parent #{Process.pid} forked child #{pid}"
      pid
    end
  end

  debug 'Waiting for children'
  begin
    children.each do |pid|
      debug "Waiting for child #{pid}"
      Process.waitpid(pid, 0)
      debug "Child #{pid} exited"
    end
  rescue Interrupt
    debug 'Stopping'
  end
end

#prepare_options(options) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/ghtorrent/multiprocess_queue_client.rb', line 35

# Registers the usage banner and the :queue option on the given options
# object. The banner documents the two accepted mapping-file line formats
# and ends with a usage line built from #command_name.
#
# options - object responding to #banner and #opt
#
# Returns whatever options.opt returns.
def prepare_options(options)
  usage = <<-USAGE
Retrieve data for multiple repos in parallel. To work, it requires
a mapping file formatted as either of the follow formats:

U IP UNAME PASSWD NUM_PROCS
T IP TOKEN NUM_PROCS

{U,T}: U signifies that a username/password pair is provided, T that an OAuth
     token is specified instead
IP: address to use for outgoing requests (use 0.0.0.0 on non-multihomed hosts)
UNAME: Github user name to use for outgoing requests
PASSWD: Github password to use for outgoing requests
TOKEN: Github OAuth token
NUM_PROCS: Number of processes to spawn for this IP/UNAME combination

Values in the config.yaml file set with the -c command are overridden.

#{command_name} [options] mapping-file

  USAGE
  options.banner(usage)

  options.opt(:queue, 'Queue to retrieve project names from',
              short: 'q', default: 'multiprocess-queue-client', type: :string)
end

#validateObject



61
62
63
64
# File 'lib/ghtorrent/multiprocess_queue_client.rb', line 61

# Runs the inherited validation first, then reports an error through
# Trollop::die when no mapping-file argument (args[0]) was supplied.
def validate
  super

  mapping_file = args[0]
  Trollop::die 'Argument mapping-file is required' if mapping_file.nil?
end