31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
# File 'lib/file_utils/file_utils.rb', line 31
# Command-line entry point: dispatches on Params['command'] and runs the
# matching file utility.
#
# Supported commands and the Params keys each one reads:
#   mksymlink                  - 'ref_cd', 'base_cd', 'dest'
#   merge / intersect / minus  - 'cd_a', 'cd_b', 'dest'
#   unify_time                 - 'cd'
#   indexer                    - 'patterns' (colon separated), optional 'exist_cd'
#   crawler                    - 'conf_file', optional 'cd_out' and 'cd_in'
#   generate_files             - no keys read here
#
# A missing mandatory parameter is reported through Log.error and the method
# returns nil without running the command. Any other command value is
# silently ignored (nil is returned).
#
# Returns the value of the executed command:
#   mksymlink      -> result of FileUtil.mksymlink, or nil when symlinks are
#                     not implemented on this machine
#   merge/...      -> result of FileUtil.contet_data_command
#   unify_time     -> result of unify_time
#   indexer        -> result of the final Log.debug1 call
#   crawler        -> result of join_servers_results
#   generate_files -> result of FileGenerator#run
#
# NOTE(review): the `.nil?` checks that follow each `from_file` call test an
# object that was just created with `new`, so they presumably never fire and
# a failed load goes unnoticed here. They are kept as-is because the failure
# contract of `from_file` is not visible from this method - confirm and
# replace with a real check.
# NOTE(review): the indexer branch builds `ContentData::ContentData` while the
# other branches build `ContentData` - confirm which constant is intended.
def FileUtils.run
  case Params['command']
  when 'mksymlink'
    if Params['ref_cd'].nil?
      Log.error("--ref_cd is not set")
      return
    end
    ref_cd = ContentData.new
    ref_cd.from_file(Params['ref_cd'])
    if ref_cd.nil?
      Log.error("Error loading content data ref_cd=%s" % Params['ref_cd'])
      return
    end
    if Params['base_cd'].nil?
      Log.error("--base_cd is not set")
      return
    end
    base_cd = ContentData.new
    base_cd.from_file(Params['base_cd'])
    if base_cd.nil?
      Log.error("Error loading content data base_cd=%s" % Params['base_cd'])
      return
    end
    if Params['dest'].nil?
      Log.error("--dest is not set")
      return
    end
    begin
      # The value of mksymlink is the result of this command.
      FileUtil.mksymlink(ref_cd, base_cd, Params['dest'])
    rescue NotImplementedError
      Log.error("symlinks are unimplemented on this machine")
      nil
    end
  when 'merge', 'intersect', 'minus'
    if Params['cd_a'].nil?
      Log.error("--cd_a is not set")
      return
    end
    cd_a = ContentData.new
    cd_a.from_file(Params['cd_a'])
    if cd_a.nil?
      Log.error("Error loading content data cd_a=%s" % Params['cd_a'])
      return
    end
    if Params['cd_b'].nil?
      Log.error("--cd_b is not set")
      return
    end
    cd_b = ContentData.new
    cd_b.from_file(Params['cd_b'])
    if cd_b.nil?
      Log.error("Error loading content data cd_b=%s" % Params['cd_b'])
      return
    end
    # Fixed: this guard used to re-test 'cd_b', so a missing --dest slipped
    # through to the command.
    if Params['dest'].nil?
      Log.error("--dest is not set")
      return
    end
    FileUtil.contet_data_command(Params['command'], cd_a, cd_b, Params['dest'])
  when 'unify_time'
    if Params['cd'].nil?
      Log.error("--cd is not set")
      return
    end
    cd = ContentData.new
    cd.from_file(Params['cd'])
    if cd.nil?
      Log.error("Error loading content data cd=%s" % Params['cd'])
      return
    end
    unify_time(cd)
  when 'indexer'
    if Params['patterns'].nil?
      Log.error("--patterns is not set")
      return
    end
    patterns = FileIndexing::IndexerPatterns.new
    Params['patterns'].split(':').each do |pattern|
      Log.debug1 "Pattern: #{pattern}"
      patterns.add_pattern File.expand_path(pattern)
    end
    unless patterns.size > 0
      Log.error("Error loading patterns=%s (empty file)" % Params['patterns'])
      return
    end
    exist_cd = nil
    unless Params['exist_cd'].nil?
      exist_cd = ContentData::ContentData.new
      exist_cd.from_file(Params['exist_cd'])
      if exist_cd.nil?
        Log.error("Error loading content data exist_cd=%s" % Params['exist_cd'])
        return
      end
    end
    indexer = FileIndexing::IndexAgent.new
    indexer.index(patterns, exist_cd)
    Log.debug1 indexer.indexed_content.to_s
  when 'crawler'
    if Params['conf_file'].nil?
      Log.error("--conf_file is not set")
      return
    end
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    unless File.exist?(Params['conf_file'])
      Log.error("config file doesn't exist conf_file=%s" % Params['conf_file'])
      return
    end
    if Params['cd_out'].nil?
      # Default output name is derived from the current UTC time
      # (fixed: was the undefined constant `Tme`).
      time = Time.now.utc
      Params['cd_out'] = "crawler.out.#{time.strftime('%Y/%m/%d_%H-%M-%S')}"
    end
    # Fixed: the existence test used to look at 'conf_file' instead of the
    # input data file named in the error message.
    unless Params['cd_in'].nil? || File.exist?(Params['cd_in'])
      Log.error("input data file doesn't exist cd_in=%s" % Params['cd_in'])
      return
    end
    conf = Configuration.new(Params['conf_file'])
    # One thread per configured server; wait for all of them before the
    # per-server results are joined.
    threads = []
    conf.server_conf_vec.each do |server|
      threads << Thread.new { Crawler.new(server, Params['cd_out'], Params['cd_in']) }
    end
    threads.each(&:join)
    join_servers_results(conf.server_conf_vec, Params['cd_out'])
  when 'generate_files'
    fg = FileGenerator::FileGenerator.new
    fg.run
  end
end
|