35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
# File 'lib/file_utils/file_utils.rb', line 35
# Runs the sub-command selected by Params['command'].
#
# Commands handled here and the Params keys each one reads:
# * 'mksymlink'                    - 'ref_cd', 'base_cd', 'dest'
# * 'merge', 'intersect', 'minus'  - delegated to content_data_command
# * 'unify_time'                   - 'cd'
# * 'indexer'                      - 'patterns' (':'-separated), optional 'exist_cd'
# * 'crawler'                      - 'conf_file', optional 'cd_out' and 'cd_in'
# * 'generate_files'               - none read directly in this method
#
# A missing or invalid parameter is reported through Log.error and the method
# returns nil without doing any further work. An unrecognised (or nil) command
# does nothing and returns nil.
#
# @return [Object, nil] the value produced by the selected command's last
#   step (e.g. the result of FileUtils.mksymlink for 'mksymlink'), or nil
#   when validation fails or no command matches.
def self.run
  case Params['command']
  when 'mksymlink'
    if Params['ref_cd'].nil?
      Log.error("--ref_cd is not set")
      return
    end
    ref_cd = ContentData.new()
    ref_cd.from_file(Params['ref_cd'])
    # NOTE(review): ref_cd was just built with .new, so this check looks
    # unreachable; kept as-is because from_file's failure mode is not visible here.
    if ref_cd.nil?
      Log.error("Error loading content data ref_cd=%s" % Params['ref_cd'])
      return
    end

    if Params['base_cd'].nil?
      Log.error("--base_cd is not set")
      return
    end
    base_cd = ContentData.new()
    base_cd.from_file(Params['base_cd'])
    # NOTE(review): same presumably-unreachable nil check as for ref_cd above.
    if base_cd.nil?
      Log.error("Error loading content data base_cd=%s" % Params['base_cd'])
      return
    end

    if Params['dest'].nil?
      Log.error("--dest is not set")
      return
    end

    begin
      FileUtils.mksymlink(ref_cd, base_cd, Params['dest'])
    rescue NotImplementedError
      # Raised by Ruby on platforms that do not support symbolic links.
      Log.error("symlinks are unimplemented on this machine")
      nil
    end
  when 'merge', 'intersect', 'minus'
    content_data_command
  when 'unify_time'
    if Params['cd'].nil?
      Log.error("--cd is not set")
      return
    end
    cd = ContentData.new()
    cd.from_file(Params['cd'])
    # NOTE(review): presumably unreachable, see the mksymlink branch.
    if cd.nil?
      Log.error("Error loading content data cd=%s" % Params['cd'])
      return
    end
    unify_time(cd)
  when 'indexer'
    if Params['patterns'].nil?
      Log.error("--patterns is not set")
      return
    end
    patterns = FileIndexing::IndexerPatterns.new
    Params['patterns'].split(':').each do |pattern|
      Log.debug1 "Pattern: #{pattern}"
      patterns.add_pattern File.expand_path(pattern)
    end
    unless patterns.size > 0
      Log.error("Error loading patterns=%s (empty file)" % Params['patterns'])
      return
    end

    # Optional previously generated content data handed to the indexer.
    exist_cd = nil
    unless Params['exist_cd'].nil?
      # NOTE(review): other branches use ContentData.new while this one uses
      # ContentData::ContentData.new - confirm which constant is intended.
      exist_cd = ContentData::ContentData.new()
      exist_cd.from_file(Params['exist_cd'])
      if exist_cd.nil?
        Log.error("Error loading content data exist_cd=%s" % Params['exist_cd'])
        return
      end
    end

    indexer = FileIndexing::IndexAgent.new
    indexer.index(patterns, exist_cd)
    Log.debug1 indexer.indexed_content.to_s
  when 'crawler'
    if Params['conf_file'].nil?
      Log.error("--conf_file is not set")
      return
    end
    unless File.exist?(Params['conf_file'])
      Log.error("config file doesn't exist conf_file=%s" % Params['conf_file'])
      return
    end

    if Params['cd_out'].nil?
      # Default the output name to a UTC timestamp.
      # NOTE(review): '%Y/%m/%d' puts '/' into the name, which makes it a
      # nested path rather than a single file name - confirm this is intended.
      time = Time.now.utc
      Params['cd_out'] = "crawler.out.#{time.strftime('%Y/%m/%d_%H-%M-%S')}"
    end

    # When an input content-data file is given it must exist on disk.
    if !Params['cd_in'].nil? && !File.exist?(Params['cd_in'])
      Log.error("input data file doesn't exist cd_in=%s" % Params['cd_in'])
      return
    end

    conf = Configuration.new(Params['conf_file'])
    # One crawler thread per configured server; wait for all of them before
    # joining their results.
    threads = []
    conf.server_conf_vec.each do |server|
      threads.push(Thread.new { Crawler.new(server, Params['cd_out'], Params['cd_in']) })
    end
    threads.each(&:join)
    join_servers_results(conf.server_conf_vec, Params['cd_out'])
  when 'generate_files'
    fg = FileGenerator::FileGenerator.new()
    fg.run()
  end
end
|