root / branches / 1.1 / src / haizea / cli / commands.py @ 707
1 |
# -------------------------------------------------------------------------- #
|
---|---|
2 |
# Copyright 2006-2009, University of Chicago #
|
3 |
# Copyright 2008-2009, Distributed Systems Architecture Group, Universidad #
|
4 |
# Complutense de Madrid (dsa-research.org) #
|
5 |
# #
|
6 |
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
|
7 |
# not use this file except in compliance with the License. You may obtain #
|
8 |
# a copy of the License at #
|
9 |
# #
|
10 |
# http://www.apache.org/licenses/LICENSE-2.0 #
|
11 |
# #
|
12 |
# Unless required by applicable law or agreed to in writing, software #
|
13 |
# distributed under the License is distributed on an "AS IS" BASIS, #
|
14 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
|
15 |
# See the License for the specific language governing permissions and #
|
16 |
# limitations under the License. #
|
17 |
# -------------------------------------------------------------------------- #
|
18 |
|
19 |
from haizea.core.manager import Manager |
20 |
from haizea.common.utils import generate_config_name, unpickle |
21 |
from haizea.core.configfile import HaizeaConfig, HaizeaMultiConfig |
22 |
from haizea.core.accounting import AccountingDataCollection |
23 |
from haizea.common.config import ConfigException |
24 |
from haizea.common.stats import percentile |
25 |
from haizea.cli.optionparser import Option |
26 |
from haizea.cli import Command |
27 |
from mx.DateTime import TimeDelta, Parser |
28 |
import haizea.common.defaults as defaults |
29 |
import sys |
30 |
import os |
31 |
import errno |
32 |
import signal |
33 |
from time import sleep |
34 |
|
35 |
try:
|
36 |
import xml.etree.ElementTree as ET |
37 |
except ImportError: |
38 |
# Compatibility with Python <=2.4
|
39 |
import elementtree.ElementTree as ET |
40 |
|
41 |
|
42 |
class haizea(Command): |
43 |
"""
|
44 |
This is the main Haizea command. By default, it will start Haizea as a daemon, which
|
45 |
can receive requests via RPC or interact with other components such as OpenNebula. It can
|
46 |
also start as a foreground process, and write all log messages to the console. All
|
47 |
Haizea options are specified through the configuration file."""
|
48 |
|
49 |
name = "haizea"
|
50 |
|
51 |
def __init__(self, argv): |
52 |
Command.__init__(self, argv)
|
53 |
|
54 |
self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", |
55 |
help = """
|
56 |
The location of the Haizea configuration file. If not
|
57 |
specified, Haizea will first look for it in
|
58 |
/etc/haizea/haizea.conf and then in ~/.haizea/haizea.conf.
|
59 |
"""))
|
60 |
self.optparser.add_option(Option("-f", "--fg", action="store_true", dest="foreground", |
61 |
help = """
|
62 |
Runs Haizea in the foreground.
|
63 |
"""))
|
64 |
self.optparser.add_option(Option("--stop", action="store_true", dest="stop", |
65 |
help = """
|
66 |
Stops the Haizea daemon.
|
67 |
"""))
|
68 |
|
69 |
def run(self): |
70 |
self.parse_options()
|
71 |
|
72 |
pidfile = defaults.DAEMON_PIDFILE # TODO: Make configurable
|
73 |
|
74 |
if self.opt.stop == None: |
75 |
# Start Haizea
|
76 |
|
77 |
# Check if a daemon is already running
|
78 |
if os.path.exists(pidfile):
|
79 |
pf = file(pidfile,'r') |
80 |
pid = int(pf.read().strip())
|
81 |
pf.close() |
82 |
|
83 |
try:
|
84 |
os.kill(pid, signal.SIG_DFL) |
85 |
except OSError, (err, msg): |
86 |
if err == errno.ESRCH:
|
87 |
# Pidfile is stale. Remove it.
|
88 |
os.remove(pidfile) |
89 |
else:
|
90 |
msg = "Unexpected error when checking pid file '%s'.\n%s\n" %(pidfile, msg)
|
91 |
sys.stderr.write(msg) |
92 |
sys.exit(1)
|
93 |
else:
|
94 |
msg = "Haizea seems to be already running (pid %i)\n" % pid
|
95 |
sys.stderr.write(msg) |
96 |
sys.exit(1)
|
97 |
|
98 |
try:
|
99 |
configfile=self.opt.conf
|
100 |
if configfile == None: |
101 |
# Look for config file in default locations
|
102 |
for loc in defaults.CONFIG_LOCATIONS: |
103 |
if os.path.exists(loc):
|
104 |
config = HaizeaConfig.from_file(loc) |
105 |
break
|
106 |
else:
|
107 |
print >> sys.stdout, "No configuration file specified, and none found at default locations." |
108 |
print >> sys.stdout, "Make sure a config file exists at:\n -> %s" % "\n -> ".join(defaults.CONFIG_LOCATIONS) |
109 |
print >> sys.stdout, "Or specify a configuration file with the --conf option." |
110 |
exit(1) |
111 |
else:
|
112 |
config = HaizeaConfig.from_file(configfile) |
113 |
except ConfigException, msg:
|
114 |
print >> sys.stderr, "Error in configuration file:" |
115 |
print >> sys.stderr, msg
|
116 |
exit(1) |
117 |
|
118 |
daemon = not self.opt.foreground |
119 |
|
120 |
manager = Manager(config, daemon, pidfile) |
121 |
|
122 |
manager.start() |
123 |
elif self.opt.stop: # Stop Haizea |
124 |
# Based on code in: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012
|
125 |
try:
|
126 |
pf = file(pidfile,'r') |
127 |
pid = int(pf.read().strip())
|
128 |
pf.close() |
129 |
except IOError: |
130 |
msg = "Could not stop, pid file '%s' missing.\n"
|
131 |
sys.stderr.write(msg % pidfile) |
132 |
sys.exit(1)
|
133 |
try:
|
134 |
while 1: |
135 |
os.kill(pid, signal.SIGTERM) |
136 |
sleep(1)
|
137 |
except OSError, err: |
138 |
err = str(err)
|
139 |
if err.find("No such process") > 0: |
140 |
os.remove(pidfile) |
141 |
else:
|
142 |
print str(err) |
143 |
sys.exit(1)
|
144 |
|
145 |
class haizea_generate_configs(Command):
    """
    Takes an Haizea multiconfiguration file and generates the individual
    configuration files. See the Haizea manual for more details on multiconfiguration
    files."""
    
    name = "haizea-generate-configs"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True,
                                         help = """
                                         Multiconfiguration file.
                                         """))
        self.optparser.add_option(Option("-d", "--dir", action="store", type="string", dest="dir", required=True,
                                         help = """
                                         Directory where the individual configuration files
                                         must be created.
                                         """))
        
    def run(self):
        """Expand the multiconfiguration into one .conf file per configuration.
        
        Files are written into the --dir directory (created if needed); each
        file name encodes the profile/trace/annotation/injection combination."""
        self.parse_options()
        
        configfile = self.opt.conf
        multiconfig = HaizeaMultiConfig.from_file(configfile)
        
        etcdir = self.opt.dir
        
        configs = multiconfig.get_configs()
        
        etcdir = os.path.abspath(etcdir)
        if not os.path.exists(etcdir):
            os.makedirs(etcdir)
        
        for c in configs:
            profile = c.get_attr("profile")
            tracefile = c.get("tracefile")
            injfile = c.get("injectionfile")
            annotationfile = c.get("annotationfile")
            # Unique, recognizable name derived from the input combination.
            configname = generate_config_name(profile, tracefile, annotationfile, injfile)
            configfile = etcdir + "/%s.conf" % configname
            fc = open(configfile, "w")
            try:
                c.config.write(fc)
            finally:
                # Close even if writing fails, so we don't leak file
                # descriptors when generating many configurations.
                fc.close()
190 |
|
191 |
class haizea_generate_scripts(Command): |
192 |
"""
|
193 |
Generates a script, based on a script template, to run all the individual
|
194 |
configuration files generated by haizea-generate-configs. This command
|
195 |
requires Mako Templates for Python (http://www.makotemplates.org/)."""
|
196 |
|
197 |
name = "haizea-generate-scripts"
|
198 |
|
199 |
def __init__(self, argv): |
200 |
Command.__init__(self, argv)
|
201 |
|
202 |
self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True, |
203 |
help = """
|
204 |
Multiconfiguration file used in haizea-generate-configs.
|
205 |
"""))
|
206 |
self.optparser.add_option(Option("-d", "--confdir", action="store", type="string", dest="confdir", required=True, |
207 |
help = """
|
208 |
Directory containing the individual configuration files.
|
209 |
"""))
|
210 |
self.optparser.add_option(Option("-t", "--template", action="store", type="string", dest="template", required=True, |
211 |
help = """
|
212 |
Script template (sample templates are included in /usr/share/haizea/etc)
|
213 |
"""))
|
214 |
self.optparser.add_option(Option("-m", "--only-missing", action="store_true", dest="onlymissing", |
215 |
help = """
|
216 |
If specified, the generated script will only run the configurations
|
217 |
that have not already produced a datafile. This is useful when some simulations
|
218 |
fail, and you don't want to have to rerun them all.
|
219 |
"""))
|
220 |
|
221 |
def run(self): |
222 |
self.parse_options()
|
223 |
|
224 |
configfile=self.opt.conf
|
225 |
multiconfig = HaizeaMultiConfig.from_file(configfile) |
226 |
|
227 |
try:
|
228 |
from mako.template import Template |
229 |
except Exception, e: |
230 |
print "You need Mako Templates for Python to run this command." |
231 |
print "You can download them at http://www.makotemplates.org/" |
232 |
exit(1) |
233 |
|
234 |
configs = multiconfig.get_configs() |
235 |
|
236 |
etcdir = os.path.abspath(self.opt.confdir)
|
237 |
if not os.path.exists(etcdir): |
238 |
os.makedirs(etcdir) |
239 |
|
240 |
templatedata = [] |
241 |
for c in configs: |
242 |
profile = c.get_attr("profile")
|
243 |
tracefile = c.get("tracefile")
|
244 |
injfile = c.get("injectionfile")
|
245 |
datafile = c.get("datafile")
|
246 |
annotationfile = c.get("annotationfile")
|
247 |
configname = generate_config_name(profile, tracefile, annotationfile, injfile) |
248 |
if not self.opt.onlymissing or not os.path.exists(datafile): |
249 |
configfile = etcdir + "/%s.conf" % configname
|
250 |
templatedata.append((configname, configfile)) |
251 |
|
252 |
template = Template(filename=self.opt.template)
|
253 |
print template.render(configs=templatedata, etcdir=etcdir)
|
254 |
|
255 |
|
256 |
class haizea_convert_data(Command):
    """
    Converts Haizea datafiles into another (easier to process) format.
    """
    
    name = "haizea-convert-data"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        
        self.optparser.add_option(Option("-t", "--type", action="store", dest="type",
                                         choices = ["per-run", "per-lease", "counter"],
                                         help = """
                                         Type of data to produce.
                                         """))
        self.optparser.add_option(Option("-c", "--counter", action="store", dest="counter",
                                         help = """
                                         Counter to print out when using '--type counter'.
                                         """))
        self.optparser.add_option(Option("-f", "--format", action="store", type="string", dest="format",
                                         help = """
                                         Output format. Currently supported: csv
                                         """))
        self.optparser.add_option(Option("-l", "--list-counters", action="store_true", dest="list_counters",
                                         help = """
                                         If specified, the command will just print out the names of counters
                                         stored in the data file and then exit, regardless of other parameters.
                                         """))
        
    def run(self):
        # Prints CSV to stdout: one header line, then one row per run, per
        # lease, or per counter sample, depending on --type.
        self.parse_options()
        
        # Positional arguments (after the command name) are the datafiles.
        datafiles=self.args[1:]
        if len(datafiles) == 0:
            print "Please specify at least one datafile to convert"
            exit(1)
        
        # The first datafile determines the header layout (attributes,
        # counters, stats). Assumes the remaining datafiles share the same
        # structure -- TODO confirm; a mismatched file would KeyError below.
        datafile1 = unpickle(datafiles[0])
        
        counter_names = datafile1.counters.keys()
        attr_names = datafile1.attrs.keys()
        lease_stats_names = datafile1.lease_stats_names
        stats_names = datafile1.stats_names
        
        if self.opt.list_counters:
            # --list-counters short-circuits everything else.
            for counter in counter_names:
                print counter
            exit(0)
        
        # Build the CSV header for the requested output type. Note that
        # 'counter' is only bound in the "counter" branch; the per-sample
        # loop below relies on that binding.
        if self.opt.type == "per-run":
            header_fields = attr_names + stats_names
        elif self.opt.type == "per-lease":
            header_fields = attr_names + ["lease_id"] + lease_stats_names
        elif self.opt.type == "counter":
            counter = self.opt.counter
            if not datafile1.counters.has_key(counter):
                print "The specified datafile does not have a counter called '%s'" % counter
                exit(1)
            header_fields = attr_names + ["time", "value"]
            # Counters with an averaging mode get an extra "average" column.
            if datafile1.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
                header_fields.append("average")
        
        header = ",".join(header_fields)
        
        print header
        
        for datafile in datafiles:
            data = unpickle(datafile)
            
            # Attribute values are repeated on every row of this datafile.
            attrs = [data.attrs[attr_name] for attr_name in attr_names]
            
            if self.opt.type == "per-run":
                # One row per datafile (i.e. per simulation run).
                # Backticks are Python 2 repr() shorthand.
                fields = attrs + [`data.stats[stats_name]` for stats_name in stats_names]
                print ",".join(fields)
            elif self.opt.type == "per-lease":
                # One row per lease in this datafile.
                leases = data.lease_stats
                for lease_id, lease_stat in leases.items():
                    fields = attrs + [`lease_id`] + [`lease_stat.get(lease_stat_name,"")` for lease_stat_name in lease_stats_names]
                    print ",".join(fields)
            elif self.opt.type == "counter":
                # One row per sample of the selected counter.
                for (time, lease_id, value, avg) in data.counters[counter]:
                    fields = attrs + [`time`, `value`]
                    if data.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
                        fields.append(`avg`)
                    print ",".join(fields)
341 |
|
342 |
|
343 |
|
344 |
class haizea_lwf2xml(Command):
    """
    Converts old Haizea LWF file into new XML-based LWF format
    """
    
    name = "haizea-lwf2xml"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        
        self.optparser.add_option(Option("-i", "--in", action="store", type="string", dest="inf",
                                         help = """
                                         Input file
                                         """))
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf",
                                         help = """
                                         Output file
                                         """))
        
    def run(self):
        # Reads the whitespace-separated old LWF format line by line and
        # builds the equivalent <lease-workload> XML, printed to stdout.
        # NOTE(review): despite the --out option, 'outfile' is never written
        # to -- the result only goes to stdout. Confirm whether that is
        # intentional before relying on -o.
        self.parse_options()
        
        infile = self.opt.inf
        outfile = self.opt.outf
        
        root = ET.Element("lease-workload")
        root.set("name", infile)
        # NOTE(review): 'description' and 'time' are created but never used
        # afterwards (the description element stays empty).
        description = ET.SubElement(root, "description")
        time = TimeDelta(seconds=0)
        lease_id = 1
        requests = ET.SubElement(root, "lease-requests")
        
        infile = open(infile, "r")
        for line in infile:
            # Skip comment lines ('#') and blank lines.
            if line[0]!='#' and len(line.strip()) != 0:
                # Old LWF fields, in column order.
                fields = line.split()
                submit_time = int(fields[0])
                start_time = int(fields[1])
                duration = int(fields[2])
                real_duration = int(fields[3])
                num_nodes = int(fields[4])
                cpu = int(fields[5])
                mem = int(fields[6])
                # NOTE(review): 'disk' is parsed but never emitted in the XML.
                disk = int(fields[7])
                vm_image = fields[8]
                vm_imagesize = int(fields[9])
                
                lease_request = ET.SubElement(requests, "lease-request")
                lease_request.set("arrival", str(TimeDelta(seconds=submit_time)))
                # Only emit <realduration> when it differs from the duration.
                if real_duration != duration:
                    realduration = ET.SubElement(lease_request, "realduration")
                    realduration.set("time", str(TimeDelta(seconds=real_duration)))
                
                lease = ET.SubElement(lease_request, "lease")
                # Backticks are Python 2 repr() shorthand.
                lease.set("id", `lease_id`)
                
                nodes = ET.SubElement(lease, "nodes")
                node_set = ET.SubElement(nodes, "node-set")
                node_set.set("numnodes", `num_nodes`)
                res = ET.SubElement(node_set, "res")
                res.set("type", "CPU")
                if cpu == 1:
                    res.set("amount", "100")
                else:
                    # NOTE(review): cpu != 1 leaves the CPU <res> without an
                    # "amount" attribute -- looks unfinished; confirm what the
                    # old format's cpu field means in that case.
                    pass
                res = ET.SubElement(node_set, "res")
                res.set("type", "Memory")
                res.set("amount", `mem`)
                
                start = ET.SubElement(lease, "start")
                # start_time == -1 marks a best-effort (preemptible) lease
                # with no <exact> start time; otherwise it is an AR lease.
                if start_time == -1:
                    lease.set("preemptible", "true")
                else:
                    lease.set("preemptible", "false")
                    exact = ET.SubElement(start, "exact")
                    exact.set("time", str(TimeDelta(seconds=start_time)))
                
                duration_elem = ET.SubElement(lease, "duration")
                duration_elem.set("time", str(TimeDelta(seconds=duration)))
                
                software = ET.SubElement(lease, "software")
                diskimage = ET.SubElement(software, "disk-image")
                diskimage.set("id", vm_image)
                diskimage.set("size", `vm_imagesize`)
                
                lease_id += 1
        # NOTE(review): 'tree' is built but never written; only the tostring
        # dump below is produced.
        tree = ET.ElementTree(root)
        print ET.tostring(root)
|
437 |
|
438 |
|
439 |
|
440 |
class haizea_swf2lwf(Command): |
441 |
"""
|
442 |
Converts Standard Workload Format (SWF, used in the Parallel Workloads Archive at
|
443 |
http://www.cs.huji.ac.il/labs/parallel/workload/) to Lease Workload Format
|
444 |
"""
|
445 |
|
446 |
name = "haizea-swf2lwf"
|
447 |
|
448 |
def __init__(self, argv): |
449 |
Command.__init__(self, argv)
|
450 |
|
451 |
self.optparser.add_option(Option("-i", "--in", action="store", type="string", dest="inf", required=True, |
452 |
help = """
|
453 |
Input file
|
454 |
"""))
|
455 |
self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True, |
456 |
help = """
|
457 |
Output file
|
458 |
"""))
|
459 |
self.optparser.add_option(Option("-p", "--preemptible", action="store", type="string", dest="preemptible", required=True, |
460 |
help = """
|
461 |
Should the leases be preemptable or not?
|
462 |
"""))
|
463 |
self.optparser.add_option(Option("-f", "--from", action="store", type="string", dest="from_time", default="00:00:00:00", |
464 |
help = """
|
465 |
This parameter, together with the --amount parameter, allows converting just
|
466 |
an interval of the SWF file.
|
467 |
|
468 |
This parameter must be a timestamp of the format DD:HH:MM:SS. Only jobs sumitted
|
469 |
at or after that time will be converted.
|
470 |
|
471 |
Default is 00:00:00:00
|
472 |
"""))
|
473 |
self.optparser.add_option(Option("-l", "--interval-length", action="store", type="string", dest="interval_length", |
474 |
help = """
|
475 |
Length of the interval in format DD:HH:MM:SS. Default is to convert jobs until the
|
476 |
end of the SWF file.
|
477 |
"""))
|
478 |
self.optparser.add_option(Option("-q", "--queues", action="store", type="string", dest="queues", |
479 |
help = """
|
480 |
Only convert jobs from the specified queues
|
481 |
"""))
|
482 |
self.optparser.add_option(Option("-m", "--memory", action="store", type="string", dest="mem", |
483 |
help = """
|
484 |
Memory requested by jobs.
|
485 |
"""))
|
486 |
self.optparser.add_option(Option("-s", "--scale", action="store", type="string", dest="scale", |
487 |
help = """
|
488 |
Scale number of processors by 1/SCALE.
|
489 |
"""))
|
490 |
|
491 |
def run(self): |
492 |
self.parse_options()
|
493 |
|
494 |
infile = self.opt.inf
|
495 |
outfile = self.opt.outf
|
496 |
|
497 |
from_time = Parser.DateTimeDeltaFromString(self.opt.from_time)
|
498 |
if self.opt.interval_length == None: |
499 |
to_time = None
|
500 |
else:
|
501 |
to_time = from_time + Parser.DateTimeDeltaFromString(self.opt.interval_length)
|
502 |
|
503 |
root = ET.Element("lease-workload")
|
504 |
root.set("name", infile)
|
505 |
description = ET.SubElement(root, "description")
|
506 |
description.text = "Created with haizea-swf2lwf %s" % " ".join(self.argv[1:]) |
507 |
time = TimeDelta(seconds=0)
|
508 |
requests = ET.SubElement(root, "lease-requests")
|
509 |
|
510 |
slowdowns = [] |
511 |
users = set()
|
512 |
|
513 |
infile = open(infile, "r") |
514 |
for line in infile: |
515 |
if line[0]!=';' and len(line.strip()) != 0: |
516 |
fields = line.split() |
517 |
|
518 |
# Unpack the job's attributes. The description of each field is
|
519 |
# taken from the SWF documentation at
|
520 |
# http://www.cs.huji.ac.il/labs/parallel/workload/swf.html
|
521 |
|
522 |
# Job Number -- a counter field, starting from 1.
|
523 |
job_number = int(fields[0]) |
524 |
|
525 |
# Submit Time -- in seconds. The earliest time the log refers to is zero,
|
526 |
# and is the submittal time the of the first job. The lines in the log are
|
527 |
# sorted by ascending submittal times. It makes sense for jobs to also be
|
528 |
# numbered in this order.
|
529 |
submit_time = int(fields[1]) |
530 |
|
531 |
# Wait Time -- in seconds. The difference between the job's submit time
|
532 |
# and the time at which it actually began to run. Naturally, this is only
|
533 |
# relevant to real logs, not to models.
|
534 |
wait_time = int(fields[2]) |
535 |
|
536 |
# Run Time -- in seconds. The wall clock time the job was running (end
|
537 |
# time minus start time).
|
538 |
# We decided to use ``wait time'' and ``run time'' instead of the equivalent
|
539 |
# ``start time'' and ``end time'' because they are directly attributable to
|
540 |
# the scheduler and application, and are more suitable for models where only
|
541 |
# the run time is relevant.
|
542 |
# Note that when values are rounded to an integral number of seconds (as
|
543 |
# often happens in logs) a run time of 0 is possible and means the job ran
|
544 |
# for less than 0.5 seconds. On the other hand it is permissable to use
|
545 |
# floating point values for time fields.
|
546 |
run_time = int(fields[3]) |
547 |
|
548 |
# Number of Allocated Processors -- an integer. In most cases this is also
|
549 |
# the number of processors the job uses; if the job does not use all of them,
|
550 |
# we typically don't know about it.
|
551 |
num_processors_allocated = int(fields[4]) |
552 |
|
553 |
# Average CPU Time Used -- both user and system, in seconds. This is the
|
554 |
# average over all processors of the CPU time used, and may therefore be
|
555 |
# smaller than the wall clock runtime. If a log contains the total CPU time
|
556 |
# used by all the processors, it is divided by the number of allocated
|
557 |
# processors to derive the average.
|
558 |
avg_cpu_time = float(fields[5]) |
559 |
|
560 |
# Used Memory -- in kilobytes. This is again the average per processor.
|
561 |
used_memory = int(fields[6]) |
562 |
|
563 |
# Requested Number of Processors.
|
564 |
num_processors_requested = int(fields[7]) |
565 |
|
566 |
# Requested Time. This can be either runtime (measured in wallclock seconds),
|
567 |
# or average CPU time per processor (also in seconds) -- the exact meaning
|
568 |
# is determined by a header comment. In many logs this field is used for
|
569 |
# the user runtime estimate (or upper bound) used in backfilling. If a log
|
570 |
# contains a request for total CPU time, it is divided by the number of
|
571 |
# requested processors.
|
572 |
time_requested = int(fields[8]) |
573 |
|
574 |
# Requested Memory (again kilobytes per processor).
|
575 |
mem_requested = int(fields[9]) |
576 |
|
577 |
# Status 1 if the job was completed, 0 if it failed, and 5 if cancelled.
|
578 |
# If information about chekcpointing or swapping is included, other values
|
579 |
# are also possible. See usage note below. This field is meaningless for
|
580 |
# models, so would be -1.
|
581 |
status = int(fields[10]) |
582 |
|
583 |
# User ID -- a natural number, between one and the number of different users.
|
584 |
user_id = int(fields[11]) |
585 |
|
586 |
# Group ID -- a natural number, between one and the number of different groups.
|
587 |
# Some systems control resource usage by groups rather than by individual users.
|
588 |
group_id = int(fields[12]) |
589 |
|
590 |
# Executable (Application) Number -- a natural number, between one and the number
|
591 |
# of different applications appearing in the workload. in some logs, this might
|
592 |
# represent a script file used to run jobs rather than the executable directly;
|
593 |
# this should be noted in a header comment.
|
594 |
exec_number = int(fields[13]) |
595 |
|
596 |
# Queue Number -- a natural number, between one and the number of different
|
597 |
# queues in the system. The nature of the system's queues should be explained
|
598 |
# in a header comment. This field is where batch and interactive jobs should
|
599 |
# be differentiated: we suggest the convention of denoting interactive jobs by 0.
|
600 |
queue = int(fields[14]) |
601 |
|
602 |
# Partition Number -- a natural number, between one and the number of different
|
603 |
# partitions in the systems. The nature of the system's partitions should be
|
604 |
# explained in a header comment. For example, it is possible to use partition
|
605 |
# numbers to identify which machine in a cluster was used.
|
606 |
partition = int(fields[15]) |
607 |
|
608 |
# Preceding Job Number -- this is the number of a previous job in the workload,
|
609 |
# such that the current job can only start after the termination of this preceding
|
610 |
# job. Together with the next field, this allows the workload to include feedback
|
611 |
# as described below.
|
612 |
prec_job = int(fields[16]) |
613 |
|
614 |
# Think Time from Preceding Job -- this is the number of seconds that should elapse
|
615 |
# between the termination of the preceding job and the submittal of this one.
|
616 |
prec_job_thinktime = int(fields[17]) |
617 |
|
618 |
|
619 |
# Check if we have to skip this job
|
620 |
|
621 |
submit_time = TimeDelta(seconds=submit_time) |
622 |
|
623 |
if submit_time < from_time:
|
624 |
continue
|
625 |
|
626 |
if to_time != None and submit_time > to_time: |
627 |
break
|
628 |
|
629 |
if run_time < 0 and status==5: |
630 |
# This is a job that got cancelled while waiting in the queue
|
631 |
continue
|
632 |
|
633 |
if self.opt.queues != None: |
634 |
queues = [int(q) for q in self.opt.queues.split(",")] |
635 |
if queue not in queues: |
636 |
# Job was submitted to a queue we're filtering out
|
637 |
continue
|
638 |
|
639 |
if self.opt.scale != None: |
640 |
num_processors_requested = int(num_processors_requested/int(self.opt.scale)) |
641 |
|
642 |
# Make submission time relative to starting time of trace
|
643 |
submit_time = submit_time - from_time |
644 |
|
645 |
lease_request = ET.SubElement(requests, "lease-request")
|
646 |
lease_request.set("arrival", str(submit_time)) |
647 |
|
648 |
if run_time == 0: |
649 |
# As specified in the SWF documentation, a runtime of 0 means
|
650 |
# the job ran for less than a second, so we round up to 1.
|
651 |
run_time = 1
|
652 |
realduration = ET.SubElement(lease_request, "realduration")
|
653 |
realduration.set("time", str(TimeDelta(seconds=run_time))) |
654 |
|
655 |
lease = ET.SubElement(lease_request, "lease")
|
656 |
lease.set("id", `job_number`)
|
657 |
|
658 |
|
659 |
nodes = ET.SubElement(lease, "nodes")
|
660 |
node_set = ET.SubElement(nodes, "node-set")
|
661 |
node_set.set("numnodes", `num_processors_requested`)
|
662 |
res = ET.SubElement(node_set, "res")
|
663 |
res.set("type", "CPU") |
664 |
res.set("amount", "100") |
665 |
|
666 |
res = ET.SubElement(node_set, "res")
|
667 |
res.set("type", "Memory") |
668 |
if self.opt.mem != None: |
669 |
res.set("amount", self.opt.mem) |
670 |
elif mem_requested != -1: |
671 |
res.set("amount", `mem_requested / 1024`) |
672 |
else:
|
673 |
print "Cannot convert this file. Job #%i does not specify requested memory, and --memory parameter not specified" % job_number |
674 |
exit(-1) |
675 |
|
676 |
if run_time < 10: |
677 |
run_time2 = 10
|
678 |
else:
|
679 |
run_time2 = run_time |
680 |
slowdown = wait_time + run_time2 / float(run_time2)
|
681 |
slowdowns.append(slowdown) |
682 |
|
683 |
if not user_id in users: |
684 |
users.add(user_id) |
685 |
|
686 |
start = ET.SubElement(lease, "start")
|
687 |
#lease.set("preemptible", self.opt.preemptible)
|
688 |
lease.set("user", `user_id`)
|
689 |
|
690 |
duration_elem = ET.SubElement(lease, "duration")
|
691 |
duration_elem.set("time", str(TimeDelta(seconds=time_requested))) |
692 |
|
693 |
# No software environment specified. The annotator would have to be used to
|
694 |
# add one (or an image file when running a simulation).
|
695 |
software = ET.SubElement(lease, "software")
|
696 |
diskimage = ET.SubElement(software, "none")
|
697 |
|
698 |
# Add unused SWF attributes to the extra section, for future reference.
|
699 |
extra = ET.SubElement(lease, "extra")
|
700 |
attr = ET.SubElement(extra, "attr")
|
701 |
attr.set("name", "SWF_waittime") |
702 |
attr.set("value", `wait_time`)
|
703 |
attr = ET.SubElement(extra, "attr")
|
704 |
attr.set("name", "SWF_avgcputime") |
705 |
attr.set("value", `avg_cpu_time`)
|
706 |
attr = ET.SubElement(extra, "attr")
|
707 |
attr.set("name", "SWF_queue") |
708 |
attr.set("value", `queue`)
|
709 |
attr = ET.SubElement(extra, "attr")
|
710 |
attr.set("name", "SWF_group") |
711 |
attr.set("value", `group_id`)
|
712 |
attr = ET.SubElement(extra, "attr")
|
713 |
attr.set("name", "SWF_execnumber") |
714 |
attr.set("value", `exec_number`)
|
715 |
|
716 |
tree = ET.ElementTree(root) |
717 |
|
718 |
outfile = open(outfile, "w") |
719 |
tree.write(outfile) |
720 |
|
721 |
infile.close() |
722 |
outfile.close() |
723 |
|
724 |
slowdowns.sort() |
725 |
|
726 |
print "SLOWDOWNS" |
727 |
print "---------" |
728 |
print "min: %.2f" % slowdowns[0] |
729 |
print "10p: %.2f" % percentile(slowdowns, 0.1) |
730 |
print "25p: %.2f" % percentile(slowdowns, 0.25) |
731 |
print "med: %.2f" % percentile(slowdowns, 0.5) |
732 |
print "75p: %.2f" % percentile(slowdowns, 0.75) |
733 |
print "90p: %.2f" % percentile(slowdowns, 0.9) |
734 |
print "max: %.2f" % slowdowns[-1] |
735 |
print
|
736 |
print "USERS" |
737 |
print "-----" |
738 |
print "Number of users: %i" % len(users) |
739 |
|