root / branches / 1.1 / src / haizea / cli / commands.py @ 798
1 |
# -------------------------------------------------------------------------- #
|
---|---|
2 |
# Copyright 2006-2009, University of Chicago #
|
3 |
# Copyright 2008-2009, Distributed Systems Architecture Group, Universidad #
|
4 |
# Complutense de Madrid (dsa-research.org) #
|
5 |
# #
|
6 |
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
|
7 |
# not use this file except in compliance with the License. You may obtain #
|
8 |
# a copy of the License at #
|
9 |
# #
|
10 |
# http://www.apache.org/licenses/LICENSE-2.0 #
|
11 |
# #
|
12 |
# Unless required by applicable law or agreed to in writing, software #
|
13 |
# distributed under the License is distributed on an "AS IS" BASIS, #
|
14 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
|
15 |
# See the License for the specific language governing permissions and #
|
16 |
# limitations under the License. #
|
17 |
# -------------------------------------------------------------------------- #
|
18 |
|
19 |
from haizea.core.manager import Manager |
20 |
from haizea.common.utils import generate_config_name, unpickle |
21 |
from haizea.core.configfile import HaizeaConfig, HaizeaMultiConfig |
22 |
from haizea.core.accounting import AccountingDataCollection |
23 |
from haizea.common.config import ConfigException |
24 |
from haizea.common.stats import percentile |
25 |
from haizea.cli.optionparser import Option |
26 |
from haizea.cli import Command |
27 |
from haizea.lwf.generators import LWFGenerator, LWFAnnotationGenerator |
28 |
from haizea.lwf.analysis import LWFAnalyser |
29 |
from mx.DateTime import TimeDelta, Parser |
30 |
import haizea.common.defaults as defaults |
31 |
import sys |
32 |
import os |
33 |
import errno |
34 |
import signal |
35 |
from time import sleep |
36 |
|
37 |
try:
|
38 |
import xml.etree.ElementTree as ET |
39 |
except ImportError: |
40 |
# Compatibility with Python <=2.4
|
41 |
import elementtree.ElementTree as ET |
42 |
|
43 |
|
44 |
class haizea(Command): |
45 |
"""
|
46 |
This is the main Haizea command. By default, it will start Haizea as a daemon, which
|
47 |
can receive requests via RPC or interact with other components such as OpenNebula. It can
|
48 |
also start as a foreground process, and write all log messages to the console. All
|
49 |
Haizea options are specified through the configuration file."""
|
50 |
|
51 |
name = "haizea"
|
52 |
|
53 |
def __init__(self, argv): |
54 |
Command.__init__(self, argv)
|
55 |
|
56 |
self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", |
57 |
help = """
|
58 |
The location of the Haizea configuration file. If not
|
59 |
specified, Haizea will first look for it in
|
60 |
/etc/haizea/haizea.conf and then in ~/.haizea/haizea.conf.
|
61 |
"""))
|
62 |
self.optparser.add_option(Option("-f", "--fg", action="store_true", dest="foreground", |
63 |
help = """
|
64 |
Runs Haizea in the foreground.
|
65 |
"""))
|
66 |
self.optparser.add_option(Option("--stop", action="store_true", dest="stop", |
67 |
help = """
|
68 |
Stops the Haizea daemon.
|
69 |
"""))
|
70 |
|
71 |
def run(self): |
72 |
self.parse_options()
|
73 |
|
74 |
pidfile = defaults.DAEMON_PIDFILE # TODO: Make configurable
|
75 |
|
76 |
if self.opt.stop == None: |
77 |
# Start Haizea
|
78 |
|
79 |
# Check if a daemon is already running
|
80 |
if os.path.exists(pidfile):
|
81 |
pf = file(pidfile,'r') |
82 |
pid = int(pf.read().strip())
|
83 |
pf.close() |
84 |
|
85 |
try:
|
86 |
os.kill(pid, signal.SIG_DFL) |
87 |
except OSError, (err, msg): |
88 |
if err == errno.ESRCH:
|
89 |
# Pidfile is stale. Remove it.
|
90 |
os.remove(pidfile) |
91 |
else:
|
92 |
msg = "Unexpected error when checking pid file '%s'.\n%s\n" %(pidfile, msg)
|
93 |
sys.stderr.write(msg) |
94 |
sys.exit(1)
|
95 |
else:
|
96 |
msg = "Haizea seems to be already running (pid %i)\n" % pid
|
97 |
sys.stderr.write(msg) |
98 |
sys.exit(1)
|
99 |
|
100 |
try:
|
101 |
configfile=self.opt.conf
|
102 |
if configfile == None: |
103 |
# Look for config file in default locations
|
104 |
for loc in defaults.CONFIG_LOCATIONS: |
105 |
if os.path.exists(loc):
|
106 |
config = HaizeaConfig.from_file(loc) |
107 |
break
|
108 |
else:
|
109 |
print >> sys.stdout, "No configuration file specified, and none found at default locations." |
110 |
print >> sys.stdout, "Make sure a config file exists at:\n -> %s" % "\n -> ".join(defaults.CONFIG_LOCATIONS) |
111 |
print >> sys.stdout, "Or specify a configuration file with the --conf option." |
112 |
exit(1) |
113 |
else:
|
114 |
config = HaizeaConfig.from_file(configfile) |
115 |
except ConfigException, msg:
|
116 |
print >> sys.stderr, "Error in configuration file:" |
117 |
print >> sys.stderr, msg
|
118 |
exit(1) |
119 |
|
120 |
daemon = not self.opt.foreground |
121 |
|
122 |
manager = Manager(config, daemon, pidfile) |
123 |
|
124 |
manager.start() |
125 |
elif self.opt.stop: # Stop Haizea |
126 |
# Based on code in: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012
|
127 |
try:
|
128 |
pf = file(pidfile,'r') |
129 |
pid = int(pf.read().strip())
|
130 |
pf.close() |
131 |
except IOError: |
132 |
msg = "Could not stop, pid file '%s' missing.\n"
|
133 |
sys.stderr.write(msg % pidfile) |
134 |
sys.exit(1)
|
135 |
try:
|
136 |
while 1: |
137 |
os.kill(pid, signal.SIGTERM) |
138 |
sleep(1)
|
139 |
except OSError, err: |
140 |
err = str(err)
|
141 |
if err.find("No such process") > 0: |
142 |
os.remove(pidfile) |
143 |
else:
|
144 |
print str(err) |
145 |
sys.exit(1)
|
146 |
|
147 |
class haizea_generate_configs(Command):
    """
    Takes an Haizea multiconfiguration file and generates the individual
    configuration files. See the Haizea manual for more details on multiconfiguration
    files."""
    
    name = "haizea-generate-configs"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True,
                                         help = """
                                         Multiconfiguration file.
                                         """))
        self.optparser.add_option(Option("-d", "--dir", action="store", type="string", dest="dir", required=True,
                                         help = """
                                         Directory where the individual configuration files
                                         must be created.
                                         """))
        
    def run(self):
        # Expand the multiconfiguration into one .conf file per individual
        # configuration, named after its profile/trace/annotation/injection.
        self.parse_options()
        
        multiconfig = HaizeaMultiConfig.from_file(self.opt.conf)
        
        targetdir = os.path.abspath(self.opt.dir)
        if not os.path.exists(targetdir):
            os.makedirs(targetdir)
        
        for cfg in multiconfig.get_configs():
            confname = generate_config_name(cfg.get_attr("profile"),
                                            cfg.get("tracefile"),
                                            cfg.get("annotationfile"),
                                            cfg.get("injectionfile"))
            outfile = open(targetdir + "/%s.conf" % confname, "w")
            cfg.config.write(outfile)
            outfile.close()
192 |
|
193 |
class haizea_generate_scripts(Command): |
194 |
"""
|
195 |
Generates a script, based on a script template, to run all the individual
|
196 |
configuration files generated by haizea-generate-configs. This command
|
197 |
requires Mako Templates for Python (http://www.makotemplates.org/)."""
|
198 |
|
199 |
name = "haizea-generate-scripts"
|
200 |
|
201 |
def __init__(self, argv): |
202 |
Command.__init__(self, argv)
|
203 |
|
204 |
self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True, |
205 |
help = """
|
206 |
Multiconfiguration file used in haizea-generate-configs.
|
207 |
"""))
|
208 |
self.optparser.add_option(Option("-d", "--confdir", action="store", type="string", dest="confdir", required=True, |
209 |
help = """
|
210 |
Directory containing the individual configuration files.
|
211 |
"""))
|
212 |
self.optparser.add_option(Option("-t", "--template", action="store", type="string", dest="template", required=True, |
213 |
help = """
|
214 |
Script template (sample templates are included in /usr/share/haizea/etc)
|
215 |
"""))
|
216 |
self.optparser.add_option(Option("-m", "--only-missing", action="store_true", dest="onlymissing", |
217 |
help = """
|
218 |
If specified, the generated script will only run the configurations
|
219 |
that have not already produced a datafile. This is useful when some simulations
|
220 |
fail, and you don't want to have to rerun them all.
|
221 |
"""))
|
222 |
|
223 |
self.optparser.add_option(Option("-x", "--exclude", action="store", type="string", dest="exclude", |
224 |
help = """
|
225 |
...
|
226 |
"""))
|
227 |
|
228 |
|
229 |
def run(self): |
230 |
self.parse_options()
|
231 |
|
232 |
configfile=self.opt.conf
|
233 |
multiconfig = HaizeaMultiConfig.from_file(configfile) |
234 |
|
235 |
try:
|
236 |
from mako.template import Template |
237 |
except Exception, e: |
238 |
print "You need Mako Templates for Python to run this command." |
239 |
print "You can download them at http://www.makotemplates.org/" |
240 |
exit(1) |
241 |
|
242 |
configs = multiconfig.get_configs() |
243 |
|
244 |
etcdir = os.path.abspath(self.opt.confdir)
|
245 |
if not os.path.exists(etcdir): |
246 |
os.makedirs(etcdir) |
247 |
|
248 |
if self.opt.exclude != None: |
249 |
exclude = self.opt.exclude.split()
|
250 |
else:
|
251 |
exclude = [] |
252 |
|
253 |
templatedata = [] |
254 |
for c in configs: |
255 |
profile = c.get_attr("profile")
|
256 |
tracefile = c.get("tracefile")
|
257 |
injfile = c.get("injectionfile")
|
258 |
datafile = c.get("datafile")
|
259 |
annotationfile = c.get("annotationfile")
|
260 |
configname = generate_config_name(profile, tracefile, annotationfile, injfile) |
261 |
configfile = etcdir + "/%s.conf" % configname
|
262 |
if (not self.opt.onlymissing or not os.path.exists(datafile)) and not configfile in exclude: |
263 |
templatedata.append((configname, configfile)) |
264 |
|
265 |
template = Template(filename=self.opt.template)
|
266 |
print template.render(configs=templatedata, etcdir=etcdir)
|
267 |
|
268 |
|
269 |
class haizea_convert_data(Command): |
270 |
"""
|
271 |
Converts Haizea datafiles into another (easier to process) format.
|
272 |
"""
|
273 |
|
274 |
name = "haizea-convert-data"
|
275 |
|
276 |
def __init__(self, argv): |
277 |
Command.__init__(self, argv)
|
278 |
|
279 |
self.optparser.add_option(Option("-t", "--type", action="store", dest="type", |
280 |
choices = ["per-run", "per-lease", "counter"], |
281 |
help = """
|
282 |
Type of data to produce.
|
283 |
"""))
|
284 |
self.optparser.add_option(Option("-c", "--counter", action="store", dest="counter", |
285 |
help = """
|
286 |
Counter to print out when using '--type counter'.
|
287 |
"""))
|
288 |
self.optparser.add_option(Option("-f", "--format", action="store", type="string", dest="format", |
289 |
help = """
|
290 |
Output format. Currently supported: csv
|
291 |
"""))
|
292 |
self.optparser.add_option(Option("-l", "--list-counters", action="store_true", dest="list_counters", |
293 |
help = """
|
294 |
If specified, the command will just print out the names of counters
|
295 |
stored in the data file and then exit, regardless of other parameters.
|
296 |
"""))
|
297 |
|
298 |
def run(self): |
299 |
self.parse_options()
|
300 |
|
301 |
datafiles=self.args[1:] |
302 |
if len(datafiles) == 0: |
303 |
print "Please specify at least one datafile to convert" |
304 |
exit(1) |
305 |
|
306 |
datafile1 = unpickle(datafiles[0])
|
307 |
|
308 |
counter_names = datafile1.counters.keys() |
309 |
attr_names = datafile1.attrs.keys() |
310 |
lease_stats_names = datafile1.lease_stats_names |
311 |
stats_names = datafile1.stats_names |
312 |
|
313 |
if self.opt.list_counters: |
314 |
for counter in counter_names: |
315 |
print counter
|
316 |
exit(0) |
317 |
|
318 |
if self.opt.type == "per-run": |
319 |
header_fields = attr_names + stats_names |
320 |
elif self.opt.type == "per-lease": |
321 |
header_fields = attr_names + ["lease_id"] + lease_stats_names
|
322 |
elif self.opt.type == "counter": |
323 |
counter = self.opt.counter
|
324 |
if not datafile1.counters.has_key(counter): |
325 |
print "The specified datafile does not have a counter called '%s'" % counter |
326 |
exit(1) |
327 |
header_fields = attr_names + ["time", "value"] |
328 |
if datafile1.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
|
329 |
header_fields.append("average")
|
330 |
|
331 |
header = ",".join(header_fields)
|
332 |
|
333 |
print header
|
334 |
|
335 |
for datafile in datafiles: |
336 |
try:
|
337 |
data = unpickle(datafile) |
338 |
|
339 |
attrs = [data.attrs[attr_name] for attr_name in attr_names] |
340 |
|
341 |
if self.opt.type == "per-run": |
342 |
fields = attrs + [str(data.stats[stats_name]) for stats_name in stats_names] |
343 |
print ",".join(fields) |
344 |
elif self.opt.type == "per-lease": |
345 |
leases = data.lease_stats |
346 |
for lease_id, lease_stat in leases.items(): |
347 |
fields = attrs + [`lease_id`] + [str(lease_stat.get(lease_stat_name,"")) for lease_stat_name in lease_stats_names] |
348 |
print ",".join(fields) |
349 |
elif self.opt.type == "counter": |
350 |
for (time, lease_id, value, avg) in data.counters[counter]: |
351 |
fields = attrs + [`time`, `value`] |
352 |
if data.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
|
353 |
fields.append(`avg`) |
354 |
print ",".join(fields) |
355 |
except:
|
356 |
print >> sys.stderr, "Error reading file %s" % (datafile) |
357 |
|
358 |
class haizea_lwf_generate(Command):
    # Command-line front end for LWFGenerator: synthesizes an LWF trace file.
    
    name = "haizea-lwf-generate"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True,
                                         help = """
                                         LWF file
                                         """))
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf",
                                         help = """
                                         ...
                                         """))
        
    def run(self):
        # Parse arguments, then delegate all of the work to LWFGenerator.
        self.parse_options()
        LWFGenerator(self.opt.outf, self.opt.conf).generate()
382 |
|
383 |
class haizea_lwf_stats(Command):
    # Command-line front end for LWFAnalyser: prints statistics about an LWF file.
    
    name = "haizea-lwf-stats"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        self.optparser.add_option(Option("-i", "--in", action="store", type="string", dest="inf", required=True,
                                         help = """
                                         Input file
                                         """))
        self.optparser.add_option(Option("-l", "--utilization-length", action="store", type="string", dest="utilization_length",
                                         help = """
                                         Length of the utilization interval in format DD:HH:MM:SS. Default is until
                                         the time the last lease is requested.
                                         """))
        
    def run(self):
        # Parse the optional interval length, then hand off to the analyser.
        self.parse_options()
        
        length = self.opt.utilization_length
        if length != None:
            length = Parser.DateTimeDeltaFromString(length)
        
        LWFAnalyser(self.opt.inf, length).analyse()
410 |
|
411 |
class haizea_lwf_annotate(Command):
    # Command-line front end for LWFAnnotationGenerator: produces an
    # annotation file for an existing LWF trace.
    
    name = "haizea-lwf-annotate"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        self.optparser.add_option(Option("-i", "--in", action="store", type="string", dest="inf", required=True,
                                         help = """
                                         LWF file
                                         """))
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True,
                                         help = """
                                         Annotation file
                                         """))
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf",
                                         help = """
                                         ...
                                         """))
        
    def run(self):
        # Parse arguments, then delegate to LWFAnnotationGenerator.
        self.parse_options()
        LWFAnnotationGenerator(self.opt.inf, self.opt.outf, self.opt.conf).generate()
440 |
|
441 |
|
442 |
# TODO: Has to be moved to the haizea.lwf package, or removed altogether,
|
443 |
# given that this was a one-time thing to convert the old ad hoc LWF files
|
444 |
class haizea_lwf2xml(Command): |
445 |
"""
|
446 |
Converts old Haizea LWF file into new XML-based LWF format
|
447 |
"""
|
448 |
|
449 |
name = "haizea-lwf2xml"
|
450 |
|
451 |
def __init__(self, argv): |
452 |
Command.__init__(self, argv)
|
453 |
|
454 |
self.optparser.add_option(Option("-i", "--in", action="store", type="string", dest="inf", |
455 |
help = """
|
456 |
Input file
|
457 |
"""))
|
458 |
self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", |
459 |
help = """
|
460 |
Output file
|
461 |
"""))
|
462 |
|
463 |
def run(self): |
464 |
self.parse_options()
|
465 |
|
466 |
infile = self.opt.inf
|
467 |
outfile = self.opt.outf
|
468 |
|
469 |
root = ET.Element("lease-workload")
|
470 |
root.set("name", infile)
|
471 |
description = ET.SubElement(root, "description")
|
472 |
time = TimeDelta(seconds=0)
|
473 |
lease_id = 1
|
474 |
requests = ET.SubElement(root, "lease-requests")
|
475 |
|
476 |
|
477 |
infile = open(infile, "r") |
478 |
for line in infile: |
479 |
if line[0]!='#' and len(line.strip()) != 0: |
480 |
fields = line.split() |
481 |
submit_time = int(fields[0]) |
482 |
start_time = int(fields[1]) |
483 |
duration = int(fields[2]) |
484 |
real_duration = int(fields[3]) |
485 |
num_nodes = int(fields[4]) |
486 |
cpu = int(fields[5]) |
487 |
mem = int(fields[6]) |
488 |
disk = int(fields[7]) |
489 |
vm_image = fields[8]
|
490 |
vm_imagesize = int(fields[9]) |
491 |
|
492 |
|
493 |
|
494 |
lease_request = ET.SubElement(requests, "lease-request")
|
495 |
lease_request.set("arrival", str(TimeDelta(seconds=submit_time))) |
496 |
if real_duration != duration:
|
497 |
realduration = ET.SubElement(lease_request, "realduration")
|
498 |
realduration.set("time", str(TimeDelta(seconds=real_duration))) |
499 |
|
500 |
lease = ET.SubElement(lease_request, "lease")
|
501 |
lease.set("id", `lease_id`)
|
502 |
|
503 |
|
504 |
nodes = ET.SubElement(lease, "nodes")
|
505 |
node_set = ET.SubElement(nodes, "node-set")
|
506 |
node_set.set("numnodes", `num_nodes`)
|
507 |
res = ET.SubElement(node_set, "res")
|
508 |
res.set("type", "CPU") |
509 |
if cpu == 1: |
510 |
res.set("amount", "100") |
511 |
else:
|
512 |
pass
|
513 |
res = ET.SubElement(node_set, "res")
|
514 |
res.set("type", "Memory") |
515 |
res.set("amount", `mem`)
|
516 |
|
517 |
start = ET.SubElement(lease, "start")
|
518 |
if start_time == -1: |
519 |
lease.set("preemptible", "true") |
520 |
else:
|
521 |
lease.set("preemptible", "false") |
522 |
exact = ET.SubElement(start, "exact")
|
523 |
exact.set("time", str(TimeDelta(seconds=start_time))) |
524 |
|
525 |
duration_elem = ET.SubElement(lease, "duration")
|
526 |
duration_elem.set("time", str(TimeDelta(seconds=duration))) |
527 |
|
528 |
software = ET.SubElement(lease, "software")
|
529 |
diskimage = ET.SubElement(software, "disk-image")
|
530 |
diskimage.set("id", vm_image)
|
531 |
diskimage.set("size", `vm_imagesize`)
|
532 |
|
533 |
|
534 |
lease_id += 1
|
535 |
tree = ET.ElementTree(root) |
536 |
print ET.tostring(root)
|
537 |
|
538 |
|
539 |
|
540 |
|
541 |
# TODO: Has to be moved to the haizea.lwf package
class haizea_swf2lwf(Command):
    """
    Converts Standard Workload Format (SWF, used in the Parallel Workloads Archive at
    http://www.cs.huji.ac.il/labs/parallel/workload/) to Lease Workload Format
    """
    
    name = "haizea-swf2lwf"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        
        self.optparser.add_option(Option("-i", "--in", action="store", type="string", dest="inf", required=True,
                                         help = """
                                         Input file
                                         """))
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True,
                                         help = """
                                         Output file
                                         """))
        self.optparser.add_option(Option("-p", "--preemptible", action="store", type="string", dest="preemptible", required=True,
                                         help = """
                                         Should the leases be preemptable or not?
                                         """))
        self.optparser.add_option(Option("-f", "--from", action="store", type="string", dest="from_time", default="00:00:00:00",
                                         help = """
                                         This parameter, together with the --amount parameter, allows converting just
                                         an interval of the SWF file.
                                         
                                         This parameter must be a timestamp of the format DD:HH:MM:SS. Only jobs sumitted
                                         at or after that time will be converted.
                                         
                                         Default is 00:00:00:00
                                         """))
        self.optparser.add_option(Option("-l", "--interval-length", action="store", type="string", dest="interval_length",
                                         help = """
                                         Length of the interval in format DD:HH:MM:SS. Default is to convert jobs until the
                                         end of the SWF file.
                                         """))
        self.optparser.add_option(Option("-q", "--queues", action="store", type="string", dest="queues",
                                         help = """
                                         Only convert jobs from the specified queues
                                         """))
        self.optparser.add_option(Option("-m", "--memory", action="store", type="string", dest="mem",
                                         help = """
                                         Memory requested by jobs.
                                         """))
        self.optparser.add_option(Option("-s", "--scale", action="store", type="string", dest="scale",
                                         help = """
                                         Scale number of processors by 1/SCALE.
                                         """))
        self.optparser.add_option(Option("-n", "--site", action="store", type="string", dest="site",
                                         help = """
                                         File containing site description
                                         """))
        
    def run(self):
        """Convert an SWF trace into an XML lease workload, writing it to the
        --out file, and print slowdown/user/utilization statistics to stdout.
        
        NOTE(review): the utilization summary at the end divides by
        site_num_nodes, which is only assigned when --site is given; running
        without --site raises a NameError after the output file is written --
        confirm whether --site is effectively mandatory."""
        self.parse_options()
        
        infile = self.opt.inf
        outfile = self.opt.outf
        
        # Optional [from_time, to_time] window restricting which jobs convert.
        from_time = Parser.DateTimeDeltaFromString(self.opt.from_time)
        if self.opt.interval_length == None:
            to_time = None
        else:
            to_time = from_time + Parser.DateTimeDeltaFromString(self.opt.interval_length)
        
        root = ET.Element("lease-workload")
        root.set("name", infile)
        description = ET.SubElement(root, "description")
        description.text = "Created with haizea-swf2lwf %s" % " ".join(self.argv[1:])
        
        if self.opt.site != None:
            # Embed the site description in the workload, and remember its
            # node count for the utilization computation at the end.
            site_elem = ET.parse(self.opt.site).getroot()
            site_num_nodes = int(site_elem.find("nodes").find("node-set").get("numnodes"))
            root.append(site_elem)
        
        time = TimeDelta(seconds=0)
        requests = ET.SubElement(root, "lease-requests")
        
        # Aggregates collected while converting, reported after the loop.
        slowdowns = []
        users = set()
        utilization = 0
        utilization_no_ramp = 0
        # No explicit interval length: use the submit time of the last job.
        if to_time == None:
            swf = open(infile, 'r')
            lines = swf.readlines()
            lastline = lines[-1]
            to_time = TimeDelta(seconds=int(lastline.split()[1]))
            swf.close()
        
        # Jobs ending before the first 5% of the interval are excluded from
        # the "no ramp-up" utilization figure.
        no_ramp_cutoff = from_time + ((to_time - from_time) * 0.05)
        
        infile = open(infile, "r")
        for line in infile:
            # ';' starts an SWF comment/header line.
            if line[0]!=';' and len(line.strip()) != 0:
                fields = line.split()
                
                # Unpack the job's attributes. The description of each field is
                # taken from the SWF documentation at
                # http://www.cs.huji.ac.il/labs/parallel/workload/swf.html
                
                # Job Number -- a counter field, starting from 1.
                job_number = int(fields[0])
                
                # Submit Time -- in seconds. The earliest time the log refers to is zero,
                # and is the submittal time the of the first job. The lines in the log are
                # sorted by ascending submittal times. It makes sense for jobs to also be
                # numbered in this order.
                submit_time = int(fields[1])
                
                # Wait Time -- in seconds. The difference between the job's submit time
                # and the time at which it actually began to run. Naturally, this is only
                # relevant to real logs, not to models.
                wait_time = int(fields[2])
                
                # Run Time -- in seconds. The wall clock time the job was running (end
                # time minus start time).
                # We decided to use ``wait time'' and ``run time'' instead of the equivalent
                # ``start time'' and ``end time'' because they are directly attributable to
                # the scheduler and application, and are more suitable for models where only
                # the run time is relevant.
                # Note that when values are rounded to an integral number of seconds (as
                # often happens in logs) a run time of 0 is possible and means the job ran
                # for less than 0.5 seconds. On the other hand it is permissable to use
                # floating point values for time fields.
                run_time = int(fields[3])
                
                # Number of Allocated Processors -- an integer. In most cases this is also
                # the number of processors the job uses; if the job does not use all of them,
                # we typically don't know about it.
                num_processors_allocated = int(fields[4])
                
                # Average CPU Time Used -- both user and system, in seconds. This is the
                # average over all processors of the CPU time used, and may therefore be
                # smaller than the wall clock runtime. If a log contains the total CPU time
                # used by all the processors, it is divided by the number of allocated
                # processors to derive the average.
                avg_cpu_time = float(fields[5])
                
                # Used Memory -- in kilobytes. This is again the average per processor.
                used_memory = int(fields[6])
                
                # Requested Number of Processors.
                num_processors_requested = int(fields[7])
                
                # Requested Time. This can be either runtime (measured in wallclock seconds),
                # or average CPU time per processor (also in seconds) -- the exact meaning
                # is determined by a header comment. In many logs this field is used for
                # the user runtime estimate (or upper bound) used in backfilling. If a log
                # contains a request for total CPU time, it is divided by the number of
                # requested processors.
                time_requested = int(fields[8])
                
                # Requested Memory (again kilobytes per processor).
                mem_requested = int(fields[9])
                
                # Status 1 if the job was completed, 0 if it failed, and 5 if cancelled.
                # If information about chekcpointing or swapping is included, other values
                # are also possible. See usage note below. This field is meaningless for
                # models, so would be -1.
                status = int(fields[10])
                
                # User ID -- a natural number, between one and the number of different users.
                user_id = int(fields[11])
                
                # Group ID -- a natural number, between one and the number of different groups.
                # Some systems control resource usage by groups rather than by individual users.
                group_id = int(fields[12])
                
                # Executable (Application) Number -- a natural number, between one and the number
                # of different applications appearing in the workload. in some logs, this might
                # represent a script file used to run jobs rather than the executable directly;
                # this should be noted in a header comment.
                exec_number = int(fields[13])
                
                # Queue Number -- a natural number, between one and the number of different
                # queues in the system. The nature of the system's queues should be explained
                # in a header comment. This field is where batch and interactive jobs should
                # be differentiated: we suggest the convention of denoting interactive jobs by 0.
                queue = int(fields[14])
                
                # Partition Number -- a natural number, between one and the number of different
                # partitions in the systems. The nature of the system's partitions should be
                # explained in a header comment. For example, it is possible to use partition
                # numbers to identify which machine in a cluster was used.
                partition = int(fields[15])
                
                # Preceding Job Number -- this is the number of a previous job in the workload,
                # such that the current job can only start after the termination of this preceding
                # job. Together with the next field, this allows the workload to include feedback
                # as described below.
                prec_job = int(fields[16])
                
                # Think Time from Preceding Job -- this is the number of seconds that should elapse
                # between the termination of the preceding job and the submittal of this one.
                prec_job_thinktime = int(fields[17])
                
                # Check if we have to skip this job
                
                submit_time = TimeDelta(seconds=submit_time)
                
                if submit_time < from_time:
                    continue
                
                if to_time != None and submit_time > to_time:
                    # SWF lines are sorted by submit time, so no later job can
                    # fall inside the window either.
                    break
                
                if run_time < 0 and status==5:
                    # This is a job that got cancelled while waiting in the queue
                    continue
                
                if self.opt.queues != None:
                    queues = [int(q) for q in self.opt.queues.split(",")]
                    if queue not in queues:
                        # Job was submitted to a queue we're filtering out
                        continue
                    
                # Prefer the requested processor count; fall back on the
                # allocated count when the request is unknown (-1).
                if num_processors_requested == -1:
                    num_processors = num_processors_allocated
                else:
                    num_processors = num_processors_requested
                
                if self.opt.scale != None:
                    num_processors = int(num_processors/int(self.opt.scale))
                
                lease_request = ET.SubElement(requests, "lease-request")
                # Make submission time relative to starting time of trace
                lease_request.set("arrival", str(submit_time - from_time))
                
                if run_time == 0:
                    # As specified in the SWF documentation, a runtime of 0 means
                    # the job ran for less than a second, so we round up to 1.
                    run_time = 1
                realduration = ET.SubElement(lease_request, "realduration")
                realduration.set("time", str(TimeDelta(seconds=run_time)))
                
                lease = ET.SubElement(lease_request, "lease")
                lease.set("id", `job_number`)
                
                nodes = ET.SubElement(lease, "nodes")
                node_set = ET.SubElement(nodes, "node-set")
                node_set.set("numnodes", `num_processors`)
                res = ET.SubElement(node_set, "res")
                res.set("type", "CPU")
                res.set("amount", "100")
                
                res = ET.SubElement(node_set, "res")
                res.set("type", "Memory")
                if self.opt.mem != None:
                    # --memory overrides whatever the trace says.
                    res.set("amount", self.opt.mem)
                elif mem_requested != -1:
                    # SWF memory is in KB; the LWF amount is in MB.
                    res.set("amount", `mem_requested / 1024`)
                else:
                    print "Cannot convert this file. Job #%i does not specify requested memory, and --memory parameter not specified" % job_number
                    exit(-1)
                    
                if wait_time != -1:
                    # Bounded slowdown: runtimes under 10s are clamped to 10
                    # so tiny jobs don't dominate the metric.
                    if run_time < 10:
                        run_time2 = 10.0
                    else:
                        run_time2 = float(run_time)
                    slowdown = (wait_time + run_time2) / run_time2
                    slowdowns.append(slowdown)
                
                if not user_id in users:
                    users.add(user_id)
                
                # Total utilization
                utilization += run_time * num_processors
                
                # Removing ramp-up and ramp-down effects
                if wait_time != -1 and submit_time + run_time >= no_ramp_cutoff:
                    start_in_interval = max(no_ramp_cutoff, submit_time)
                    end_in_interval = min(to_time, submit_time + run_time)
                    time_in_interval = end_in_interval - start_in_interval
                    utilization_no_ramp += time_in_interval * num_processors
                
                start = ET.SubElement(lease, "start")
                lease.set("preemptible", self.opt.preemptible)
                lease.set("user", `user_id`)
                
                duration_elem = ET.SubElement(lease, "duration")
                duration_elem.set("time", str(TimeDelta(seconds=time_requested)))
                
                # No software environment specified. The annotator would have to be used to
                # add one (or an image file when running a simulation).
                software = ET.SubElement(lease, "software")
                diskimage = ET.SubElement(software, "none")
                
                # Add unused SWF attributes to the extra section, for future reference.
                extra = ET.SubElement(lease, "extra")
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_waittime")
                attr.set("value", `wait_time`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_runtime")
                attr.set("value", `run_time`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_avgcputime")
                attr.set("value", `avg_cpu_time`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_queue")
                attr.set("value", `queue`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_group")
                attr.set("value", `group_id`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_execnumber")
                attr.set("value", `exec_number`)
                    
        tree = ET.ElementTree(root)
        
        outfile = open(outfile, "w")
        tree.write(outfile)
        
        infile.close()
        outfile.close()
        
        slowdowns.sort()
        
        # Fraction of total site capacity (nodes x interval seconds) consumed.
        total_capacity = site_num_nodes * (to_time - from_time).seconds
        utilization = float(utilization) / float(total_capacity)
        utilization_no_ramp = float(utilization_no_ramp) / float(total_capacity)
        
        if len(slowdowns) > 0:
            print "SLOWDOWNS"
            print "---------"
            print "min: %.2f" % slowdowns[0]
            print "10p: %.2f" % percentile(slowdowns, 0.1)
            print "25p: %.2f" % percentile(slowdowns, 0.25)
            print "med: %.2f" % percentile(slowdowns, 0.5)
            print "75p: %.2f" % percentile(slowdowns, 0.75)
            print "90p: %.2f" % percentile(slowdowns, 0.9)
            print "max: %.2f" % slowdowns[-1]
            print
        print "USERS"
        print "-----"
        print "Number of users: %i" % len(users)
        print
        print "UTILIZATION"
        print "-----------"
        print "Utilization: %.2f%%" % (utilization * 100)
        if utilization_no_ramp != 0:
            print "Utilization (no ramp-up/ramp-down): %.2f%%" % (utilization_no_ramp * 100)
889 |
|