root / branches / 1.1 / src / haizea / cli / commands.py @ 707

# -------------------------------------------------------------------------- #
# Copyright 2006-2009, University of Chicago                                 #
# Copyright 2008-2009, Distributed Systems Architecture Group, Universidad   #
# Complutense de Madrid (dsa-research.org)                                   #
#                                                                            #
# Licensed under the Apache License, Version 2.0 (the "License"); you may    #
# not use this file except in compliance with the License. You may obtain    #
# a copy of the License at                                                   #
#                                                                            #
# http://www.apache.org/licenses/LICENSE-2.0                                 #
#                                                                            #
# Unless required by applicable law or agreed to in writing, software        #
# distributed under the License is distributed on an "AS IS" BASIS,          #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
# See the License for the specific language governing permissions and        #
# limitations under the License.                                             #
# -------------------------------------------------------------------------- #

from haizea.core.manager import Manager
from haizea.common.utils import generate_config_name, unpickle
from haizea.core.configfile import HaizeaConfig, HaizeaMultiConfig
from haizea.core.accounting import AccountingDataCollection
from haizea.common.config import ConfigException
from haizea.common.stats import percentile
from haizea.cli.optionparser import Option
from haizea.cli import Command
from mx.DateTime import TimeDelta, Parser
import haizea.common.defaults as defaults
import sys
import os
import errno
import signal
from time import sleep

try:
    import xml.etree.ElementTree as ET
except ImportError:
    # Compatibility with Python <=2.4
    import elementtree.ElementTree as ET


class haizea(Command):
    """
    This is the main Haizea command. By default, it will start Haizea as a daemon, which
    can receive requests via RPC or interact with other components such as OpenNebula. It can
    also start as a foreground process, and write all log messages to the console. All
    Haizea options are specified through the configuration file."""

    name = "haizea"

    def __init__(self, argv):
        Command.__init__(self, argv)

        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf",
                                         help = """
                                         The location of the Haizea configuration file. If not
                                         specified, Haizea will first look for it in
                                         /etc/haizea/haizea.conf and then in ~/.haizea/haizea.conf.
                                         """))
        self.optparser.add_option(Option("-f", "--fg", action="store_true",  dest="foreground",
                                         help = """
                                         Runs Haizea in the foreground.
                                         """))
        self.optparser.add_option(Option("--stop", action="store_true",  dest="stop",
                                         help = """
                                         Stops the Haizea daemon.
                                         """))
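    # A usage sketch (illustrative, not taken from the Haizea manual); it relies
    # only on the options defined above, and the configuration file names are
    # placeholders:
    #
    #   haizea -c /etc/haizea/haizea.conf      # start the daemon with an explicit config file
    #   haizea -c myexperiment.conf --fg       # run in the foreground, logging to the console
    #   haizea --stop                          # stop a running daemon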
                
    def run(self):
        self.parse_options()

        pidfile = defaults.DAEMON_PIDFILE # TODO: Make configurable

        if self.opt.stop == None:
            # Start Haizea

            # Check if a daemon is already running
            if os.path.exists(pidfile):
                pf = file(pidfile, 'r')
                pid = int(pf.read().strip())
                pf.close()

                try:
                    # Sending signal SIG_DFL (0) does not deliver a signal; it only
                    # checks whether a process with that pid exists.
                    os.kill(pid, signal.SIG_DFL)
                except OSError, (err, msg):
                    if err == errno.ESRCH:
                        # Pidfile is stale. Remove it.
                        os.remove(pidfile)
                    else:
                        msg = "Unexpected error when checking pid file '%s'.\n%s\n" %(pidfile, msg)
                        sys.stderr.write(msg)
                        sys.exit(1)
                else:
                    msg = "Haizea seems to be already running (pid %i)\n" % pid
                    sys.stderr.write(msg)
                    sys.exit(1)

            try:
                configfile = self.opt.conf
                if configfile == None:
                    # Look for config file in default locations
                    for loc in defaults.CONFIG_LOCATIONS:
                        if os.path.exists(loc):
                            config = HaizeaConfig.from_file(loc)
                            break
                    else:  # The loop completed without a break: no config file was found
                        print >> sys.stdout, "No configuration file specified, and none found at default locations."
                        print >> sys.stdout, "Make sure a config file exists at:\n  -> %s" % "\n  -> ".join(defaults.CONFIG_LOCATIONS)
                        print >> sys.stdout, "Or specify a configuration file with the --conf option."
                        exit(1)
                else:
                    config = HaizeaConfig.from_file(configfile)
            except ConfigException, msg:
                print >> sys.stderr, "Error in configuration file:"
                print >> sys.stderr, msg
                exit(1)

            daemon = not self.opt.foreground

            manager = Manager(config, daemon, pidfile)

            manager.start()
        elif self.opt.stop: # Stop Haizea
            # Based on code in:  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012
            try:
                pf = file(pidfile, 'r')
                pid = int(pf.read().strip())
                pf.close()
            except IOError:
                msg = "Could not stop, pid file '%s' missing.\n"
                sys.stderr.write(msg % pidfile)
                sys.exit(1)
            try:
                while 1:
                    os.kill(pid, signal.SIGTERM)
                    sleep(1)
            except OSError, err:
                err = str(err)
                if err.find("No such process") > 0:
                    os.remove(pidfile)
                else:
                    print str(err)
                    sys.exit(1)

    
class haizea_generate_configs(Command):
    """
    Takes a Haizea multiconfiguration file and generates the individual
    configuration files. See the Haizea manual for more details on multiconfiguration
    files."""

    name = "haizea-generate-configs"

    def __init__(self, argv):
        Command.__init__(self, argv)

        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True,
                                         help = """
                                         Multiconfiguration file.
                                         """))
        self.optparser.add_option(Option("-d", "--dir", action="store", type="string", dest="dir", required=True,
                                         help = """
                                         Directory where the individual configuration files
                                         must be created.
                                         """))
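    # A hypothetical invocation (file and directory names are placeholders):
    #
    #   haizea-generate-configs -c myexperiment-multi.conf -d ./generated-configs
    #
    # One .conf file is written to the directory for every configuration described
    # by the multiconfiguration file.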
                
    def run(self):
        self.parse_options()

        configfile = self.opt.conf
        multiconfig = HaizeaMultiConfig.from_file(configfile)

        etcdir = self.opt.dir

        configs = multiconfig.get_configs()

        etcdir = os.path.abspath(etcdir)
        if not os.path.exists(etcdir):
            os.makedirs(etcdir)

        for c in configs:
            profile = c.get_attr("profile")
            tracefile = c.get("tracefile")
            injfile = c.get("injectionfile")
            annotationfile = c.get("annotationfile")
            configname = generate_config_name(profile, tracefile, annotationfile, injfile)
            configfile = etcdir + "/%s.conf" % configname
            fc = open(configfile, "w")
            c.config.write(fc)
            fc.close()


class haizea_generate_scripts(Command):
    """
    Generates a script, based on a script template, to run all the individual
    configuration files generated by haizea-generate-configs. This command
    requires Mako Templates for Python (http://www.makotemplates.org/)."""

    name = "haizea-generate-scripts"

    def __init__(self, argv):
        Command.__init__(self, argv)

        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True,
                                         help = """
                                         Multiconfiguration file used in haizea-generate-configs.
                                         """))
        self.optparser.add_option(Option("-d", "--confdir", action="store", type="string", dest="confdir", required=True,
                                         help = """
                                         Directory containing the individual configuration files.
                                         """))
        self.optparser.add_option(Option("-t", "--template", action="store", type="string", dest="template", required=True,
                                         help = """
                                         Script template (sample templates are included in /usr/share/haizea/etc)
                                         """))
        self.optparser.add_option(Option("-m", "--only-missing", action="store_true",  dest="onlymissing",
                                         help = """
                                         If specified, the generated script will only run the configurations
                                         that have not already produced a datafile. This is useful when some simulations
                                         fail, and you don't want to have to rerun them all.
                                         """))
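    # A hypothetical invocation (names are placeholders; sample templates ship
    # under /usr/share/haizea/etc, as noted above):
    #
    #   haizea-generate-scripts -c myexperiment-multi.conf -d ./generated-configs -t run.sh.mako > run.sh
    #
    # Adding -m/--only-missing makes the generated script skip configurations whose
    # datafile already exists.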
                
    def run(self):
        self.parse_options()

        configfile = self.opt.conf
        multiconfig = HaizeaMultiConfig.from_file(configfile)

        try:
            from mako.template import Template
        except ImportError:
            print "You need Mako Templates for Python to run this command."
            print "You can download them at http://www.makotemplates.org/"
            exit(1)

        configs = multiconfig.get_configs()

        etcdir = os.path.abspath(self.opt.confdir)
        if not os.path.exists(etcdir):
            os.makedirs(etcdir)

        templatedata = []
        for c in configs:
            profile = c.get_attr("profile")
            tracefile = c.get("tracefile")
            injfile = c.get("injectionfile")
            datafile = c.get("datafile")
            annotationfile = c.get("annotationfile")
            configname = generate_config_name(profile, tracefile, annotationfile, injfile)
            if not self.opt.onlymissing or not os.path.exists(datafile):
                configfile = etcdir + "/%s.conf" % configname
                templatedata.append((configname, configfile))

        template = Template(filename=self.opt.template)
        print template.render(configs=templatedata, etcdir=etcdir)

    
class haizea_convert_data(Command):
    """
    Converts Haizea datafiles into another (easier to process) format.
    """

    name = "haizea-convert-data"

    def __init__(self, argv):
        Command.__init__(self, argv)

        self.optparser.add_option(Option("-t", "--type", action="store",  dest="type",
                                         choices = ["per-run", "per-lease", "counter"],
                                         help = """
                                         Type of data to produce.
                                         """))
        self.optparser.add_option(Option("-c", "--counter", action="store",  dest="counter",
                                         help = """
                                         Counter to print out when using '--type counter'.
                                         """))
        self.optparser.add_option(Option("-f", "--format", action="store", type="string", dest="format",
                                         help = """
                                         Output format. Currently supported: csv
                                         """))
        self.optparser.add_option(Option("-l", "--list-counters", action="store_true",  dest="list_counters",
                                         help = """
                                         If specified, the command will just print out the names of counters
                                         stored in the data file and then exit, regardless of other parameters.
                                         """))
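    # Hypothetical invocations (datafile and counter names are placeholders);
    # the converted data is printed to standard output as CSV:
    #
    #   haizea-convert-data -l results.dat                              # list the counters in a datafile
    #   haizea-convert-data -t per-run -f csv results1.dat results2.dat
    #   haizea-convert-data -t per-lease -f csv results1.dat
    #   haizea-convert-data -t counter -c <counter-name> results1.dat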
                
    def run(self):
        self.parse_options()

        datafiles = self.args[1:]
        if len(datafiles) == 0:
            print "Please specify at least one datafile to convert"
            exit(1)

        datafile1 = unpickle(datafiles[0])

        counter_names = datafile1.counters.keys()
        attr_names = datafile1.attrs.keys()
        lease_stats_names = datafile1.lease_stats_names
        stats_names = datafile1.stats_names

        if self.opt.list_counters:
            for counter in counter_names:
                print counter
            exit(0)

        if self.opt.type == "per-run":
            header_fields = attr_names + stats_names
        elif self.opt.type == "per-lease":
            header_fields = attr_names + ["lease_id"] + lease_stats_names
        elif self.opt.type == "counter":
            counter = self.opt.counter
            if not datafile1.counters.has_key(counter):
                print "The specified datafile does not have a counter called '%s'" % counter
                exit(1)
            header_fields = attr_names + ["time", "value"]
            if datafile1.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
                header_fields.append("average")

        header = ",".join(header_fields)

        print header

        for datafile in datafiles:
            data = unpickle(datafile)

            attrs = [data.attrs[attr_name] for attr_name in attr_names]

            if self.opt.type == "per-run":
                fields = attrs + [`data.stats[stats_name]` for stats_name in stats_names]
                print ",".join(fields)
            elif self.opt.type == "per-lease":
                leases = data.lease_stats
                for lease_id, lease_stat in leases.items():
                    fields = attrs + [`lease_id`] + [`lease_stat.get(lease_stat_name,"")` for lease_stat_name in lease_stats_names]
                    print ",".join(fields)
            elif self.opt.type == "counter":
                for (time, lease_id, value, avg) in data.counters[counter]:
                    fields = attrs + [`time`, `value`]
                    if data.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
                        fields.append(`avg`)
                    print ",".join(fields)

    
class haizea_lwf2xml(Command):
    """
    Converts an old Haizea LWF file into the new XML-based LWF format.
    """

    name = "haizea-lwf2xml"

    def __init__(self, argv):
        Command.__init__(self, argv)

        self.optparser.add_option(Option("-i", "--in", action="store",  type="string", dest="inf",
                                         help = """
                                         Input file
                                         """))
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf",
                                         help = """
                                         Output file
                                         """))
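    # A hypothetical invocation (file names are placeholders):
    #
    #   haizea-lwf2xml -i old-format.lwf -o new-format.lwf
    #
    # Roughly, each line of the old trace becomes a <lease-request> element of the
    # shape sketched below (values illustrative, element names as used in run()):
    #
    #   <lease-request arrival="...">
    #     <lease id="1" preemptible="false">
    #       <nodes><node-set numnodes="4">...</node-set></nodes>
    #       <start><exact time="..."/></start>
    #       <duration time="..."/>
    #       <software><disk-image id="disk.img" size="1024"/></software>
    #     </lease>
    #   </lease-request>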
                
    def run(self):
        self.parse_options()

        infile = self.opt.inf
        outfile = self.opt.outf

        root = ET.Element("lease-workload")
        root.set("name", infile)
        description = ET.SubElement(root, "description")
        time = TimeDelta(seconds=0)
        lease_id = 1
        requests = ET.SubElement(root, "lease-requests")

        infile = open(infile, "r")
        for line in infile:
            if line[0]!='#' and len(line.strip()) != 0:
                fields = line.split()
                submit_time = int(fields[0])
                start_time = int(fields[1])
                duration = int(fields[2])
                real_duration = int(fields[3])
                num_nodes = int(fields[4])
                cpu = int(fields[5])
                mem = int(fields[6])
                disk = int(fields[7])
                vm_image = fields[8]
                vm_imagesize = int(fields[9])

                lease_request = ET.SubElement(requests, "lease-request")
                lease_request.set("arrival", str(TimeDelta(seconds=submit_time)))
                if real_duration != duration:
                    realduration = ET.SubElement(lease_request, "realduration")
                    realduration.set("time", str(TimeDelta(seconds=real_duration)))

                lease = ET.SubElement(lease_request, "lease")
                lease.set("id", `lease_id`)

                nodes = ET.SubElement(lease, "nodes")
                node_set = ET.SubElement(nodes, "node-set")
                node_set.set("numnodes", `num_nodes`)
                res = ET.SubElement(node_set, "res")
                res.set("type", "CPU")
                if cpu == 1:
                    res.set("amount", "100")
                else:
                    # Only single-CPU requests (cpu == 1) are handled; other values
                    # leave the CPU resource without an explicit amount.
                    pass
                res = ET.SubElement(node_set, "res")
                res.set("type", "Memory")
                res.set("amount", `mem`)

                start = ET.SubElement(lease, "start")
                if start_time == -1:
                    lease.set("preemptible", "true")
                else:
                    lease.set("preemptible", "false")
                    exact = ET.SubElement(start, "exact")
                    exact.set("time", str(TimeDelta(seconds=start_time)))

                duration_elem = ET.SubElement(lease, "duration")
                duration_elem.set("time", str(TimeDelta(seconds=duration)))

                software = ET.SubElement(lease, "software")
                diskimage = ET.SubElement(software, "disk-image")
                diskimage.set("id", vm_image)
                diskimage.set("size", `vm_imagesize`)

                lease_id += 1
        tree = ET.ElementTree(root)
        # The generated XML is printed to standard output.
        print ET.tostring(root)
        
class haizea_swf2lwf(Command):
    """
    Converts Standard Workload Format (SWF, used in the Parallel Workloads Archive at
    http://www.cs.huji.ac.il/labs/parallel/workload/) to Lease Workload Format
    """

    name = "haizea-swf2lwf"

    def __init__(self, argv):
        Command.__init__(self, argv)

        self.optparser.add_option(Option("-i", "--in", action="store",  type="string", dest="inf", required=True,
                                         help = """
                                         Input file
                                         """))
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True,
                                         help = """
                                         Output file
                                         """))
        self.optparser.add_option(Option("-p", "--preemptible", action="store", type="string", dest="preemptible", required=True,
                                         help = """
                                         Should the leases be preemptible or not?
                                         """))
        self.optparser.add_option(Option("-f", "--from", action="store", type="string", dest="from_time", default="00:00:00:00",
                                         help = """
                                         This parameter, together with the --interval-length parameter, allows converting just
                                         an interval of the SWF file.
                                         
                                         This parameter must be a timestamp of the format DD:HH:MM:SS. Only jobs submitted
                                         at or after that time will be converted.
                                         
                                         Default is 00:00:00:00
                                         """))
        self.optparser.add_option(Option("-l", "--interval-length", action="store", type="string", dest="interval_length",
                                         help = """
                                         Length of the interval in format DD:HH:MM:SS. Default is to convert jobs until the
                                         end of the SWF file.
                                         """))
        self.optparser.add_option(Option("-q", "--queues", action="store", type="string", dest="queues",
                                         help = """
                                         Only convert jobs from the specified queues
                                         """))
        self.optparser.add_option(Option("-m", "--memory", action="store", type="string", dest="mem",
                                         help = """
                                         Memory requested by jobs.
                                         """))
        self.optparser.add_option(Option("-s", "--scale", action="store", type="string", dest="scale",
                                         help = """
                                         Scale number of processors by 1/SCALE.
                                         """))
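    # A hypothetical invocation (file names and values are placeholders): convert the
    # first 30 days of an SWF trace into non-preemptible leases, assuming every job
    # requests 1024 MB of memory per node:
    #
    #   haizea-swf2lwf -i trace.swf -o trace.lwf -p false -l 30:00:00:00 -m 1024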
                
    def run(self):
        self.parse_options()

        infile = self.opt.inf
        outfile = self.opt.outf

        from_time = Parser.DateTimeDeltaFromString(self.opt.from_time)
        if self.opt.interval_length == None:
            to_time = None
        else:
            to_time = from_time + Parser.DateTimeDeltaFromString(self.opt.interval_length)

        root = ET.Element("lease-workload")
        root.set("name", infile)
        description = ET.SubElement(root, "description")
        description.text = "Created with haizea-swf2lwf %s" % " ".join(self.argv[1:])
        time = TimeDelta(seconds=0)
        requests = ET.SubElement(root, "lease-requests")

        slowdowns = []
        users = set()

        infile = open(infile, "r")
        for line in infile:
            if line[0]!=';' and len(line.strip()) != 0:
                fields = line.split()

                # Unpack the job's attributes. The description of each field is
                # taken from the SWF documentation at
                # http://www.cs.huji.ac.il/labs/parallel/workload/swf.html

                # Job Number -- a counter field, starting from 1.
                job_number = int(fields[0])

                # Submit Time -- in seconds. The earliest time the log refers to is zero,
                # and is the submittal time of the first job. The lines in the log are
                # sorted by ascending submittal times. It makes sense for jobs to also be
                # numbered in this order.
                submit_time = int(fields[1])

                # Wait Time -- in seconds. The difference between the job's submit time
                # and the time at which it actually began to run. Naturally, this is only
                # relevant to real logs, not to models.
                wait_time = int(fields[2])

                # Run Time -- in seconds. The wall clock time the job was running (end
                # time minus start time).
                # We decided to use ``wait time'' and ``run time'' instead of the equivalent
                # ``start time'' and ``end time'' because they are directly attributable to
                # the scheduler and application, and are more suitable for models where only
                # the run time is relevant.
                # Note that when values are rounded to an integral number of seconds (as
                # often happens in logs) a run time of 0 is possible and means the job ran
                # for less than 0.5 seconds. On the other hand it is permissible to use
                # floating point values for time fields.
                run_time = int(fields[3])

                # Number of Allocated Processors -- an integer. In most cases this is also
                # the number of processors the job uses; if the job does not use all of them,
                # we typically don't know about it.
                num_processors_allocated = int(fields[4])

                # Average CPU Time Used -- both user and system, in seconds. This is the
                # average over all processors of the CPU time used, and may therefore be
                # smaller than the wall clock runtime. If a log contains the total CPU time
                # used by all the processors, it is divided by the number of allocated
                # processors to derive the average.
                avg_cpu_time = float(fields[5])
                
                # Used Memory -- in kilobytes. This is again the average per processor.
                used_memory = int(fields[6])

                # Requested Number of Processors.
                num_processors_requested = int(fields[7])

                # Requested Time. This can be either runtime (measured in wallclock seconds),
                # or average CPU time per processor (also in seconds) -- the exact meaning
                # is determined by a header comment. In many logs this field is used for
                # the user runtime estimate (or upper bound) used in backfilling. If a log
                # contains a request for total CPU time, it is divided by the number of
                # requested processors.
                time_requested = int(fields[8])

                # Requested Memory (again kilobytes per processor).
                mem_requested = int(fields[9])

                # Status 1 if the job was completed, 0 if it failed, and 5 if cancelled.
                # If information about checkpointing or swapping is included, other values
                # are also possible. See usage note below. This field is meaningless for
                # models, so would be -1.
                status = int(fields[10])

                # User ID -- a natural number, between one and the number of different users.
                user_id = int(fields[11])

                # Group ID -- a natural number, between one and the number of different groups.
                # Some systems control resource usage by groups rather than by individual users.
                group_id = int(fields[12])

                # Executable (Application) Number -- a natural number, between one and the number
                # of different applications appearing in the workload. In some logs, this might
                # represent a script file used to run jobs rather than the executable directly;
                # this should be noted in a header comment.
                exec_number = int(fields[13])

                # Queue Number -- a natural number, between one and the number of different
                # queues in the system. The nature of the system's queues should be explained
                # in a header comment. This field is where batch and interactive jobs should
                # be differentiated: we suggest the convention of denoting interactive jobs by 0.
                queue = int(fields[14])

                # Partition Number -- a natural number, between one and the number of different
                # partitions in the systems. The nature of the system's partitions should be
                # explained in a header comment. For example, it is possible to use partition
                # numbers to identify which machine in a cluster was used.
                partition = int(fields[15])

                # Preceding Job Number -- this is the number of a previous job in the workload,
                # such that the current job can only start after the termination of this preceding
                # job. Together with the next field, this allows the workload to include feedback
                # as described below.
                prec_job = int(fields[16])

                # Think Time from Preceding Job -- this is the number of seconds that should elapse
                # between the termination of the preceding job and the submittal of this one.
                prec_job_thinktime = int(fields[17])

    
                # Check if we have to skip this job

                submit_time = TimeDelta(seconds=submit_time)

                if submit_time < from_time:
                    continue

                if to_time != None and submit_time > to_time:
                    break

                if run_time < 0 and status==5:
                    # This is a job that got cancelled while waiting in the queue
                    continue

                if self.opt.queues != None:
                    queues = [int(q) for q in self.opt.queues.split(",")]
                    if queue not in queues:
                        # Job was submitted to a queue we're filtering out
                        continue

                if self.opt.scale != None:
                    num_processors_requested = int(num_processors_requested/int(self.opt.scale))

                # Make submission time relative to starting time of trace
                submit_time = submit_time - from_time

                lease_request = ET.SubElement(requests, "lease-request")
                lease_request.set("arrival", str(submit_time))

                if run_time == 0:
                    # As specified in the SWF documentation, a runtime of 0 means
                    # the job ran for less than a second, so we round up to 1.
                    run_time = 1
                realduration = ET.SubElement(lease_request, "realduration")
                realduration.set("time", str(TimeDelta(seconds=run_time)))

                lease = ET.SubElement(lease_request, "lease")
                lease.set("id", `job_number`)

                nodes = ET.SubElement(lease, "nodes")
                node_set = ET.SubElement(nodes, "node-set")
                node_set.set("numnodes", `num_processors_requested`)
                res = ET.SubElement(node_set, "res")
                res.set("type", "CPU")
                res.set("amount", "100")

                res = ET.SubElement(node_set, "res")
                res.set("type", "Memory")
                if self.opt.mem != None:
                    res.set("amount", self.opt.mem)
                elif mem_requested != -1:
                    res.set("amount", `mem_requested / 1024`)
                else:
                    print "Cannot convert this file. Job #%i does not specify requested memory, and --memory parameter not specified" % job_number
                    exit(-1)
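                # Record the job's bounded slowdown: (wait time + runtime) / runtime, with
                # the runtime bounded below by 10 seconds so that very short jobs do not
                # produce disproportionately large values.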
                    
                if run_time < 10:
                    run_time2 = 10
                else:
                    run_time2 = run_time
                slowdown = (wait_time + run_time2) / float(run_time2)
                slowdowns.append(slowdown)

                if user_id not in users:
                    users.add(user_id)

                start = ET.SubElement(lease, "start")
                #lease.set("preemptible", self.opt.preemptible)
                lease.set("user", `user_id`)

                duration_elem = ET.SubElement(lease, "duration")
                duration_elem.set("time", str(TimeDelta(seconds=time_requested)))

                # No software environment specified. The annotator would have to be used to
                # add one (or an image file when running a simulation).
                software = ET.SubElement(lease, "software")
                diskimage = ET.SubElement(software, "none")

                # Add unused SWF attributes to the extra section, for future reference.
                extra = ET.SubElement(lease, "extra")
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_waittime")
                attr.set("value", `wait_time`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_avgcputime")
                attr.set("value", `avg_cpu_time`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_queue")
                attr.set("value", `queue`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_group")
                attr.set("value", `group_id`)
                attr = ET.SubElement(extra, "attr")
                attr.set("name", "SWF_execnumber")
                attr.set("value", `exec_number`)

        tree = ET.ElementTree(root)

        outfile = open(outfile, "w")
        tree.write(outfile)

        infile.close()
        outfile.close()

        slowdowns.sort()

        print "SLOWDOWNS"
        print "---------"
        print "min: %.2f" % slowdowns[0]
        print "10p: %.2f" % percentile(slowdowns, 0.1)
        print "25p: %.2f" % percentile(slowdowns, 0.25)
        print "med: %.2f" % percentile(slowdowns, 0.5)
        print "75p: %.2f" % percentile(slowdowns, 0.75)
        print "90p: %.2f" % percentile(slowdowns, 0.9)
        print "max: %.2f" % slowdowns[-1]
        print
        print "USERS"
        print "-----"
        print "Number of users: %i" % len(users)