Project

General

Profile

root / branches / 1.1 / src / haizea / cli / commands.py @ 798

1
# -------------------------------------------------------------------------- #
2
# Copyright 2006-2009, University of Chicago                                 #
3
# Copyright 2008-2009, Distributed Systems Architecture Group, Universidad   #
4
# Complutense de Madrid (dsa-research.org)                                   #
5
#                                                                            #
6
# Licensed under the Apache License, Version 2.0 (the "License"); you may    #
7
# not use this file except in compliance with the License. You may obtain    #
8
# a copy of the License at                                                   #
9
#                                                                            #
10
# http://www.apache.org/licenses/LICENSE-2.0                                 #
11
#                                                                            #
12
# Unless required by applicable law or agreed to in writing, software        #
13
# distributed under the License is distributed on an "AS IS" BASIS,          #
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
15
# See the License for the specific language governing permissions and        #
16
# limitations under the License.                                             #
17
# -------------------------------------------------------------------------- #
18

    
19
from haizea.core.manager import Manager
20
from haizea.common.utils import generate_config_name, unpickle
21
from haizea.core.configfile import HaizeaConfig, HaizeaMultiConfig
22
from haizea.core.accounting import AccountingDataCollection
23
from haizea.common.config import ConfigException
24
from haizea.common.stats import percentile
25
from haizea.cli.optionparser import Option
26
from haizea.cli import Command
27
from haizea.lwf.generators import LWFGenerator, LWFAnnotationGenerator
28
from haizea.lwf.analysis import LWFAnalyser
29
from mx.DateTime import TimeDelta, Parser
30
import haizea.common.defaults as defaults
31
import sys
32
import os
33
import errno
34
import signal
35
from time import sleep
36

    
37
try:
38
    import xml.etree.ElementTree as ET
39
except ImportError:
40
    # Compatibility with Python <=2.4
41
    import elementtree.ElementTree as ET 
42

    
43

    
44
class haizea(Command):
45
    """
46
    This is the main Haizea command. By default, it will start Haizea as a daemon, which
47
    can receive requests via RPC or interact with other components such as OpenNebula. It can
48
    also start as a foreground process, and write all log messages to the console. All
49
    Haizea options are specified through the configuration file."""
50
    
51
    name = "haizea"
52
    
53
    def __init__(self, argv):
54
        Command.__init__(self, argv)
55
        
56
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf",
57
                                         help = """
58
                                         The location of the Haizea configuration file. If not
59
                                         specified, Haizea will first look for it in
60
                                         /etc/haizea/haizea.conf and then in ~/.haizea/haizea.conf.
61
                                         """))
62
        self.optparser.add_option(Option("-f", "--fg", action="store_true",  dest="foreground",
63
                                         help = """
64
                                         Runs Haizea in the foreground.
65
                                         """))
66
        self.optparser.add_option(Option("--stop", action="store_true",  dest="stop",
67
                                         help = """
68
                                         Stops the Haizea daemon.
69
                                         """))
70
                
71
    def run(self):
72
        self.parse_options()
73

    
74
        pidfile = defaults.DAEMON_PIDFILE # TODO: Make configurable
75

    
76
        if self.opt.stop == None:
77
            # Start Haizea
78
             
79
            # Check if a daemon is already running
80
            if os.path.exists(pidfile):
81
                pf  = file(pidfile,'r')
82
                pid = int(pf.read().strip())
83
                pf.close()
84
     
85
                try:
86
                    os.kill(pid, signal.SIG_DFL)
87
                except OSError, (err, msg):
88
                    if err == errno.ESRCH:
89
                        # Pidfile is stale. Remove it.
90
                        os.remove(pidfile)
91
                    else:
92
                        msg = "Unexpected error when checking pid file '%s'.\n%s\n" %(pidfile, msg)
93
                        sys.stderr.write(msg)
94
                        sys.exit(1)
95
                else:
96
                    msg = "Haizea seems to be already running (pid %i)\n" % pid
97
                    sys.stderr.write(msg)
98
                    sys.exit(1)
99
     
100
            try:
101
                configfile=self.opt.conf
102
                if configfile == None:
103
                    # Look for config file in default locations
104
                    for loc in defaults.CONFIG_LOCATIONS:
105
                        if os.path.exists(loc):
106
                            config = HaizeaConfig.from_file(loc)
107
                            break
108
                    else:
109
                        print >> sys.stdout, "No configuration file specified, and none found at default locations."
110
                        print >> sys.stdout, "Make sure a config file exists at:\n  -> %s" % "\n  -> ".join(defaults.CONFIG_LOCATIONS)
111
                        print >> sys.stdout, "Or specify a configuration file with the --conf option."
112
                        exit(1)
113
                else:
114
                    config = HaizeaConfig.from_file(configfile)
115
            except ConfigException, msg:
116
                print >> sys.stderr, "Error in configuration file:"
117
                print >> sys.stderr, msg
118
                exit(1)
119
                
120
            daemon = not self.opt.foreground
121
        
122
            manager = Manager(config, daemon, pidfile)
123
        
124
            manager.start()
125
        elif self.opt.stop: # Stop Haizea
126
            # Based on code in:  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012
127
            try:
128
                pf  = file(pidfile,'r')
129
                pid = int(pf.read().strip())
130
                pf.close()
131
            except IOError:
132
                msg = "Could not stop, pid file '%s' missing.\n"
133
                sys.stderr.write(msg % pidfile)
134
                sys.exit(1)
135
            try:
136
                while 1:
137
                    os.kill(pid, signal.SIGTERM)
138
                    sleep(1)
139
            except OSError, err:
140
                err = str(err)
141
                if err.find("No such process") > 0:
142
                    os.remove(pidfile)
143
                else:
144
                    print str(err)
145
                    sys.exit(1)
146

    
147
class haizea_generate_configs(Command):
    """
    Takes an Haizea multiconfiguration file and generates the individual
    configuration files. See the Haizea manual for more details on multiconfiguration
    files."""
    
    name = "haizea-generate-configs"

    def __init__(self, argv):
        Command.__init__(self, argv)
        
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True,
                                         help = """
                                         Multiconfiguration file.
                                         """))
        self.optparser.add_option(Option("-d", "--dir", action="store", type="string", dest="dir", required=True,
                                         help = """
                                         Directory where the individual configuration files
                                         must be created.
                                         """))
                
    def run(self):    
        """Expand the multiconfiguration file into one <name>.conf file per configuration."""
        self.parse_options()
        
        configfile = self.opt.conf
        multiconfig = HaizeaMultiConfig.from_file(configfile)
        
        configs = multiconfig.get_configs()
        
        etcdir = os.path.abspath(self.opt.dir)
        if not os.path.exists(etcdir):
            os.makedirs(etcdir)
            
        for c in configs:
            profile = c.get_attr("profile")
            tracefile = c.get("tracefile")
            injfile = c.get("injectionfile")
            annotationfile = c.get("annotationfile")
            configname = generate_config_name(profile, tracefile, annotationfile, injfile)
            # os.path.join instead of manual "/" concatenation for portability
            configfile = os.path.join(etcdir, "%s.conf" % configname)
            fc = open(configfile, "w")
            try:
                c.config.write(fc)
            finally:
                # Close the file even if write() raises (previously leaked).
                fc.close()
192

    
193
class haizea_generate_scripts(Command):
194
    """
195
    Generates a script, based on a script template, to run all the individual 
196
    configuration files generated by haizea-generate-configs. This command 
197
    requires Mako Templates for Python (http://www.makotemplates.org/)."""
198
    
199
    name = "haizea-generate-scripts"
200

    
201
    def __init__(self, argv):
202
        Command.__init__(self, argv)
203
        
204
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf", required=True,
205
                                         help = """
206
                                         Multiconfiguration file used in haizea-generate-configs.
207
                                         """))
208
        self.optparser.add_option(Option("-d", "--confdir", action="store", type="string", dest="confdir", required=True,
209
                                         help = """
210
                                         Directory containing the individual configuration files.
211
                                         """))
212
        self.optparser.add_option(Option("-t", "--template", action="store", type="string", dest="template", required=True,
213
                                         help = """
214
                                         Script template (sample templates are included in /usr/share/haizea/etc)
215
                                         """))
216
        self.optparser.add_option(Option("-m", "--only-missing", action="store_true",  dest="onlymissing",
217
                                         help = """
218
                                         If specified, the generated script will only run the configurations
219
                                         that have not already produced a datafile. This is useful when some simulations
220
                                         fail, and you don't want to have to rerun them all.
221
                                         """))
222

    
223
        self.optparser.add_option(Option("-x", "--exclude", action="store", type="string", dest="exclude",
224
                                         help = """
225
                                         ...
226
                                         """))
227

    
228
                
229
    def run(self):        
230
        self.parse_options()
231
        
232
        configfile=self.opt.conf
233
        multiconfig = HaizeaMultiConfig.from_file(configfile)
234
                
235
        try:
236
            from mako.template import Template
237
        except Exception, e:
238
            print "You need Mako Templates for Python to run this command."
239
            print "You can download them at http://www.makotemplates.org/"
240
            exit(1)
241
    
242
        configs = multiconfig.get_configs()
243
        
244
        etcdir = os.path.abspath(self.opt.confdir)    
245
        if not os.path.exists(etcdir):
246
            os.makedirs(etcdir)
247

    
248
        if self.opt.exclude != None:
249
            exclude = self.opt.exclude.split()
250
        else:
251
            exclude = []
252
            
253
        templatedata = []    
254
        for c in configs:
255
            profile = c.get_attr("profile")
256
            tracefile = c.get("tracefile")
257
            injfile = c.get("injectionfile")
258
            datafile = c.get("datafile")
259
            annotationfile = c.get("annotationfile")            
260
            configname = generate_config_name(profile, tracefile, annotationfile, injfile)
261
            configfile = etcdir + "/%s.conf" % configname
262
            if (not self.opt.onlymissing or not os.path.exists(datafile)) and not configfile in exclude:
263
                templatedata.append((configname, configfile))
264
    
265
        template = Template(filename=self.opt.template)
266
        print template.render(configs=templatedata, etcdir=etcdir)
267

    
268

    
269
class haizea_convert_data(Command):
270
    """
271
    Converts Haizea datafiles into another (easier to process) format.
272
    """
273
    
274
    name = "haizea-convert-data"
275

    
276
    def __init__(self, argv):
277
        Command.__init__(self, argv)
278
        
279
        self.optparser.add_option(Option("-t", "--type", action="store",  dest="type",
280
                                         choices = ["per-run", "per-lease", "counter"],
281
                                         help = """
282
                                         Type of data to produce.
283
                                         """))
284
        self.optparser.add_option(Option("-c", "--counter", action="store",  dest="counter",
285
                                         help = """
286
                                         Counter to print out when using '--type counter'.
287
                                         """))
288
        self.optparser.add_option(Option("-f", "--format", action="store", type="string", dest="format",
289
                                         help = """
290
                                         Output format. Currently supported: csv
291
                                         """))
292
        self.optparser.add_option(Option("-l", "--list-counters", action="store_true",  dest="list_counters",
293
                                         help = """
294
                                         If specified, the command will just print out the names of counters
295
                                         stored in the data file and then exit, regardless of other parameters.
296
                                         """))
297
                
298
    def run(self):            
299
        self.parse_options()
300

    
301
        datafiles=self.args[1:]
302
        if len(datafiles) == 0:
303
            print "Please specify at least one datafile to convert"
304
            exit(1)
305
        
306
        datafile1 = unpickle(datafiles[0])
307
        
308
        counter_names = datafile1.counters.keys()
309
        attr_names = datafile1.attrs.keys()
310
        lease_stats_names = datafile1.lease_stats_names
311
        stats_names = datafile1.stats_names
312

    
313
        if self.opt.list_counters:
314
            for counter in counter_names:
315
                print counter
316
            exit(0)
317
        
318
        if self.opt.type == "per-run":
319
            header_fields = attr_names + stats_names
320
        elif self.opt.type == "per-lease":
321
            header_fields = attr_names + ["lease_id"] + lease_stats_names
322
        elif self.opt.type == "counter":
323
            counter = self.opt.counter
324
            if not datafile1.counters.has_key(counter):
325
                print "The specified datafile does not have a counter called '%s'" % counter
326
                exit(1)
327
            header_fields = attr_names + ["time", "value"]
328
            if datafile1.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
329
                header_fields.append("average")                
330

    
331
        header = ",".join(header_fields)
332
            
333
        print header
334
        
335
        for datafile in datafiles:
336
            try:
337
                data = unpickle(datafile)
338
            
339
                attrs = [data.attrs[attr_name] for attr_name in attr_names]
340
                            
341
                if self.opt.type == "per-run":
342
                    fields = attrs + [str(data.stats[stats_name]) for stats_name in stats_names]
343
                    print ",".join(fields)
344
                elif self.opt.type == "per-lease":
345
                    leases = data.lease_stats
346
                    for lease_id, lease_stat in leases.items():
347
                        fields = attrs + [`lease_id`] + [str(lease_stat.get(lease_stat_name,"")) for lease_stat_name in lease_stats_names]
348
                        print ",".join(fields)
349
                elif self.opt.type == "counter":
350
                    for (time, lease_id, value, avg) in data.counters[counter]:
351
                        fields = attrs + [`time`, `value`]
352
                        if data.counter_avg_type[counter] != AccountingDataCollection.AVERAGE_NONE:
353
                            fields.append(`avg`)
354
                        print ",".join(fields)
355
            except:
356
                print >> sys.stderr, "Error reading file %s" % (datafile)
357

    
358
class haizea_lwf_generate(Command):
    """Generates an LWF file using the LWF generator."""
    
    name = "haizea-lwf-generate"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True,
                                         help = """
                                         LWF file
                                         """))
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf",
                                         help = """
                                         ...
                                         """))      
        
    def run(self):
        """Parse the command-line options and delegate to LWFGenerator."""
        self.parse_options()
        # Hand the output path and (optional) configuration file straight
        # to the generator.
        generator = LWFGenerator(self.opt.outf, self.opt.conf)
        generator.generate()
382

    
383
class haizea_lwf_stats(Command):
    """Analyses a lease workload (LWF) file and prints statistics about it."""
    
    name = "haizea-lwf-stats"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        self.optparser.add_option(Option("-i", "--in", action="store",  type="string", dest="inf", required=True,
                                         help = """
                                         Input file
                                         """))
        self.optparser.add_option(Option("-l", "--utilization-length", action="store", type="string", dest="utilization_length",
                                         help = """
                                         Length of the utilization interval in format DD:HH:MM:SS. Default is until
                                         the time the last lease is requested.
                                         """))          
        
    def run(self):
        """Parse options and run the LWF analyser on the input file."""
        self.parse_options()      
        
        infile = self.opt.inf
        utilization_length = self.opt.utilization_length
        # "is not None" instead of "!= None": only parse the interval when
        # the option was actually given.
        if utilization_length is not None:
            utilization_length = Parser.DateTimeDeltaFromString(utilization_length)

        analyser = LWFAnalyser(infile, utilization_length)
        
        analyser.analyse()
410

    
411
class haizea_lwf_annotate(Command):
    """Generates an annotation file for a lease workload (LWF) file."""
    
    name = "haizea-lwf-annotate"
    
    def __init__(self, argv):
        Command.__init__(self, argv)
        self.optparser.add_option(Option("-i", "--in", action="store",  type="string", dest="inf", required=True,
                                         help = """
                                         LWF file
                                         """))
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True,
                                         help = """
                                         Annotation file
                                         """))
        self.optparser.add_option(Option("-c", "--conf", action="store", type="string", dest="conf",
                                         help = """
                                         ...
                                         """))
        
    def run(self):
        """Parse the command-line options and delegate to LWFAnnotationGenerator."""
        self.parse_options()      
        # Feed the input, output and (optional) configuration paths to the
        # annotation generator.
        annotator = LWFAnnotationGenerator(self.opt.inf, self.opt.outf, self.opt.conf)
        annotator.generate()
440
        
441

    
442
# TODO: Has to be moved to the haizea.lwf package, or removed altogether,
443
# given that this was a one-time thing to convert the old ad hoc LWF files
444
class haizea_lwf2xml(Command):
445
    """
446
    Converts old Haizea LWF file into new XML-based LWF format
447
    """
448
    
449
    name = "haizea-lwf2xml"
450

    
451
    def __init__(self, argv):
452
        Command.__init__(self, argv)
453
        
454
        self.optparser.add_option(Option("-i", "--in", action="store",  type="string", dest="inf",
455
                                         help = """
456
                                         Input file
457
                                         """))
458
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf",
459
                                         help = """
460
                                         Output file
461
                                         """))
462
                
463
    def run(self):            
464
        self.parse_options()
465

    
466
        infile = self.opt.inf
467
        outfile = self.opt.outf
468
        
469
        root = ET.Element("lease-workload")
470
        root.set("name", infile)
471
        description = ET.SubElement(root, "description")
472
        time = TimeDelta(seconds=0)
473
        lease_id = 1
474
        requests = ET.SubElement(root, "lease-requests")
475
        
476
        
477
        infile = open(infile, "r")
478
        for line in infile:
479
            if line[0]!='#' and len(line.strip()) != 0:
480
                fields = line.split()
481
                submit_time = int(fields[0])
482
                start_time = int(fields[1])
483
                duration = int(fields[2])
484
                real_duration = int(fields[3])
485
                num_nodes = int(fields[4])
486
                cpu = int(fields[5])
487
                mem = int(fields[6])
488
                disk = int(fields[7])
489
                vm_image = fields[8]
490
                vm_imagesize = int(fields[9])
491
                
492
                
493
        
494
                lease_request = ET.SubElement(requests, "lease-request")
495
                lease_request.set("arrival", str(TimeDelta(seconds=submit_time)))
496
                if real_duration != duration:
497
                    realduration = ET.SubElement(lease_request, "realduration")
498
                    realduration.set("time", str(TimeDelta(seconds=real_duration)))
499
                
500
                lease = ET.SubElement(lease_request, "lease")
501
                lease.set("id", `lease_id`)
502

    
503
                
504
                nodes = ET.SubElement(lease, "nodes")
505
                node_set = ET.SubElement(nodes, "node-set")
506
                node_set.set("numnodes", `num_nodes`)
507
                res = ET.SubElement(node_set, "res")
508
                res.set("type", "CPU")
509
                if cpu == 1:
510
                    res.set("amount", "100")
511
                else:
512
                    pass
513
                res = ET.SubElement(node_set, "res")
514
                res.set("type", "Memory")
515
                res.set("amount", `mem`)
516
                
517
                start = ET.SubElement(lease, "start")
518
                if start_time == -1:
519
                    lease.set("preemptible", "true")
520
                else:
521
                    lease.set("preemptible", "false")
522
                    exact = ET.SubElement(start, "exact")
523
                    exact.set("time", str(TimeDelta(seconds=start_time)))
524

    
525
                duration_elem = ET.SubElement(lease, "duration")
526
                duration_elem.set("time", str(TimeDelta(seconds=duration)))
527

    
528
                software = ET.SubElement(lease, "software")
529
                diskimage = ET.SubElement(software, "disk-image")
530
                diskimage.set("id", vm_image)
531
                diskimage.set("size", `vm_imagesize`)
532
                
533
                    
534
                lease_id += 1
535
        tree = ET.ElementTree(root)
536
        print ET.tostring(root)
537

    
538

    
539

    
540

    
541
# TODO: Has to be moved to the haizea.lwf package
542
class haizea_swf2lwf(Command):
543
    """
544
    Converts Standard Workload Format (SWF, used in the Parallel Workloads Archive at
545
    http://www.cs.huji.ac.il/labs/parallel/workload/) to Lease Workload Format
546
    """
547
    
548
    name = "haizea-swf2lwf"
549

    
550
    def __init__(self, argv):
551
        Command.__init__(self, argv)
552
        
553
        self.optparser.add_option(Option("-i", "--in", action="store",  type="string", dest="inf", required=True,
554
                                         help = """
555
                                         Input file
556
                                         """))
557
        self.optparser.add_option(Option("-o", "--out", action="store", type="string", dest="outf", required=True,
558
                                         help = """
559
                                         Output file
560
                                         """))
561
        self.optparser.add_option(Option("-p", "--preemptible", action="store", type="string", dest="preemptible", required=True,
562
                                         help = """
563
                                         Should the leases be preemptable or not?
564
                                         """))
565
        self.optparser.add_option(Option("-f", "--from", action="store", type="string", dest="from_time", default="00:00:00:00",
566
                                         help = """
567
                                         This parameter, together with the --amount parameter, allows converting just
568
                                         an interval of the SWF file.
569
                                         
570
                                         This parameter must be a timestamp of the format DD:HH:MM:SS. Only jobs sumitted
571
                                         at or after that time will be converted.
572
                                         
573
                                         Default is 00:00:00:00
574
                                         """))        
575
        self.optparser.add_option(Option("-l", "--interval-length", action="store", type="string", dest="interval_length",
576
                                         help = """
577
                                         Length of the interval in format DD:HH:MM:SS. Default is to convert jobs until the
578
                                         end of the SWF file.
579
                                         """))        
580
        self.optparser.add_option(Option("-q", "--queues", action="store", type="string", dest="queues",
581
                                         help = """
582
                                         Only convert jobs from the specified queues
583
                                         """))
584
        self.optparser.add_option(Option("-m", "--memory", action="store", type="string", dest="mem",
585
                                         help = """
586
                                         Memory requested by jobs.
587
                                         """))
588
        self.optparser.add_option(Option("-s", "--scale", action="store", type="string", dest="scale",
589
                                         help = """
590
                                         Scale number of processors by 1/SCALE.
591
                                         """))        
592
        self.optparser.add_option(Option("-n", "--site", action="store", type="string", dest="site",
593
                                         help = """
594
                                         File containing site description
595
                                         """))        
596
                
597
    def run(self):            
598
        self.parse_options()
599

    
600
        infile = self.opt.inf
601
        outfile = self.opt.outf
602
        
603
        from_time = Parser.DateTimeDeltaFromString(self.opt.from_time)
604
        if self.opt.interval_length == None:
605
            to_time = None
606
        else:
607
            to_time = from_time + Parser.DateTimeDeltaFromString(self.opt.interval_length)
608

    
609
        root = ET.Element("lease-workload")
610
        root.set("name", infile)
611
        description = ET.SubElement(root, "description")
612
        description.text = "Created with haizea-swf2lwf %s" % " ".join(self.argv[1:])
613

    
614
        if self.opt.site != None:
615
            site_elem = ET.parse(self.opt.site).getroot()
616
            site_num_nodes = int(site_elem.find("nodes").find("node-set").get("numnodes"))
617
            root.append(site_elem)
618
        
619
        time = TimeDelta(seconds=0)
620
        requests = ET.SubElement(root, "lease-requests")
621
        
622
        slowdowns = []
623
        users = set()
624
        utilization = 0
625
        utilization_no_ramp = 0
626
        if to_time == None:
627
            swf = open(infile, 'r')
628
            lines = swf.readlines()
629
            lastline = lines[-1]
630
            to_time = TimeDelta(seconds=int(lastline.split()[1]))
631
            swf.close()
632

    
633
        no_ramp_cutoff = from_time + ((to_time - from_time) * 0.05)
634

    
635
        infile = open(infile, "r")
636
        for line in infile:
637
            if line[0]!=';' and len(line.strip()) != 0:
638
                fields = line.split()
639
                
640
                # Unpack the job's attributes. The description of each field is
641
                # taken from the SWF documentation at
642
                # http://www.cs.huji.ac.il/labs/parallel/workload/swf.html
643
                
644
                # Job Number -- a counter field, starting from 1. 
645
                job_number = int(fields[0])
646
                
647
                # Submit Time -- in seconds. The earliest time the log refers to is zero, 
648
                # and is the submittal time the of the first job. The lines in the log are 
649
                # sorted by ascending submittal times. It makes sense for jobs to also be 
650
                # numbered in this order.
651
                submit_time = int(fields[1])
652

    
653
                # Wait Time -- in seconds. The difference between the job's submit time 
654
                # and the time at which it actually began to run. Naturally, this is only 
655
                # relevant to real logs, not to models.
656
                wait_time = int(fields[2])
657

    
658
                # Run Time -- in seconds. The wall clock time the job was running (end 
659
                # time minus start time).
660
                # We decided to use ``wait time'' and ``run time'' instead of the equivalent 
661
                # ``start time'' and ``end time'' because they are directly attributable to 
662
                # the scheduler and application, and are more suitable for models where only 
663
                # the run time is relevant.
664
                # Note that when values are rounded to an integral number of seconds (as 
665
                # often happens in logs) a run time of 0 is possible and means the job ran 
666
                # for less than 0.5 seconds. On the other hand it is permissable to use 
667
                # floating point values for time fields.
668
                run_time = int(fields[3])
669
                
670
                # Number of Allocated Processors -- an integer. In most cases this is also 
671
                # the number of processors the job uses; if the job does not use all of them, 
672
                # we typically don't know about it.
673
                num_processors_allocated = int(fields[4])
674
                
675
                # Average CPU Time Used -- both user and system, in seconds. This is the 
676
                # average over all processors of the CPU time used, and may therefore be 
677
                # smaller than the wall clock runtime. If a log contains the total CPU time 
678
                # used by all the processors, it is divided by the number of allocated 
679
                # processors to derive the average.
680
                avg_cpu_time = float(fields[5])
681
                
682
                # Used Memory -- in kilobytes. This is again the average per processor.
683
                used_memory = int(fields[6])
684
                
685
                # Requested Number of Processors.
686
                num_processors_requested = int(fields[7])
687
                
688
                # Requested Time. This can be either runtime (measured in wallclock seconds), 
689
                # or average CPU time per processor (also in seconds) -- the exact meaning 
690
                # is determined by a header comment. In many logs this field is used for 
691
                # the user runtime estimate (or upper bound) used in backfilling. If a log 
692
                # contains a request for total CPU time, it is divided by the number of 
693
                # requested processors.
694
                time_requested = int(fields[8])
695
                
696
                # Requested Memory (again kilobytes per processor).
697
                mem_requested = int(fields[9])
698
                
699
                # Status 1 if the job was completed, 0 if it failed, and 5 if cancelled. 
700
                # If information about chekcpointing or swapping is included, other values 
701
                # are also possible. See usage note below. This field is meaningless for 
702
                # models, so would be -1.
703
                status = int(fields[10])
704
                
705
                # User ID -- a natural number, between one and the number of different users.
706
                user_id = int(fields[11])
707
                
708
                # Group ID -- a natural number, between one and the number of different groups. 
709
                # Some systems control resource usage by groups rather than by individual users.
710
                group_id = int(fields[12])
711
                
712
                # Executable (Application) Number -- a natural number, between one and the number 
713
                # of different applications appearing in the workload. in some logs, this might 
714
                # represent a script file used to run jobs rather than the executable directly; 
715
                # this should be noted in a header comment.
716
                exec_number = int(fields[13])
717
                
718
                # Queue Number -- a natural number, between one and the number of different 
719
                # queues in the system. The nature of the system's queues should be explained 
720
                # in a header comment. This field is where batch and interactive jobs should 
721
                # be differentiated: we suggest the convention of denoting interactive jobs by 0.
722
                queue = int(fields[14])
723
                
724
                # Partition Number -- a natural number, between one and the number of different 
725
                # partitions in the systems. The nature of the system's partitions should be 
726
                # explained in a header comment. For example, it is possible to use partition 
727
                # numbers to identify which machine in a cluster was used.
728
                partition = int(fields[15])
729
                
730
                # Preceding Job Number -- this is the number of a previous job in the workload, 
731
                # such that the current job can only start after the termination of this preceding 
732
                # job. Together with the next field, this allows the workload to include feedback 
733
                # as described below.
734
                prec_job = int(fields[16])
735

    
736
                # Think Time from Preceding Job -- this is the number of seconds that should elapse 
737
                # between the termination of the preceding job and the submittal of this one. 
738
                prec_job_thinktime = int(fields[17])
739

    
740
                                
741
                # Check if we have to skip this job
742
                
743
                submit_time = TimeDelta(seconds=submit_time)
744
                
745
                if submit_time < from_time:
746
                    continue
747
                
748
                if to_time != None and submit_time > to_time:
749
                    break
750
                
751
                if run_time < 0 and status==5:
752
                    # This is a job that got cancelled while waiting in the queue
753
                    continue
754
                
755
                if self.opt.queues != None:
756
                    queues = [int(q) for q in self.opt.queues.split(",")]
757
                    if queue not in queues:
758
                        # Job was submitted to a queue we're filtering out
759
                        continue              
760
                    
761
                if num_processors_requested == -1:
762
                    num_processors = num_processors_allocated
763
                else:
764
                    num_processors = num_processors_requested
765
        
766
                if self.opt.scale != None:
767
                    num_processors = int(num_processors/int(self.opt.scale))
768
                    
769
                lease_request = ET.SubElement(requests, "lease-request")
770
                # Make submission time relative to starting time of trace
771
                lease_request.set("arrival", str(submit_time - from_time))
772

    
773
                if run_time == 0:
774
                    # As specified in the SWF documentation, a runtime of 0 means
775
                    # the job ran for less than a second, so we round up to 1.
776
                    run_time = 1 
777
                realduration = ET.SubElement(lease_request, "realduration")
778
                realduration.set("time", str(TimeDelta(seconds=run_time)))
779
                
780
                lease = ET.SubElement(lease_request, "lease")
781
                lease.set("id", `job_number`)
782

    
783
                
784
                nodes = ET.SubElement(lease, "nodes")
785
                node_set = ET.SubElement(nodes, "node-set")
786
                node_set.set("numnodes", `num_processors`)
787
                res = ET.SubElement(node_set, "res")
788
                res.set("type", "CPU")
789
                res.set("amount", "100")
790

    
791
                res = ET.SubElement(node_set, "res")
792
                res.set("type", "Memory")
793
                if self.opt.mem != None:
794
                    res.set("amount", self.opt.mem)
795
                elif mem_requested != -1:
796
                    res.set("amount", `mem_requested / 1024`)
797
                else:
798
                    print "Cannot convert this file. Job #%i does not specify requested memory, and --memory parameter not specified" % job_number
799
                    exit(-1)
800
                    
801
                if wait_time != -1:
802
                    if run_time < 10:
803
                        run_time2 = 10.0
804
                    else:
805
                        run_time2 = float(run_time)
806
                    slowdown = (wait_time + run_time2) / run_time2
807
                    slowdowns.append(slowdown)
808
                
809
                if not user_id in users:
810
                    users.add(user_id)
811

    
812
                # Total utilization
813
                utilization += run_time * num_processors
814

    
815
                # Removing ramp-up and ramp-down effects
816
                if wait_time != -1 and submit_time + run_time >= no_ramp_cutoff:
817
                    start_in_interval = max(no_ramp_cutoff, submit_time)
818
                    end_in_interval = min(to_time, submit_time + run_time)
819
                    time_in_interval = end_in_interval - start_in_interval
820
                    utilization_no_ramp += time_in_interval * num_processors
821

    
822
                start = ET.SubElement(lease, "start")
823
                lease.set("preemptible", self.opt.preemptible)
824
                lease.set("user", `user_id`)
825

    
826
                duration_elem = ET.SubElement(lease, "duration")
827
                duration_elem.set("time", str(TimeDelta(seconds=time_requested)))
828

    
829
                # No software environment specified. The annotator would have to be used to
830
                # add one (or an image file when running a simulation).
831
                software = ET.SubElement(lease, "software")
832
                diskimage = ET.SubElement(software, "none")
833
                
834
                # Add unused SWF attributes to the extra section, for future reference.
835
                extra = ET.SubElement(lease, "extra")
836
                attr = ET.SubElement(extra, "attr")
837
                attr.set("name", "SWF_waittime")
838
                attr.set("value", `wait_time`)
839
                attr = ET.SubElement(extra, "attr")
840
                attr.set("name", "SWF_runtime")
841
                attr.set("value", `run_time`)
842
                attr = ET.SubElement(extra, "attr")
843
                attr.set("name", "SWF_avgcputime")
844
                attr.set("value", `avg_cpu_time`)
845
                attr = ET.SubElement(extra, "attr")
846
                attr.set("name", "SWF_queue")
847
                attr.set("value", `queue`)
848
                attr = ET.SubElement(extra, "attr")
849
                attr.set("name", "SWF_group")
850
                attr.set("value", `group_id`)
851
                attr = ET.SubElement(extra, "attr")
852
                attr.set("name", "SWF_execnumber")
853
                attr.set("value", `exec_number`)
854
                    
855
        tree = ET.ElementTree(root)
856
        
857
        outfile = open(outfile, "w")
858
        tree.write(outfile)
859
        
860
        infile.close()
861
        outfile.close()
862
        
863
        slowdowns.sort()
864

    
865
        total_capacity = site_num_nodes * (to_time - from_time).seconds
866
        utilization = float(utilization) / float(total_capacity)
867
        utilization_no_ramp = float(utilization_no_ramp) / float(total_capacity)
868

    
869
        if len(slowdowns) > 0:
870
            print "SLOWDOWNS"
871
            print "---------"
872
            print "min: %.2f" % slowdowns[0]
873
            print "10p: %.2f" % percentile(slowdowns, 0.1)
874
            print "25p: %.2f" % percentile(slowdowns, 0.25)
875
            print "med: %.2f" % percentile(slowdowns, 0.5)
876
            print "75p: %.2f" % percentile(slowdowns, 0.75)
877
            print "90p: %.2f" % percentile(slowdowns, 0.9)
878
            print "max: %.2f" % slowdowns[-1]
879
            print 
880
        print "USERS"
881
        print "-----"
882
        print "Number of users: %i" % len(users)
883
        print 
884
        print "UTILIZATION"
885
        print "-----------"
886
        print "Utilization: %.2f%%" % (utilization * 100)
887
        if utilization_no_ramp != 0:
888
            print "Utilization (no ramp-up/ramp-down): %.2f%%" % (utilization_no_ramp * 100)
889