root / branches / 1.1 / src / haizea / common / stats.py @ 847
1 |
# -------------------------------------------------------------------------- #
|
---|---|
2 |
# Copyright 2006-2009, University of Chicago #
|
3 |
# Copyright 2008-2009, Distributed Systems Architecture Group, Universidad #
|
4 |
# Complutense de Madrid (dsa-research.org) #
|
5 |
# #
|
6 |
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
|
7 |
# not use this file except in compliance with the License. You may obtain #
|
8 |
# a copy of the License at #
|
9 |
# #
|
10 |
# http://www.apache.org/licenses/LICENSE-2.0 #
|
11 |
# #
|
12 |
# Unless required by applicable law or agreed to in writing, software #
|
13 |
# distributed under the License is distributed on an "AS IS" BASIS, #
|
14 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
|
15 |
# See the License for the specific language governing permissions and #
|
16 |
# limitations under the License. #
|
17 |
# -------------------------------------------------------------------------- #
|
18 |
|
19 |
import random |
20 |
import math |
21 |
from haizea.common.utils import abstract |
22 |
|
23 |
|
24 |
class Distribution(object):
    """Base class for the random distributions in this module.

    Every instance owns its own ``random.Random`` generator, stored in
    ``self.random``, so seeding one distribution leaves other instances
    untouched.
    """

    def __init__(self):
        # Private generator used by subclasses to draw samples.
        generator = random.Random()
        self.random = generator

    def seed(self, x):
        """Seed this instance's generator with ``x``."""
        rng = self.random
        rng.seed(x)
|
30 |
|
31 |
class ContinuousDistribution(Distribution):
    """Base class for continuous distributions.

    Subclasses implement ``get()``; ``get_list()`` is built on top of it.
    """

    def __init__(self):
        Distribution.__init__(self)

    def get(self):
        """Return a single sample. Must be overridden by subclasses."""
        # `abstract` comes from haizea.common.utils; presumably it raises
        # to signal an unimplemented method -- confirm against that module.
        abstract()

    def get_list(self, n):
        """Return a list of ``n`` samples, each drawn with ``get()``.

        A zero or negative ``n`` yields an empty list.
        """
        # The previous loop iterated over xrange(1, n) and therefore
        # returned only n - 1 samples.
        return [self.get() for _ in range(n)]
|
43 |
|
44 |
|
45 |
class BoundedContinuousDistribution(ContinuousDistribution):
    """Continuous distribution that carries a lower and an upper bound.

    The bounds are stored as floats in ``self.min`` and ``self.max``; how
    they are applied is up to each subclass's ``get()``.
    """

    def __init__(self, min, max):
        ContinuousDistribution.__init__(self)
        # Coerce once here so subclasses can do float arithmetic directly.
        lower = float(min)
        self.min = lower
        upper = float(max)
        self.max = upper
50 |
|
51 |
|
52 |
class UniformDistribution(BoundedContinuousDistribution):
    """Uniform distribution between ``min`` and ``max``."""

    def __init__(self, min, max):
        BoundedContinuousDistribution.__init__(self, min, max)

    def get(self):
        """Return one sample drawn with ``random.uniform(min, max)``."""
        low, high = self.min, self.max
        sample = self.random.uniform(low, high)
        return sample
58 |
|
59 |
class NormalDistribution(ContinuousDistribution):
    """Unbounded normal distribution with mean ``mu`` and std. dev. ``sigma``."""

    def __init__(self, mu, sigma):
        ContinuousDistribution.__init__(self)
        # Stored exactly as given (no float conversion).
        self.mu, self.sigma = mu, sigma

    def get(self):
        """Return one sample drawn with ``random.normalvariate(mu, sigma)``."""
        mean = self.mu
        deviation = self.sigma
        return self.random.normalvariate(mean, deviation)
67 |
|
68 |
class BoundedNormalDistribution(BoundedContinuousDistribution):
    """Normal distribution whose samples are clamped to ``[min, max]``.

    Out-of-range samples are not redrawn; they are replaced by the bound
    they crossed, so the bounds themselves can be returned.
    """

    def __init__(self, min, max, mu, sigma):
        BoundedContinuousDistribution.__init__(self, min, max)
        mean = float(mu)
        self.mu = mean
        deviation = float(sigma)
        self.sigma = deviation

    def get(self):
        """Return one normal sample, clamped to the configured bounds."""
        sample = self.random.normalvariate(self.mu, self.sigma)
        # Lower bound is checked first, exactly as an if/elif chain would.
        if sample < self.min:
            return self.min
        if sample > self.max:
            return self.max
        return sample
|
81 |
|
82 |
|
83 |
class BoundedParetoDistribution(BoundedContinuousDistribution):
    """Pareto-shaped distribution restricted to ``[min, max]``.

    ``alpha`` is the shape exponent. ``min`` must be non-zero, since
    ``min ** alpha`` appears in a denominator.
    """

    def __init__(self, min, max, alpha, invert = False):
        BoundedContinuousDistribution.__init__(self, min, max)
        self.invert = invert
        self.alpha = float(alpha)

    def get(self):
        """Return one sample by transforming a uniform draw from [0, 1).

        A draw of 0 maps to ``min``; draws approaching 1 map towards
        ``max``. With ``invert`` set, ``max - sample`` is returned instead.
        """
        shape = self.alpha
        uniform = self.random.random()
        low_pow = self.min ** shape
        high_pow = self.max ** shape

        # Same operation order as the closed-form one-liner:
        # (-(u*H^a - u*L^a - H^a) / (H^a * L^a)) ** (-1/a)
        numerator = uniform * high_pow - uniform * low_pow - high_pow
        denominator = high_pow * low_pow
        sample = (-(numerator / denominator)) ** (-1 / shape)

        if self.invert:
            # NOTE(review): this maps [min, max] onto [0, max - min], not
            # back onto [min, max] -- confirm whether that is intended.
            sample = self.max - sample
        return sample
|
98 |
|
99 |
class TruncatedParetoDistribution(BoundedContinuousDistribution):
    """Pareto samples truncated by rejection and rescaled to ``[min, max]``.

    ``scale`` and ``alpha`` are the Pareto scale and shape parameters.
    When ``invert`` is true the result is mirrored inside ``[min, max]``.
    """

    def __init__(self, min, max, scale, alpha, invert = False):
        BoundedContinuousDistribution.__init__(self, min, max)
        self.alpha = float(alpha)
        self.scale = float(scale)
        self.invert = invert

    def get(self):
        """Return one sample in ``[min, max]``.

        Raw Pareto values are shifted so they start at ``min2`` and are
        redrawn until they do not exceed ``max2``; the accepted value is
        then mapped linearly from ``[min2, max2]`` onto ``[min, max]``.
        """
        # Temporary kludge. This just happens to be a range
        # that, with shape and scale both 1.0, yields a roughly
        # 80-20 distribution
        min2 = 0.0
        max2 = 10.0
        v = max2 + 1
        while v > max2:
            u = self.random.random()
            if u == 0.0:
                # random() draws from [0.0, 1.0); a draw of exactly 0.0
                # would make the division below raise ZeroDivisionError,
                # so reject it and draw again.
                continue
            pareto = self.scale / u ** (1 / self.alpha)
            v = pareto - (self.scale - min2)

        # Linear map from [min2, max2] onto [min, max]; with the current
        # constants this is the same as dividing by 10.0.
        v = self.min + ((v - min2) / (max2 - min2)) * (self.max - self.min)

        if self.invert:
            # Mirror the sample around the centre of [min, max].
            v = self.max - (v - self.min)
        return v
|
123 |
|
124 |
|
125 |
class DiscreteDistribution(object):
    """Pick items from a sequence using an underlying continuous distribution.

    The wrapped distribution is expected to produce numbers in [0, 1);
    each number is scaled by the number of values and floored to obtain
    an index into ``values``. Subclasses install the wrapped distribution
    with ``_set_distribution()``.
    """

    def __init__(self, values):
        self.values = values
        self.num_values = len(self.values)
        # Installed later through _set_distribution(); seed() and get()
        # cannot be used before that happens.
        self.__distribution = None

    def _set_distribution(self, dis):
        """Install ``dis`` (an object with ``seed(x)`` and ``get()``)."""
        self.__distribution = dis

    # Expects value in [0,1)
    def _get_from_prob(self, prob):
        """Return the value whose slot contains ``prob``.

        A ``prob`` of 1.0 or more selects the last value instead of
        raising IndexError. An empty ``values`` still raises IndexError.
        """
        pos = int(math.floor(prob * self.num_values))
        # A wrapped distribution can return exactly its upper bound
        # (for example through floating-point rounding), which would
        # index one past the end; clamp to the last slot.
        pos = min(pos, self.num_values - 1)
        return self.values[pos]

    def seed(self, x):
        """Seed the wrapped distribution with ``x``."""
        self.__distribution.seed(x)

    def get(self):
        """Draw from the wrapped distribution and return the matching value."""
        return self._get_from_prob(self.__distribution.get())
144 |
|
145 |
|
146 |
class DiscreteUniformDistribution(DiscreteDistribution):
    """Discrete distribution driven by ``UniformDistribution(0, 1)``."""

    def __init__(self, values):
        DiscreteDistribution.__init__(self, values)
        # Uniform draws between 0 and 1 are mapped onto indices of `values`.
        sampler = UniformDistribution(0,1)
        self._set_distribution(sampler)
150 |
|
151 |
class DiscreteTruncatedParetoDistribution(DiscreteDistribution):
    """Discrete distribution driven by a ``TruncatedParetoDistribution``.

    ``scale``, ``alpha`` and ``invert`` are passed straight through to the
    underlying distribution, which is bounded to the range 0..1.
    """

    def __init__(self, values, scale, alpha, invert = False):
        DiscreteDistribution.__init__(self, values)
        sampler = TruncatedParetoDistribution(0, 1, scale, alpha, invert)
        self._set_distribution(sampler)
155 |
|
156 |
def percentile(values, percent):
    """Return the element of ``values`` at the fractional position ``percent``.

    ``values`` is indexed as given, so it must already be sorted for the
    result to be a percentile. ``percent`` is a fraction (0.5 selects the
    element at the midpoint index). A ``percent`` of 1.0 or more selects
    the last element rather than indexing past the end.

    Raises IndexError if ``values`` is empty.
    """
    last = len(values) - 1
    # int(len * 1.0) == len would be out of range; clamp to the last index.
    pos = min(int(len(values) * percent), last)
    return values[pos]
|
159 |
|
160 |
def print_percentiles(values):
    """Print the min, selected percentiles and max of ``values``.

    The input is sorted into a new list first; each figure is printed on
    its own line with two decimals. Raises IndexError if ``values`` is
    empty.
    """
    ordered = sorted(values)
    # (format, fraction) pairs, printed in this order between min and max.
    rows = (
        ("10p: %.2f", 0.1),
        ("25p: %.2f", 0.25),
        ("med: %.2f", 0.5),
        ("75p: %.2f", 0.75),
        ("90p: %.2f", 0.9),
    )

    print("min: %.2f" % ordered[0])
    for fmt, fraction in rows:
        print(fmt % percentile(ordered, fraction))
    print("max: %.2f" % ordered[-1])
169 |
|
170 |
def print_distribution(counts, N):
    """Print a frequency table for ``counts``.

    ``counts`` maps each value to its number of occurrences and ``N`` is
    the total used as the denominator. One line is printed per value, in
    ascending key order, showing the count, its percentage of ``N`` and
    the cumulative percentage so far.
    """
    running = 0.0
    for value in sorted(counts.keys()):
        count = counts[value]
        running += count
        share = (float(count) / N) * 100
        cumulative = (running / N) * 100
        print("%s: %i (%.2f%%, cumm %.2f%%)" % (value, count, share, cumulative))
178 |
|