我想在numpy中从多元正态分布生成样本。这是我的代码:
import numpy as np
import random
from random import uniform
import math
from sklearn.datasets import make_spd_matrix
k = 5 # how many classes
N = 20 # how many samples
x_min = -20
x_max = 20
y_min = -20
y_max = 20
def main():
w = np.random.random(k) # generate k random numbers
w /= w.sum() # divide by their sum
# now these sum to 1
#print(w) # actual weight vector
number_of_divisions_x = number_of_divisions_y = math.ceil(math.sqrt(k))
x_div = (x_max - x_min)/number_of_divisions_x
y_div = (y_max - y_min)/number_of_divisions_y
# divide a grid
# x = x_min to x_max
# y = y_min to y_max
# now form the grid and put the means
points = [] # the list of actual means
# now generate means
for i in range(number_of_divisions_x):
for j in range(number_of_divisions_y):
if len(points) == k: # after generating k points, break the loop
break
point = [x_min + i * x_div, y_min + j * y_div] # array (not tuple) is needed for the function
points.append(point)
if len(points) == k: # after generating k points, break the loop
break
samples = [] # list containing all the samples
actual_sample = [None] * k # how many samples in each class actually. Stored for later weight correction
covs = [] # actual covariances
for i in range(k):
number_of_samples = math.ceil(N * w[i]) # number of samples from class i, generate at least one sample
actual_sample[i] = number_of_samples # for later weight correction
if i == k-1:
number_of_samples = N - sum(actual_sample[:k-1]) # for the last class, take all the remaining samples
actual_sample[i] = number_of_samples
mean = points[i]
# generate covariance matrix
cov = make_spd_matrix(2) # Generate a random symmetric, positive-definite matrix, whose size is 2x2
covs.append(cov)
file = open("class " + str(i+1) + ".txt", "w") # class 1, 2, 3 etc
file.write(str(mean) + "\n\n")
file.write(str(cov))
file.close() # save the actual mean and covariance to file
s = np.random.multivariate_normal(mean, cov, number_of_samples) # list of lists
for element in s:
e = list(element)
samples.append(e) # make list and insert to the main list
如果我运行代码大约10次,它会执行9次没有任何错误,但在一次运行期间显示错误。
s = np.random.multivariate_normal(mean, cov, number_of_samples) # list of lists
File "mtrand.pyx", line 4508, in mtrand.RandomState.multivariate_normal
File "mtrand.pyx", line 1550, in mtrand.RandomState.standard_normal
File "mtrand.pyx", line 167, in mtrand.cont0_array
ValueError: negative dimensions are not allowed
因此,错误来自多元正态分布的随机数生成。我使用make_spd_matrix()
生成协方差矩阵为正定,因此协方差矩阵的行列式应始终为正。那为什么我有时会收到错误?这里的手段清单是:
[[-20.0, -20.0], [-20.0, -6.666666666666666], [-20.0, 6.666666666666668], [-6.666666666666666, -20.0], [-6.666666666666666, -6.666666666666666]]