Question

我想在numpy中从多元正态分布生成样本。这是我的代码：

import numpy as np
import random
from random import uniform
import math
from sklearn.datasets import make_spd_matrix

k = 5   # how many classes
N = 20   # how many samples
x_min = -20
x_max = 20
y_min = -20
y_max = 20


def main():

w = np.random.random(k)    # generate k random numbers
w /= w.sum() # divide by their sum
# now these sum to 1
#print(w)  # actual weight vector

number_of_divisions_x = number_of_divisions_y = math.ceil(math.sqrt(k))
x_div = (x_max - x_min)/number_of_divisions_x
y_div = (y_max - y_min)/number_of_divisions_y

# divide a grid
# x = x_min to x_max
# y = y_min to y_max

# now form the grid and put the means

points = []  # the list of actual means

# now generate means
for i in range(number_of_divisions_x):
    for j in range(number_of_divisions_y):
        if len(points) == k:   # after generating k points, break the loop
            break
        point = [x_min + i * x_div, y_min + j * y_div]  # array (not tuple) is needed for the function
        points.append(point)

    if len(points) == k:  # after generating k points, break the loop
        break  

samples = []  # list containing all the samples
actual_sample = [None] * k   # how many samples in each class actually. Stored for later weight correction

covs = []    # actual covariances

for i in range(k):

    number_of_samples = math.ceil(N * w[i])  # number of samples from class i, generate at least one sample
    actual_sample[i] = number_of_samples  # for later weight correction

    if i == k-1:
        number_of_samples = N - sum(actual_sample[:k-1])   # for the last class, take all the remaining samples
        actual_sample[i] = number_of_samples

    mean = points[i]

    # generate covariance matrix

    cov = make_spd_matrix(2)   # Generate a random symmetric, positive-definite matrix, whose size is 2x2

    covs.append(cov)

    file = open("class " + str(i+1) + ".txt", "w")  # class 1, 2, 3 etc
    file.write(str(mean) + "\n\n")
    file.write(str(cov))
    file.close() # save the actual mean and covariance to file

    s = np.random.multivariate_normal(mean, cov, number_of_samples)  # list of lists
    for element in s:
        e = list(element)
        samples.append(e)   # make list and insert to the main list

如果我运行代码大约10次，它会执行9次没有任何错误，但在一次运行期间显示错误。

s = np.random.multivariate_normal(mean, cov, number_of_samples)  # list of lists

File "mtrand.pyx", line 4508, in mtrand.RandomState.multivariate_normal 
File "mtrand.pyx", line 1550, in mtrand.RandomState.standard_normal
File "mtrand.pyx", line 167, in mtrand.cont0_array

ValueError: negative dimensions are not allowed

因此，错误来自多元正态分布的随机数生成。我使用make_spd_matrix()生成协方差矩阵为正定，因此协方差矩阵的行列式应始终为正。那为什么我有时会收到错误？这里的手段清单是：

[[-20.0, -20.0], [-20.0, -6.666666666666666], [-20.0, 6.666666666666668], [-6.666666666666666, -20.0], [-6.666666666666666, -6.666666666666666]]

ValueError：从多元正态分布生成样本时，不允许使用负维度

0 个答案: