我有一个包含2d数组的字典。我试图通过这种方式计算均值,但它不起作用,因为数组也包含nan值。那么有没有更简单的方法来计算平均值?
# 550x700 array of zeros (385000 == 550 * 700).
All = np.zeros(385000).reshape(550,700)
for i in dic.keys():
    a = dic[i]['data']
    # NOTE: this is the attempt the question reports as not working. `All` is
    # never updated inside the loop, so `avg` ends up as the last array
    # divided by the number of entries, and any NaN in `a` carries through.
    avg = (All+a)/len(dic.keys())
答案 0 :(得分:1)
看起来您是想对输入 `a` 和 `b` 逐元素求平均,同时忽略 NaN。一种方法是使用 `np.dstack` 沿第三个轴将 `a` 和 `b` 堆叠起来,然后沿同一个轴调用 `np.nanmean`。这样就得到如下的简单实现:
# Stack a and b along a third axis, then average over that axis while skipping NaNs.
np.nanmean(np.dstack((a,b)),axis=2)
示例运行 -
In [28]: a
Out[28]:
array([[ 2., nan],
[ 5., 4.]])
In [29]: b
Out[29]:
array([[ nan, 3.],
[ 7., 2.]])
In [30]: np.nanmean(np.dstack((a,b)),axis=2)
Out[30]:
array([[ 2., 3.],
[ 6., 3.]])
如果这些 2D 数组来自问题代码中所示的字典,可以用列表推导式把它们收集起来,用 `np.dstack` 堆叠成一个 3D 数组,最后沿最后一个轴调用 `np.nanmean`,如下所示:
# Iterate over the dict's values: looping over `dic` itself yields its keys,
# not the inner mappings that hold 'data' (the question reads them as
# dic[i]['data']). Assumes every 'data' entry is a 2D array of the same
# shape -- confirm against the actual dictionary.
np.nanmean(np.dstack([d['data'] for d in dic.values()]),axis=2)
答案 1 :(得分:1)
上面的答案完全没有问题,但是
version: '3.1'
# Compose services: a Cassandra node, a PostgreSQL database, and the Java
# web application whose environment points at both of them.
services:
  cassandra:
    container_name: "cassandra"
    image: cassandra
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string.
      - "9042:9042"
    volumes:
      # Bind mount of a host directory onto the Cassandra data directory.
      - /home/cassandra:/var/lib/cassandra
  postgresql:
    container_name: "postgresql"
    image: postgres:11.1-alpine
    restart: always
    environment:
      POSTGRES_DB: mywebapp
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
    volumes:
      #- ./startup.sql:/docker-entrypoint-initdb.d/startup.sql
      # NOTE(review): `postgresdata` is a named volume; Compose expects it to
      # be declared under a top-level `volumes:` key, which is not visible in
      # this fragment -- confirm it exists.
      - postgresdata:/var/lib/postgresql/data
    ports:
      - "5432:5432"
  mywebapp:
    container_name: "mywebapp"
    image: openjdk:10-jre-slim
    hostname: mywebapp
    volumes:
      # Native libraries and the executable jar are mounted from the host.
      - ./lib:/home/lib
      - ./mywebapp-1.0.1-SNAPSHOT-exec.jar:/home/mywebapp-1.0.1-SNAPSHOT-exec.jar
    entrypoint:
      - java
      - -jar
      - -Djava.library.path=/home/lib
      - /home/mywebapp-1.0.1-SNAPSHOT-exec.jar
    environment:
      - LD_LIBRARY_PATH=/home/lib
      # Host names below are the service names defined in this file.
      - spring.datasource.url=jdbc:postgresql://postgresql:5432/mywebapp
      - spring.cassandra.contactpoints=cassandra
      - spring.cassandra.port=9042
      - spring.cassandra.keyspace=mywebapp
      #- spring.datasource.username=postgres
      #- spring.datasource.password=postgres
      #- spring.jpa.hibernate.ddlAuto=update+
    ports:
      - "8443:8443"
      - "8080:8080"
    depends_on:
      # NOTE(review): only cassandra is listed although the datasource URL
      # targets the postgresql service -- confirm whether it belongs here too.
      - cassandra
`np.dstack()` 似乎不太灵活或直观。我们还可以使用 `np.stack()` 来提供更直观的显示。请参见下面的示例。
# Two 2x2 inputs, each holding a single NaN at a different position.
a = np.array([[2, np.nan], [5, 4]])
b = np.array([[np.nan, 3], [7, 2]])
# np.stack with axis=0 places a and b along a new leading axis.
c = np.stack((a, b), axis=0)

divider = '=' * 50
# Show each array followed by a separator line.
for arr in (a, b, c):
    print(arr)
    print(divider)
# Average across the stacking axis, skipping NaN entries.
print(np.nanmean(c, axis=0))
[[ 2. nan]
[ 5. 4.]]
==================================================
[[nan 3.]
[ 7. 2.]]
==================================================
[[[ 2. nan]
[ 5. 4.]]
[[nan 3.]
[ 7. 2.]]]
==================================================
[[2. 3.]
[6. 3.]]
`np.stack()` 和 `np.dstack()` 之间的差异可以通过我编写的以下示例看出。
# Two 3x3 integer grids; dr2 holds the values of dr1 in reverse order.
dr1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
dr2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])
divider = '=' * 50

print(dr1)
print(dr2)
print(divider)

# np.dstack: the inputs become layers along a new LAST axis, so the shape is
# (3, 3, 2), i.e. (row, col, layer). Printing shows one (col, layer) block per
# row. The 1 from dr2[2, 2] sits at dr3[2, 2, 1] (0-based).
dr3 = np.dstack((dr1, dr2))
for item in (dr3.shape, dr3, dr3.sum(axis=2)):
    print(item)
print(divider)

# np.stack(axis=0): the inputs become layers along a new FIRST axis, so the
# shape is (2, 3, 3), i.e. (layer, row, col). Printing shows one (row, col)
# block per layer. The 1 from dr2[2, 2] sits at dr4[1, 2, 2] (0-based).
dr4 = np.stack((dr1, dr2), axis=0)
for item in (dr4.shape, dr4, dr4.sum(axis=0)):
    print(item)