我正在运行下面的代码。
import datetime
import pandas as pd
import numpy as np
import pylab as pl
import datetime
from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from matplotlib.collections import LineCollection
from pandas_datareader import data as wb
from sklearn import cluster, covariance, manifold
###############################################################################
# Date range of the historical quotes to download.
start = '2019-02-01'
end = '2020-02-01'
tickers = ['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI']
thelen = len(tickers)

# Download one DataFrame per ticker and tag every row with its symbol.
price_data = []
for ticker in tickers:
    prices = wb.DataReader(
        ticker, start=start, end=end, data_source='yahoo'
    )[['Open', 'Adj Close']]
    price_data.append(
        prices.assign(ticker=ticker)[['ticker', 'Open', 'Adj Close']]
    )

# Stack the per-ticker frames on top of each other: the result is ONE long
# table in which every ticker's rows follow the previous ticker's rows.
df = pd.concat(price_data)
df.rename(columns={'ticker': 'Ticker', 'Adj Close': 'Close'}, inplace=True)

# Bare expressions: they only display something in an interactive session
# (notebook / REPL) and have no effect when run as a script.
df.dtypes
df.head()
df.shape

pd.set_option('display.max_columns', 500)

# Wrapping the single stacked column in a list yields an array with exactly
# one row, i.e. shape (1, total_rows) -- all tickers end up side by side in
# that one row instead of one row per ticker.
# `float` replaces the removed `np.float` alias; the names avoid shadowing
# the builtin `open`.
open_prices = np.array([df.Open]).astype(float)
close_prices = np.array([df.Close]).astype(float)

# The daily variations of the quotes are what carry most information
variation = close_prices - open_prices
上面的代码给出的是下面这个一维数组。
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
0 0.38 0.93 0.3 0.72 -0.42 0.37 0.36 0.71 0.89 -0.32 0.11 -0.06 -0.17 0.4 0.25 -0.48 0.1 -0.29 -0.29 -0.38 0.21 0.22 0.11 -0.01 -0.07 -0.66 0 -0.78 0.24 -0.89 0.07
我想要的输出将是一个二维数组,像这样。
0 1 2 3 4 5 6 7 8 9 10
0 0.38 0.93 0.3 0.72 -0.42 0.37 0.36 0.71 0.89 -0.32 0.11
1 0.61 0.18 0.63 0.02 -0.03 -0.27 -0.75 -1 0.48 -0.74 -0.34
2 1.77 0.95 1.69 2.05 -1.36 2.25 1.83 -0.8 1.35 -0.99 -1.35
3 0.7 -0.12 0.32 -0.14 -0.53 0.63 0.85 0.46 0.23 -0.83 0.59
4 1.71 -0.8 0.74 -0.58 -1.2 0.38 0.35 0.06 0.56 -0.38 0.64
5 0.47 0.25 0.93 -0.9 -0.15 0.64 -0.11 -0.09 0.44 -0.47 -0.09
我如何将这个一维数组转换为二维数组,使每日的差值(收盘价 − 开盘价)沿水平方向排列,不同的股票沿垂直方向排列?谢谢!
答案 0 :(得分:0)
我自己已经解决了这个问题。显然,必须把每只股票的数据分别保存在列表中,而不是合并到同一个数据框里。
import datetime
import pandas as pd
import numpy as np
import pylab as pl
import datetime
from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from matplotlib.collections import LineCollection
from pandas_datareader import data as wb
from sklearn import cluster, covariance, manifold
# Date range of the historical quotes to download.
start = '2019-02-01'
end = '2020-02-01'
tickers = [
    'AXP', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'XOM', 'GS', 'HD', 'IBM',
    'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PFE',
    'PG', 'TRV', 'UNH', 'UTX', 'VZ', 'V', 'WBA', 'WMT', 'DIS',
]
thelen = len(tickers)

# Download one DataFrame per ticker and keep them as separate list entries;
# the per-ticker separation is what makes the 2-D layout below possible.
price_data = []
for ticker in tickers:
    prices = wb.DataReader(
        ticker, start=start, end=end, data_source='yahoo'
    )[['Open', 'Adj Close']]
    price_data.append(
        prices.assign(ticker=ticker)[['ticker', 'Open', 'Adj Close']]
    )

# Long-format table of all quotes. reset_index() returns a new frame, so the
# result has to be assigned for the call to have any effect.
names = pd.concat(price_data)
names = names.reset_index()

# One row per ticker, one column per downloaded row of that ticker.
# NOTE(review): assumes every ticker returned the same number of rows;
# otherwise the rows are ragged and no rectangular array can be built.
# `float` replaces the removed `np.float` alias; the names avoid shadowing
# the builtin `open`.
open_prices = np.array([q['Open'] for q in price_data]).astype(float)
close_prices = np.array([q['Adj Close'] for q in price_data]).astype(float)

# The daily variations of the quotes are what carry most information
variation = close_prices - open_prices

# Learn a graphical structure from the correlations
edge_model = covariance.GraphicalLassoCV()

# standardize the time series: using correlations rather than covariance
# is more efficient for structure recovery
X = variation.copy().T  # transpose: rows become observations, columns tickers
X /= X.std(axis=0)
edge_model.fit(X)

# Cluster using affinity propagation
_, labels = cluster.affinity_propagation(edge_model.covariance_)
n_labels = labels.max()

# Pair every ticker with the cluster label assigned to it and print the pairs.
details = list(zip(tickers, labels))
for detail in details:
    print(detail)