Question

我正在运行下面的代码。

import datetime
import pandas as pd
import numpy as np
import pylab as pl
import datetime
from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from matplotlib.collections import LineCollection
from pandas_datareader import data as wb

from sklearn import cluster, covariance, manifold

###############################################################################

# Date range and ticker symbols requested from the data reader.
start = '2019-02-01'
end = '2020-02-01'

tickers = ['MMM', 'ABT', 'ABBV', 'ABMD', 'ACN', 'ATVI']

thelen = len(tickers)

# Download one frame per ticker and tag every row with its ticker symbol.
price_data = []
for ticker in tickers:
    prices = wb.DataReader(ticker, start=start, end=end, data_source='yahoo')[['Open', 'Adj Close']]
    price_data.append(prices.assign(ticker=ticker)[['ticker', 'Open', 'Adj Close']])

# Stack the per-ticker frames on top of each other into one long frame.
df = pd.concat(price_data)

df.rename(columns={'ticker': 'Ticker', 'Adj Close': 'Close'}, inplace=True)

# Notebook-style inspection: as bare expressions these only display something
# when each is the last statement of an interactive cell.
df.dtypes
df.head()
df.shape

pd.set_option('display.max_columns', 500)

# The builtin ``float`` replaces the ``np.float`` alias, which was removed in
# NumPy 1.24. The names avoid shadowing the builtin ``open``.
#
# Wrapping the whole column in a list gives an array of shape (1, len(df)):
# the rows of every ticker end up side by side in a single row.
open_prices = np.array([df.Open]).astype(float)
close_prices = np.array([df.Close]).astype(float)

# The daily variations of the quotes are what carry most information
variation = close_prices - open_prices

上面的代码给了我下面这个只有一行的数组。

    0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30
0   0.38    0.93    0.3 0.72    -0.42   0.37    0.36    0.71    0.89    -0.32   0.11    -0.06   -0.17   0.4 0.25    -0.48   0.1 -0.29   -0.29   -0.38   0.21    0.22    0.11    -0.01   -0.07   -0.66   0   -0.78   0.24    -0.89   0.07

我想要的输出是一个二维数组，如下所示。

    0       1       2       3       4       5       6       7       8       9       10
0   0.38    0.93    0.3     0.72    -0.42   0.37    0.36    0.71    0.89    -0.32   0.11    
1   0.61    0.18    0.63    0.02    -0.03   -0.27   -0.75   -1      0.48    -0.74   -0.34   
2   1.77    0.95    1.69    2.05    -1.36   2.25    1.83    -0.8    1.35    -0.99   -1.35   
3   0.7     -0.12   0.32    -0.14   -0.53   0.63    0.85    0.46    0.23    -0.83   0.59    
4   1.71    -0.8    0.74    -0.58   -1.2    0.38    0.35    0.06    0.56    -0.38   0.64    
5   0.47    0.25    0.93    -0.9    -0.15   0.64    -0.11   -0.09   0.44    -0.47   -0.09

如何把这个只有一行的数组转换为二维数组，使每一行（纵向）对应一只股票，每一列（横向）对应一个交易日的收盘价与开盘价之差？谢谢！

Answer 1

我自己已经解决了这个问题。显然，必须把每只股票的数据分别保存在列表中，而不是合并成一个数据框。

import datetime
import pandas as pd
import numpy as np
import pylab as pl
import datetime
from sklearn.cluster import AffinityPropagation
from sklearn import metrics
from matplotlib.collections import LineCollection
from pandas_datareader import data as wb
from sklearn import cluster, covariance, manifold


# Date range and ticker symbols requested from the data reader.
start = '2019-02-01'
end = '2020-02-01'

tickers = [
    'AXP', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'XOM', 'GS', 'HD', 'IBM',
    'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PFE',
    'PG', 'TRV', 'UNH', 'UTX', 'VZ', 'V', 'WBA', 'WMT', 'DIS',
]

thelen = len(tickers)

# Download one frame per ticker and keep the frames in a list, so each ticker
# can later become its own row of a 2-D array.
price_data = []
for ticker in tickers:
    prices = wb.DataReader(ticker, start=start, end=end, data_source='yahoo')[['Open', 'Adj Close']]
    price_data.append(prices.assign(ticker=ticker)[['ticker', 'Open', 'Adj Close']])

# Long-format frame of all tickers. reset_index() returns a new frame, so its
# result has to be assigned for the call to have any effect.
names = pd.concat(price_data).reset_index()

# One row per ticker. This is a 2-D float array only if every ticker returned
# the same number of rows -- TODO confirm for the chosen date range.
# The builtin ``float`` replaces the ``np.float`` alias, which was removed in
# NumPy 1.24. The names avoid shadowing the builtin ``open``.
open_prices = np.array([q['Open'] for q in price_data]).astype(float)
close_prices = np.array([q['Adj Close'] for q in price_data]).astype(float)

# The daily variations of the quotes are what carry most information
variation = close_prices - open_prices

# Learn a graphical structure from the correlations
edge_model = covariance.GraphicalLassoCV()

# standardize the time series: using correlations rather than covariance
# is more efficient for structure recovery
# Transposed copy: one column per ticker, each divided by its own standard
# deviation, without modifying ``variation``.
X = variation.copy().T
X /= X.std(axis=0)
edge_model.fit(X)

# Cluster using affinity propagation
_, labels = cluster.affinity_propagation(edge_model.covariance_)
n_labels = labels.max()

# Pair every ticker with the cluster label assigned to it and print the pairs.
details = list(zip(tickers, labels))
for detail in details:
    print(detail)

如何将一维数组转换为二维数组？

1 个答案: