我是机器学习的新手,对下面的代码有两个问题。 我的第一个问题是关于应该使用多少个聚类(簇):我的理解是,聚类的数量应该与特征(和/或列)的数量相同。 我的第二个问题是我的代码编排得是否正确:我能得到输出,因此我认为它可以正常工作。
Code:
from random import randint
import pandas_datareader.data as web
import pandas as pd
import datetime
import itertools as it
import numpy as np
import csv
import matplotlib.pyplot as plt
# Load the raw table and give its four columns stable names.
# NOTE(review): read_csv treats the first line of the file as a header row;
# if 'Filename.txt' has no header line, pass header=None so it is kept as data.
df = pd.read_csv('Filename.txt')
df.columns = ['Date', 'Col1', 'Col2', 'Col3']

# Date is %Y-%m-%d format, and columns are integers.
# Parse the dates BEFORE building the reversed frame, so that reversed_df
# (and the Date series taken from it) holds real datetimes, not raw strings.
df["Date"] = pd.to_datetime(df["Date"])

# Print complete frames instead of a truncated preview.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)

# Reverse the row order; the original index labels travel with their rows.
reversed_df = df.iloc[::-1]
Date = reversed_df.Date
ColumnOne = reversed_df.Col1
ColumnTwo = reversed_df.Col2
ColumnThree = reversed_df.Col3

# Feature matrix: the three value columns, without Date.
df = reversed_df[['Col1', 'Col2', 'Col3']]
data = df

# Target would be one of the features (columns).
target = ColumnOne
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans

# Hold out 30% of the rows; the fixed seed makes the split reproducible.
data_train, data_test, target_train, target_test = train_test_split(
    data, target, test_size=0.30, random_state=1
)

# The number of clusters is a modelling choice (how many groups of rows to
# look for); it is NOT tied to the number of feature columns. The value 3 is
# kept from the original script -- validate it, e.g. by comparing
# kmeans_model.inertia_ or a silhouette score across several values.
kmeans_model = KMeans(n_clusters=3, random_state=1)

data1 = data_train
data2 = target_train  # kept for later code; clustering itself does not use it

# K-means is unsupervised: KMeans.fit ignores its y argument, so only the
# feature matrix is passed.
kmeans_model.fit(data1)

# One cluster index per training row, in the row order of data1.
labels = kmeans_model.labels_
print(labels)
from sklearn.decomposition import PCA

# Project the training features onto their first two principal components
# so the clusters can be drawn in a 2-D scatter plot.
pca_2 = PCA(2)
# data2 is passed through unchanged from the original call; per the
# scikit-learn documentation PCA ignores the y argument.
plot_columns = pca_2.fit_transform(data1, data2)

# First component on the x axis, second on the y axis, coloured by the
# cluster label assigned to each row.
first_component = plot_columns[:, 0]
second_component = plot_columns[:, 1]
plt.scatter(x=first_component, y=second_component, c=labels)
plt.show()