我试图通过MATLAB函数kmeans绘制聚类,但是得到的质心过多,不知道为什么。这是我的代码和一个图形示例:
% Excerpt: k-means clustering panel (subplot 5 of a 3x2 grid).
% min_pts and slope are defined outside this excerpt, and the 'if' opened
% below is not closed within it.
% Seed the random number generator so the k-means initialisation is repeatable.
rng(1);
% Two-column feature matrix: column 1 = min_pts, column 2 = slope.
wv_prop = [min_pts(:) slope(:)];
% Only cluster when there is at least one observation.
if (isempty(wv_prop)==0)
% idx: cluster label (1 or 2) for each row; C: one centroid per row (2 rows).
[idx,C] = kmeans(wv_prop,2);
subplot(3,2,5);
% Cluster 1 members as blue dots.
plot(wv_prop(idx==1,1),wv_prop(idx==1,2),'b.','MarkerSize',12);
% NOTE(review): hold is switched on here and never switched off in this
% excerpt. If the same axes are drawn into again (e.g. this code runs in a
% loop on one figure), the points and centroids from earlier passes stay on
% the axes, so more than two centroid markers become visible.
hold on
% Cluster 2 members as red dots.
plot(wv_prop(idx==2,1),wv_prop(idx==2,2),'r.','MarkerSize',12);
% Centroids as thick black crosses.
plot(C(:,1),C(:,2),'kx',...
'MarkerSize',15,'LineWidth',3)
以下是我使用的数据示例: wv_prop:
-7.50904246127179e-05 2.52737793199461e-05
-7.64715493632322e-05 -29.2845021783221
-8.16630514296111e-05 -15.5896244315076
-8.60516901697005e-05 3.87325886247646e-05
-9.07390060961131e-05 4.06844795948271e-05
-7.93980060844007e-05 3.72806601486833e-05
-8.08420950480078e-05 3.81372062193057e-05
-8.53045358845788e-05 4.00072285969318e-05
-7.07712622172574e-05 3.55502071296987e-05
-8.02846575361635e-05 3.91085777803079e-05
-8.82904795076420e-05 4.21557386394776e-05
-8.32088783242009e-05 4.08103587885502e-05
-8.17564769131708e-05 4.06201592898485e-05
-8.88574631122910e-05 4.31980154605407e-05
-9.55496137235401e-05 4.55119867638717e-05
-7.11241881995855e-05 3.72772062250438e-05
-8.20641318582800e-05 6.09118479264444e-05
-7.92369664739745e-05 5.86246041439769e-05
-7.61219361068837e-05 5.57318660221894e-05
-8.52918510230295e-05 5.84710267850959e-05
-8.99668387994064e-05 5.84558301867090e-05
-9.62926333243702e-05 5.87762601336998e-05
-7.87678776488358e-05 4.67111894400931e-05
-7.53525297201741e-05 4.13207831828739e-05
-7.71766983561651e-05 3.82625914011195e-05
-9.03499693359608e-05 4.06874790212135e-05
-7.59387077492098e-05 2.92390401569819e-05
-7.97649576465785e-05 32.1683359898974
-8.06408560217508e-05 1.55409105433306e-05
-8.10515208048491e-05 1.31180389653758e-05
-7.70540121076476e-05 9.43353748786386e-06
-7.24001267378072e-05 5.78599898248438e-06
-8.93350436455590e-05 9.61034087028361e-06
-7.97722332494743e-05 4.89104076311932e-06
-8.40022599007737e-05 5.06726288587479e-06
-7.89655937936233e-05 2.44686642783556e-06
-8.58007004774045e-05 4.06628163987085e-06
-7.68775819259902e-05 1.06146142996962e-06
-7.05769224846652e-05 -2.25666633700963e-06
-7.73022200637920e-05 1.34546072255262e-06
-7.65784897728499e-05 1.62917829786978e-06
-7.41548367397790e-05 1.46536230997079e-06
-9.17371298592096e-05 1.17025036839378e-05
-7.35354500231489e-05 4.43710161064086e-06
function [] = Select_Figs(filename,startblock,endblock,startclust,endclust,animal,day)
%Select_Figs - Builds one six-panel summary figure per cluster/block pair and
%saves each as a PNG in the folder Selected-Figures-<animal>-<day>.
%
%Panels (3x2 grid):
%  1 - average waveform in microvolts (red when it fails a quality check)
%  2 - average waveform with standard-deviation error bars
%  3 - histogram of per-waveform minima (microvolts) + 2-component GMM summary
%  4 - histogram of per-waveform trough-to-peak slopes + 2-component GMM summary
%  5 - k-means (k = 2) clustering of [minimum, slope]
%  6 - raw waveforms as returned by align_wvs
%
%Select_Figs(filename,startblock,endblock,startclust,endclust,animal,day)
%
%filename   - sort structure with fields Idx_<cluster>(block).avg/.standdev/.block,
%             or the name of a MAT-file containing it (e.g. = 'Sort.mat')
%
%startblock - first block (e.g. = 7)
%
%endblock   - last block (e.g. = 12)
%
%startclust - first cluster (e.g. = 5)
%
%endclust   - last cluster (e.g. = 10)
%
%animal     - animal number, numeric or text (e.g. = 12)
%
%day        - start of experiment, numeric or text (e.g. = 101617)
%
%Side effects: changes the current directory to the output folder, adds the
%starting directory to the path, seeds the global random number generator,
%disables the 'stats:gmdistribution:FailedToConverge' warning and writes the
%variable hist_val to the base workspace.
%
%Function called by User_Sort.m

% Accept the sort structure itself or the name of a MAT-file holding it.
% The file must be read before the directory change below.
if ischar(filename) || isa(filename,'string')
    loaded = load(char(filename));
    if isfield(loaded,'Sort')
        Sort = loaded.Sort;     % file stores a single struct named Sort
    else
        Sort = loaded;          % file stores the Idx_<n> variables directly
    end
else
    Sort = filename;
end

% Keep functions located in the starting directory reachable after cd.
addpath(pwd);

% '%s' applied to a number prints the character with that code, so numeric
% identifiers are converted to text first.
if isnumeric(animal)
    animal_txt = num2str(animal);
else
    animal_txt = char(animal);
end
if isnumeric(day)
    day_txt = num2str(day);
else
    day_txt = char(day);
end

foldername = sprintf('Selected-Figures-%s-%s',animal_txt,day_txt); %Dynamic output folder name
if ~exist(foldername,'dir')
    mkdir(foldername);
end
% NOTE(review): the working directory is left inside the output folder, as in
% the original implementation; confirm whether callers rely on this.
cd(fullfile(foldername));

% Silence the convergence warning before the first fit rather than after it.
warning('off','stats:gmdistribution:FailedToConverge');

tvec = 0:.013653333:(.013653333*97);    %Time vector
t = tvec(2:end);
bins = 100;                             %Number of histogram bins
tail_start = 40;                        %First sample searched for the post-trough peak

for clust = startclust:endclust         %Loops through all clusters
    name = sprintf('Idx_%d',clust);     %Field name of this cluster
    hFig = figure('Visible','off');     %One hidden figure, reused for every block

    for block = startblock:endblock     %Loop through all blocks
        % Start from a blank figure so nothing from the previous block
        % (held plots, labels, figure title) leaks into this one.
        set(0,'CurrentFigure',hFig);
        clf(hFig);

        wvfrms_avg = Sort.(name)(block).avg;
        wvfrms_std = Sort.(name)(block).standdev;
        wvfrms     = Sort.(name)(block).block;
        wvfrms_avg_scaled = wvfrms_avg*10^6;    %Volts -> microvolts

        % ---- Panel 1: average waveform with quality checks ----
        subplot(3,2,1);
        if ~isempty(wvfrms_avg_scaled)
            % A single trough index keeps every test below scalar.
            [trough_val,trough_idx] = min(wvfrms_avg_scaled);
            min_loc = t(trough_idx);
            [~,io] = findpeaks(wvfrms_avg_scaled);
            n_left  = nnz(io < trough_idx);     %Peaks before the trough
            n_right = nnz(io > trough_idx);     %Peaks after the trough

            no_nan   = ~any(isnan(wvfrms_avg_scaled(:)));
            amp_bad  = trough_val > -30;
            loc_bad  = (min_loc > 0.55) || (min_loc < 0.3);
            peak_bad = (n_left > 2) || (n_right > 2);

            if no_nan && (amp_bad || loc_bad || peak_bad)
                plot(tvec(1:end-1),wvfrms_avg_scaled,'r');
            else
                plot(tvec(1:end-1),wvfrms_avg_scaled,'b');
            end

            if no_nan
                % The first failing check determines the label.
                if amp_bad
                    lab = sprintf('Time (ms) \n Peak exceeds amplitude range (%s)',num2str(trough_val));
                elseif loc_bad
                    lab = sprintf('Time (ms) \n Peak location exceeds range (Time = %s)',num2str(min_loc));
                elseif peak_bad
                    lab = sprintf('Time (ms) \n Peak limit exceeded (# = %s) Peak = %s',num2str(length(io)),num2str(trough_val));
                else
                    slope_scaled = sf_waveform_slope(wvfrms_avg_scaled,tail_start);
                    lab = sprintf('Time (ms) \n Peak = %s Slope = %s',num2str(trough_val),num2str(slope_scaled));
                end
                xlabel(lab,'FontSize',8);
                ylabel('Mean Voltage (\muV)','FontSize',8);
                title('Average Waveform','FontSize',8);
            end
        end

        % ---- Panel 2: average waveform with error bars ----
        % Select the panel first so the labels never land on panel 1.
        subplot(3,2,2);
        if ~isempty(wvfrms_std) && ~isempty(wvfrms_avg)
            errorbar(t,wvfrms_avg,wvfrms_std); %Plots errorbars
        end
        % NOTE(review): this reports size(...,2), while the feature loop below
        % treats each ROW as one waveform; confirm which dimension is intended.
        wvfrms_num_text = sprintf(['Time (ms) \n # Waveforms: ' num2str(size(wvfrms,2))]);
        xlabel(wvfrms_num_text,'FontSize',8);
        ylabel('Mean Voltage (V)','FontSize',8);
        title('Average Waveform + STD','FontSize',8);

        % ---- Per-waveform features ----
        % Initialised for every block so an empty block cannot reuse the
        % previous block's values, and allocated once, outside the loop.
        if isempty(wvfrms)
            min_pts = zeros(0,1);
            slope   = zeros(0,1);
        else
            min_pts = min(wvfrms,[],2);             %Minimum of every waveform
            slope   = zeros(size(wvfrms,1),1);
            for w = 1:size(wvfrms,1)
                slope(w) = sf_waveform_slope(wvfrms(w,:),tail_start);
            end
        end

        % ---- Panel 3: histogram of waveform minima (microvolts) ----
        hist_val = (min_pts(:)*10^6);
        if ~isempty(hist_val)
            % Kept from the original implementation: exports the values to
            % the base workspace.
            assignin('base','hist_val',hist_val);
        end
        sf_hist_panel(3,hist_val,bins,'Waveform Peaks','\\muV');

        % ---- Panel 4: histogram of waveform slopes ----
        hist_val = (slope(:)*10^3);
        sf_hist_panel(4,hist_val,bins,'Waveform Slope','Slope (m)');

        % ---- Panel 5: k-means clustering of [minimum, slope] ----
        rng(1);                                     %Repeatable k-means start
        wv_prop = [min_pts(:) slope(:)];
        subplot(3,2,5);
        pts = wv_prop(all(isfinite(wv_prop),2),:);  %Rows usable for clustering
        if size(pts,1) >= 2                         %kmeans needs at least k rows
            [idx,C] = kmeans(pts,2);
            plot(pts(idx==1,1),pts(idx==1,2),'b.','MarkerSize',12);
            hold on
            plot(pts(idx==2,1),pts(idx==2,2),'r.','MarkerSize',12);
            plot(C(:,1),C(:,2),'kx',...
                'MarkerSize',15,'LineWidth',3)
            % Release the axes; otherwise centroids of earlier blocks remain.
            hold off
            title('Clustered Peak and Slope','FontSize',8);
            fig_about = sprintf('BL%s - Cluster %s Block %s',animal_txt,num2str(clust),num2str(block));
            figtitle(fig_about);
        else
            plot(0);
        end

        % ---- Panel 6: raw waveforms ----
        if ~isempty(wvfrms)
            [vals] = align_wvs(wvfrms);
            if (~isempty(vals))
                subplot(3,2,6);
                plot(t,vals);
                title('Raw Waveforms','FontSize',8);
            end
        else
            subplot(3,2,6);
            plot(0);
        end

        print(hFig,['Cluster-' num2str(clust) ' Block-' num2str(block)],'-dpng');
    end

    close(hFig);    %Hidden figures would otherwise accumulate in memory
end
disp('Done');
end

function m = sf_waveform_slope(wv,tail_start)
%sf_waveform_slope - Slope, in amplitude units per sample, of the line from the
%waveform minimum to the first local peak found at or after sample tail_start.
%Falls back to the largest sample of that tail when it has no local peak, and
%returns 0 when both points coincide.
wv = wv(:).';
[y1,x1] = min(wv);                  %Trough amplitude and sample index
tail = wv(tail_start:end);
[~,locs] = findpeaks(tail);
if isempty(locs)
    [~,k] = max(tail);
else
    k = locs(1);
end
x2 = k + tail_start - 1;            %Index relative to the full waveform
y2 = wv(x2);
if x2 == x1
    m = 0;                          %Avoid division by zero
else
    m = (y2-y1)/(x2-x1);
end
end

function sf_hist_panel(panel,hist_val,bins,title_txt,label_prefix)
%sf_hist_panel - Draws into subplot(3,2,panel) a histogram of hist_val, an
%envelope line over it, and an x-label summarising a two-component Gaussian
%mixture fit. label_prefix is the leading part of the sprintf format used for
%the x-label. Draws a placeholder plot when hist_val is empty.
subplot(3,2,panel);
if isempty(hist_val)
    plot(0);
    return
end

histogram(hist_val,bins);
ylabel('Count','FontSize',8);
title(title_txt,'FontSize',8);

cnt = histcounts(hist_val,bins);        %Returns bin counts
line_fit = zeros(1,length(cnt));        %Envelope over the histogram
for k = 3:length(line_fit)-3
    if (cnt(k)<mean(cnt))               %Below-average bin: replace it by the
        cnt(k)=mean([cnt(k-1) cnt(k+1)]); %mean of its two neighbours
    end
    window = cnt(k-2:k+2);
    if (mean(window)>=mean(cnt))        %Five-bin mean reaches the overall mean:
        line_fit(k-1) = max(window);    %store the window maximum
    end
end
line_fit(line_fit<=mean(cnt)) = min(cnt)+1;     %Floor for low envelope values
x = linspace(min(hist_val),max(hist_val),length(line_fit));
hold on
plot(x,line_fit,'k','LineWidth',1.5);
hold off

% Two components cannot be fitted to fewer than three usable observations.
if nnz(isfinite(hist_val)) > 2
    gm = fitgmdist(hist_val,2,'RegularizationValue',0.1);
    comp1 = gm.ComponentProportion(1)*100;
    comp2 = gm.ComponentProportion(2)*100;
    mean1 = gm.mu(1);
    mean2 = gm.mu(2);
    hist_leg = sprintf([label_prefix ' \n Component 1 = %0.2f%% Component 2 = %0.2f%% \n Mean 1 = %0.2f Mean 2 = %0.2f'],comp1,comp2,mean1,mean2);
    xlabel(hist_leg,'FontSize',8);
end
end