Kmeans被绘制的质心过多

时间:2018-07-25 03:43:12

标签: matlab k-means

我试图通过MATLAB函数kmean绘制聚类,但是得到的质心过多,不知道为什么。这是我的代码和一个图形示例:

rng(1);

wv_prop = [min_pts(:) slope(:)];

if (isempty(wv_prop)==0)

[idx,C] = kmeans(wv_prop,2);      

subplot(3,2,5);

plot(wv_prop(idx==1,1),wv_prop(idx==1,2),'b.','MarkerSize',12);

hold on

plot(wv_prop(idx==2,1),wv_prop(idx==2,2),'r.','MarkerSize',12);

plot(C(:,1),C(:,2),'kx',...
 'MarkerSize',15,'LineWidth',3)

FIGURE

以下是我使用的数据示例: wv_prop:

-7.50904246127179e-05   2.52737793199461e-05
-7.64715493632322e-05   -29.2845021783221
-8.16630514296111e-05   -15.5896244315076
-8.60516901697005e-05   3.87325886247646e-05
-9.07390060961131e-05   4.06844795948271e-05
-7.93980060844007e-05   3.72806601486833e-05
-8.08420950480078e-05   3.81372062193057e-05
-8.53045358845788e-05   4.00072285969318e-05
-7.07712622172574e-05   3.55502071296987e-05
-8.02846575361635e-05   3.91085777803079e-05
-8.82904795076420e-05   4.21557386394776e-05
-8.32088783242009e-05   4.08103587885502e-05
-8.17564769131708e-05   4.06201592898485e-05
-8.88574631122910e-05   4.31980154605407e-05
-9.55496137235401e-05   4.55119867638717e-05
-7.11241881995855e-05   3.72772062250438e-05
-8.20641318582800e-05   6.09118479264444e-05
-7.92369664739745e-05   5.86246041439769e-05
-7.61219361068837e-05   5.57318660221894e-05
-8.52918510230295e-05   5.84710267850959e-05
-8.99668387994064e-05   5.84558301867090e-05
-9.62926333243702e-05   5.87762601336998e-05
-7.87678776488358e-05   4.67111894400931e-05
-7.53525297201741e-05   4.13207831828739e-05
-7.71766983561651e-05   3.82625914011195e-05
-9.03499693359608e-05   4.06874790212135e-05
-7.59387077492098e-05   2.92390401569819e-05
-7.97649576465785e-05   32.1683359898974
-8.06408560217508e-05   1.55409105433306e-05
-8.10515208048491e-05   1.31180389653758e-05
-7.70540121076476e-05   9.43353748786386e-06
-7.24001267378072e-05   5.78599898248438e-06
-8.93350436455590e-05   9.61034087028361e-06
-7.97722332494743e-05   4.89104076311932e-06
-8.40022599007737e-05   5.06726288587479e-06
-7.89655937936233e-05   2.44686642783556e-06
-8.58007004774045e-05   4.06628163987085e-06
-7.68775819259902e-05   1.06146142996962e-06
-7.05769224846652e-05   -2.25666633700963e-06
-7.73022200637920e-05   1.34546072255262e-06
-7.65784897728499e-05   1.62917829786978e-06
-7.41548367397790e-05   1.46536230997079e-06
-9.17371298592096e-05   1.17025036839378e-05
-7.35354500231489e-05   4.43710161064086e-06
function [] = Select_Figs(filename,startblock,endblock,startclust,endclust,animal,day)
%Select_Figs - Plots average waveforms, standard deviation, difference over time,
%fitted peak location histogram, mean squared error, k-mean clustered peak location and slope,
%and raw waveforms across selected blocks and clusters,
%saves to folder Selected-Figures-animal-date
%
%Select_Figs(filename,startblock,endblock,startclust,endclust,animal,date)
%
%filename - Sort.mat(e.g. = 'Sort.mat')
%
%startblock- first block (e.g. = 7)
%
%endblock - last block (e.g. = 12)
%
%startclust - first cluster (e.g. = 5)
%
%endclust - last cluster (e.g. = 10)
%
%animal - animal number (e.g. = 12)
%
%date - start of experiment (e.g. = 101617)
%
%Function called by User_Sort.m

Sort = filename; 
addpath(pwd);
%Get Sort file
foldername = sprintf('Selected-Figures-%s-%s',animal,day);                 %Creates dynamic folder name to store figures
mkdir(foldername);                                                          %Makes directory
cd(fullfile(foldername));                                                   %Cd to new directory
tvec = 0:.013653333:(.013653333*97);                                        %Time vector
t = tvec(2:end);

for clust = startclust:endclust                                                 %Loops through all clusters
   fig = cell(1,endblock);                                             %Preallocate # of figures
   name = sprintf('Idx_%d',clust);                                         %Individual cluster name
   fig{clust} = figure('Visible', 'off');                                  %Turns figure visibility off
   for block = startblock:endblock                                          %Loop through all blocks

       wvfrms_avg =Sort.(name)(block).avg;
        wvfrms_avg_scaled = (wvfrms_avg*10^6);
        wvfrms_std =Sort.(name)(block).standdev;
        min_ind = wvfrms_avg_scaled == min(wvfrms_avg_scaled);
        min_loc = t(min_ind);
        [~,io] = findpeaks(wvfrms_avg_scaled);
        leftmin = io<find(wvfrms_avg_scaled==min(wvfrms_avg_scaled));
        leftmin = leftmin(leftmin~=0);
        rightmin = io>find(wvfrms_avg_scaled==min(wvfrms_avg_scaled));
        rightmin = rightmin(rightmin~=0);
        if (isempty(wvfrms_avg_scaled)==0)
            subplot(3,2,1);
        if (isnan(wvfrms_avg_scaled)==0)&((-30<min(wvfrms_avg_scaled))||(min_loc>0.55)||(min_loc<0.3)||(length(io(leftmin))>2)||(length(io(rightmin))>2))
            plot(tvec(1:end-1),wvfrms_avg_scaled,'r');
        else
            plot(tvec(1:end-1),wvfrms_avg_scaled,'b');
        end           
        end

    new_wv = wvfrms_avg_scaled(40:end);
    [~,locs_scaled] = findpeaks(new_wv);

    if isempty(locs_scaled)==1
        ind_scaled = max(new_wv);
    else
    ind_scaled = locs_scaled(1);
    end

    x1_scaled = new_wv(find(min(wvfrms_avg_scaled)));
    y1_scaled = min(wvfrms_avg_scaled);
    x2_scaled = ind_scaled;
    y2_scaled = new_wv(find(ind_scaled));
    slope_scaled= (y2_scaled-y1_scaled)./(x2_scaled-x1_scaled);

       if (isnan(wvfrms_avg_scaled)==0)
       if ((-30<min(wvfrms_avg_scaled)))
            lab = sprintf('Time (ms) \n Peak exceeds amplitude range (%s)',num2str(min(wvfrms_avg_scaled)));
            xlabel(lab,'FontSize',8);
            ylabel('Mean Voltage (\muV)','FontSize',8);
            title('Average Waveform','FontSize',8);
            elseif ((min_loc>0.55)||(min_loc<0.3))
              lab = sprintf('Time (ms) \n Peak location exceeds range (Time = %s)',num2str(min_loc));
              xlabel(lab,'FontSize',8);
             ylabel('Mean Voltage (\muV)','FontSize',8);
            title('Average Waveform','FontSize',8);
        elseif (length(io(leftmin))>2)||(length(io(rightmin))>2)
            lab = sprintf('Time (ms) \n Peak limit exceeded (# = %s) Peak = %s',num2str(length(io)),num2str(min(wvfrms_avg_scaled)));
              xlabel(lab,'FontSize',8);
               ylabel('Mean Voltage (\muV)','FontSize',8);
            title('Average Waveform','FontSize',8);
       else 
            lab = sprintf('Time (ms) \n Peak = %s Slope = %s',num2str(min(wvfrms_avg_scaled)),num2str(slope_scaled));
            xlabel(lab,'FontSize',8)
         ylabel('Mean Voltage (\muV)','FontSize',8);
            title('Average Waveform','FontSize',8);
       end
       end


        if (isempty(wvfrms_std)==0&isempty(wvfrms_avg)==0)
            subplot(3,2,2);
            errorbar(t,wvfrms_avg,wvfrms_std);       %Plots errorbars
        end

         wvfrms_num_text = sprintf(['Time (ms) \n # Waveforms: ' num2str(size(Sort.(name)(block).block,2))]);

        xlabel(wvfrms_num_text,'FontSize',8);
        ylabel('Mean Voltage (V)','FontSize',8);
        title('Average Waveform + STD','FontSize',8);


    wvfrms = Sort.(name)(block).block;  
    for i = 1:size(wvfrms,1)

    if isempty(wvfrms)==0
    min_pts = min(wvfrms,[],2);                                        %Adds array of min wvfrm points to matrix
    slope = zeros(1,size(wvfrms,1));
    new = wvfrms(i,:);
    new_cut = new(40:end);
    [~,locs] = findpeaks(new_cut);
    if isempty(locs)==1
        ind = max(new_cut);
    else
    ind = locs(1);
    end
    x1 = new(find(min_pts(i)));
    y1 = min_pts(i);
    x2 = ind;
    y2 = new(find(ind));
    slope(i) = (y2-y1)./(x2-x1);
    else
        slope(i) = 0;
    end

    end


    bins = 100;       
     hist_val = (min_pts(:)*10^6);   
    if isempty(hist_val)==0
                                           %Convert matrix of min points to array and into microvolts
    subplot(3,2,3);
    histogram(hist_val,bins);       
    ylabel('Count','FontSize',8);
    title('Waveform Peaks','FontSize',8);
    cnt = histcounts(hist_val,bins);                                        %Returns bin counts
    line_fit = zeros(1,length(cnt));                                        %Preallocates vector to hold line to fit histogram

    for i = 3:length(line_fit)-3     
        if (cnt(i)<mean(cnt))                                               %If bin count is less than mean, take mean of 3 
        cnt(i)=mean([cnt(i-1) cnt(i+1)]);                                   %consecutive bins, set as bin count
        end

        if (mean([cnt(i-2) cnt(i-1) cnt(i) cnt(i+1) cnt(i+2)])>=mean(cnt))  %If mean of 5 consecutive bins
        line_fit(i-1) = (max([cnt(i-2) cnt(i-1) cnt(i) cnt(i+1) cnt(i+2)]));%exceeds bin count, set max, 
        end                                                                 %add to line fit vector
    end

    line_fit(line_fit<=mean(cnt)) = min(cnt)+1;                             %Set line_fit values less than mean

    x = linspace(min(hist_val),max(hist_val),length(line_fit));             %X axis (min - max point of vals)
    hold on
    plot(x,line_fit,'k','LineWidth',1.5);
    assignin('base','hist_val',hist_val);
    if (isempty(hist_val)==0)
    gm = fitgmdist(hist_val,2,'RegularizationValue',0.1);
    warning('off','stats:gmdistribution:FailedToConverge');
    comp1 = gm.ComponentProportion(1)*100;
    comp2 = gm.ComponentProportion(2)*100;
    mean1 = gm.mu(1);
    mean2 = gm.mu(2);
    hist_leg = sprintf('\\muV \n Component 1 = %0.2f%% Component 2 = %0.2f%% \n Mean 1 = %0.2f Mean 2 = %0.2f',comp1,comp2,mean1,mean2);
    xlabel(hist_leg,'FontSize',8);
    end
    hold off
    else 
     subplot(3,2,3);
     hist_val = 0;
     plot(hist_val);
    end

        hist_val = (slope(:)*10^3);

    if isempty(hist_val)==0
    subplot(3,2,4);
    histogram(hist_val,bins);       
    ylabel('Count');
    cnt = histcounts(hist_val,bins);   %Returns bin counts
    line_fit = zeros(1,length(cnt));   %Preallocates vector to hold line to fit histogram

    for i = 3:length(line_fit)-3     

        if (cnt(i)<mean(cnt))             %If bin count is less than mean, take mean of 3 
        cnt(i)=mean([cnt(i-1) cnt(i+1)]);   %consecutive bins, set as bin count
        end

        if (mean([cnt(i-2) cnt(i-1) cnt(i) cnt(i+1) cnt(i+2)])>=mean(cnt))          %If mean of 5 consecutive bins
        line_fit(i-1) = (max([cnt(i-2) cnt(i-1) cnt(i) cnt(i+1) cnt(i+2)]));      %exceeds bin count, set max, 
        end                                                             %add to line fit vector

    end

    line_fit(line_fit<=mean(cnt)) = min(cnt)+1;                     %Set line_fit values less than mean
    x = linspace(min(hist_val),max(hist_val),length(line_fit)); %X axis (min - max point of vals)
    hold on
    plot(x,line_fit,'k','LineWidth',1.5);

    gm = fitgmdist(hist_val,2,'RegularizationValue',0.1);
    warning('off','stats:gmdistribution:FailedToConverge');
    comp1 = gm.ComponentProportion(1)*100;
    comp2 = gm.ComponentProportion(2)*100;
    mean1 = gm.mu(1);
    mean2 = gm.mu(2);

    title('Waveform Slope','FontSize',8);
    hist_leg = sprintf('Slope (m) \n Component 1 = %0.2f%% Component 2 = %0.2f%% \n Mean 1 = %0.2f Mean 2 = %0.2f',comp1,comp2,mean1,mean2);
    xlabel(hist_leg,'FontSize',8);

    hold off
    else 
     subplot(3,2,4);
     hist_val = 0;
     plot(hist_val);
    end

    rng(1);
        wv_prop = [min_pts(:) slope(:)];

        if (isempty(wv_prop)==0)
    [idx,C] = kmeans(wv_prop,2);      

        subplot(3,2,5);
    plot(wv_prop(idx==1,1),wv_prop(idx==1,2),'b.','MarkerSize',12);
    hold on
    plot(wv_prop(idx==2,1),wv_prop(idx==2,2),'r.','MarkerSize',12);
   plot(C(:,1),C(:,2),'kx',...
     'MarkerSize',15,'LineWidth',3)

    title('Clustered Peak and Slope','FontSize',8);
        fig_about = sprintf('BL%s - Cluster %s Block %s', animal,num2str(clust),num2str(block));
        figtitle(fig_about);
        else
                subplot(3,2,5);
        wv_prop = 0;
        plot(wv_prop);
        end
if isempty(wvfrms)==0
[vals] = align_wvs(wvfrms);     
    if (~isempty(vals)) 
        subplot(3,2,6);
        plot(t,vals);
        title('Raw Waveforms','FontSize',8);   
    end
else
    subplot(3,2,6);
    w = 0;
    plot(w);
end

    print(fig{clust},['Cluster-' num2str(clust) ' Block-' num2str(block)],'-dpng');
    end  
end
  disp('Done');
end

0 个答案:

没有答案