
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                        %
%  A solution to CSE For Your Homework Project 3         %
%  Classified Information: The Data Clustering Problem   %
%  Nargess Memarsadeghi and Dianne P. O'Leary            %
%                                                        %
%  problem5_and_6.m  Dianne P. O'Leary 04/03             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Generate the data and set convergence parameters.     %
%  The data is ast, an array of dimension m x p x q.     %
%                                                        %
%  We will try to cluster the q-vectors into k clusters, %
%  for k=2,3,4.                                          %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Convergence controls handed to mycluster below.
tol = 1;
maxiters = 40;

% Data dimensions: m points arranged as an m x p array of q-vectors.
% m must be even, since the points are split into two equal halves.
m = 20;
p = 1;
q = 2;

% Range of the second component of the data.
ymin = -1 ;
ymax =  1 ;

% Preallocate so that a leftover variable named ast in the workspace
% (possibly of a different size) cannot leak stale entries into the data.
ast = zeros(m,p,q);

% Two vertical lines of m/2 points each: first component +1 for the
% first half and -1 for the second half; the second component is
% equally spaced between ymin and ymax on both lines.
yline = linspace(ymin,ymax,m/2);
ast(1:m/2,1,1) = 1;
ast(1:m/2,1,2) = yline;
ast(m/2+1:m,1,1) = -1;
ast(m/2+1:m,1,2) = yline;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Generate markers for the plots.                       %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% One plot marker (color + symbol) per cluster index 1..4.
% Building the struct array in a single call replaces any existing
% workspace variable named shape instead of patching it element by
% element, so stale elements or fields cannot survive.
shape = struct('str', {'mo','bv','gs','kd'});

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Cluster the original data (Problem 5)                 %
%  and then the scaled data, (Problem 6), where the      %
%  scaling replaces the second component of each data    %
%  value by 100 * the value.                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Keep an unscaled copy of the second component so that %
%  every pass scales the original values.  Scaling ast   %
%  in place would compound the factors from pass to pass %
%  (harmless only because the first factor is 1).        %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

ybase = ast(:,1,2);

for scale = [1 100],

   ast(:,1,2) = ybase*scale;

   % Axis limits shared by every subplot of this scale.
   axlims = [-1.2 1.2 1.2*scale*ymin 1.2*scale*ymax];

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Try two different initial values for centers.         %
%  Try 2, 3, and 4 clusters.                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

for mystart=1:2,

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Graph the data.                                       %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

   figure
   subplot(2,2,1)
   plot(ast(:,1,1),ast(:,1,2),'r*')
   title('Original Data')
   axis(axlims)

for k=2:4,

      ss = sprintf('%d clusters',k);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Reset the centers so that columns left over from a    %
%  previous pass do not carry into this one.             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      centers = [];

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% First choice: initialize the centers to be             %
% the extreme data points.  The first component          %
% alternates -1,+1 and the sign of the second component  %
% flips with floor(kk/2).                                %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      if (mystart == 1)
         for kk = 1:k,
            centers(1,kk) = (-1)^kk;
            centers(2,kk) = scale * ymin*(-1)^floor(kk/2);
         end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Second choice: initialize the centers to be            %
% equally spaced in the 2nd variable, along the          %
% centerline for the 1st variable.                       %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      else
         centers(1,1:k) = 0;
         centers(2,1:k) = scale*linspace(ymin,ymax,k);
      end %if

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Find the  k clusters using the k-means algorithm.     %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      [aclus,centers,clustercounts,iters] = ...
              mycluster(ast,centers,tol,maxiters);
      clusterradius = computeradius(ast,centers,aclus);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Print summary information.                            %
%  disp(' ') is used for blank lines because disp('')    %
%  prints nothing in MATLAB.                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      disp(ss)
      fprintf('Number of iterations = %d\n',iters)
      disp(' ')
      disp('  Cluster centers       Counts  Radii')
      % One output row per cluster: the format is recycled over the
      % columns of the transposed table.  The extra newline matches
      % the one disp used to append after the table.
      fprintf('%7.2f  %7.2f %10d %7.2f \n', ...
            [centers',clustercounts',clusterradius']')
      fprintf('\n')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%  Display the clustered data and measure its error.     %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      [aclus,clustercounts] = map_to_cluster(ast,centers);

      % asclus is the data with every point replaced by the center
      % of the cluster it was assigned to.
      asclus = zeros(m,p,q);

      for i=1:m,
         for j=1:p,
            asclus(i,j,:) = centers(:,aclus(i,j));
         end
      end

      disp(' ')

      % Relative error: Frobenius norm of (ast - asclus) over all q
      % components, divided by the Frobenius norm of ast.
      n1 = 0;
      n2 = 0;
      for i=1:q,
         n1 = n1 + norm(ast(:,:,i)-asclus(:,:,i),'fro')^2;
         n2 = n2 + norm(ast(:,:,i)              ,'fro')^2;
      end
      fprintf('Norm of relative change in data = %e\n',sqrt(n1/n2))

      % Subplots 2..4 hold the k = 2..4 results; each cluster is
      % drawn with its own marker.
      subplot(2,2,k)
      hold on
      for kk=1:k,
         ind = find(aclus==kk);
         plot(ast(ind,1,1),ast(ind,1,2),shape(kk).str)
      end
      axis(axlims)

      title(ss)
      drawnow

   end % for k
   end % for mystart
end % for scale
