%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A solution to CSE For Your Homework Project 3
% Classified Information: The Data Clustering Problem
% Nargess Memarsadeghi and Dianne P. O'Leary
%
% problem5_and_6.m    Dianne P. O'Leary 04/03
%
% Script: builds a small two-column data set, clusters it
% with k-means for k = 2,3,4 from two different sets of
% initial centers, and repeats the experiment after
% multiplying the second data component by 100.
% For every run it prints a summary table and draws the
% clustered points in a 2 x 2 grid of subplots.
%
% Relies on mycluster, computeradius and map_to_cluster,
% which are defined outside this file.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generate the data and set convergence parameters.
% The data is ast, an array of dimension m x p x q.
%
% We will try to cluster the q-vectors into k clusters,
% for k=2,3,4.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

tol      = 1;
maxiters = 40;

m = 20;
p = 1;
q = 2;
ymin = -1;
ymax =  1;

% Preallocate so that a stale ast left in the workspace by an
% earlier run (possibly with other dimensions) cannot leak in.
ast = zeros(m,p,q);

% First half of the points: first component +1, second
% component equally spaced in [ymin,ymax].
ast(1:m/2,1,1)   = 1;
ast(1:m/2,1,2)   = linspace(ymin,ymax,m/2);

% Second half: same second component, first component -1.
ast(m/2+1:m,1,1) = -1;
ast(m/2+1:m,1,2) = linspace(ymin,ymax,m/2);

% Keep the unscaled second component so that every pass of the
% scale loop starts from the original values instead of
% compounding the previous scale factor.
y0 = ast(:,1,2);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generate markers for the plots (one per cluster index).
% Built with struct() so a pre-existing variable named
% shape cannot interfere.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

shape = struct('str', {'mo', 'bv', 'gs', 'kd'});

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Cluster the original data (Problem 5)
% and then the scaled data (Problem 6), where the
% scaling replaces the second component of each data
% value by 100 * the value.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

for scale = [1 100],

   ast(:,1,2) = y0*scale;

   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   % Try two different initial values for centers.
   % Try 2, 3, and 4 clusters.
   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

   for mystart = 1:2,

      %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      % Graph the (unclustered) data in the first panel
      % of a new figure.
      %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      figure
      subplot(2,2,1)
      plot(ast(:,1,1),ast(:,1,2),'r*')
      title('Original Data')
      axis([-1.2 1.2 1.2*scale*ymin 1.2*scale*ymax])

      for k = 2:4,

         ss = sprintf('%d clusters',k);

         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % Build the 2 x k array of initial centers
         % (one column per cluster), starting from empty
         % so columns from the previous k do not persist.
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

         centers = [];

         if (mystart == 1)

            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            % First choice: initialize the centers to be
            % the extreme data points.  The signs cycle
            % through (-1,ymin), (1,-ymin), (-1,-ymin),
            % (1,ymin), with the second entry scaled.
            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

            for kk = 1:k,
               centers(1,kk) = (-1)^kk;
               centers(2,kk) = scale * ymin*(-1)^floor(kk/2);
            end

         else

            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            % Second choice: initialize the centers to be
            % equally spaced in the 2nd variable, along
            % the centerline for the 1st variable.
            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

            centers(1,1:k) = 0;
            centers(2,1:k) = scale*linspace(ymin,ymax,k);

         end %if

         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % Find the k clusters using the k-means
         % algorithm (mycluster is defined elsewhere).
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

         [aclus,centers,clustercounts,iters] = ...
                   mycluster(ast,centers,tol,maxiters);

         clusterradius = computeradius(ast,centers,aclus);

         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % Print summary information: one table row per
         % cluster (center coordinates, count, radius).
         % NOTE(review): the transposes assume
         % clustercounts and clusterradius come back as
         % row vectors of length k -- confirm against
         % mycluster / computeradius.
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

         disp(ss)
         disp(sprintf('Number of iterations = %d',iters))
         disp(' ')   % disp('') shows nothing in MATLAB; a blank line is intended
         disp(' Cluster centers Counts Radii')
         disp(sprintf('%7.2f %7.2f %10d %7.2f \n', ...
              [centers',clustercounts',clusterradius']'))

         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
         % Display the clustered data and measure its
         % error: replace every data vector by the center
         % of the cluster it is mapped to, then compare
         % with the data in the Frobenius norm.
         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

         [aclus,clustercounts] = map_to_cluster(ast,centers);

         asclus = zeros(m,p,q);
         for row = 1:m,
            for col = 1:p,
               asclus(row,col,:) = centers(:,aclus(row,col));
            end
         end

         disp(' ')   % blank separator line (see note above)

         n1 = 0;     % squared norm of (data - clustered data)
         n2 = 0;     % squared norm of the data
         for comp = 1:q,
            n1 = n1 + norm(ast(:,:,comp)-asclus(:,:,comp),'fro')^2;
            n2 = n2 + norm(ast(:,:,comp)                 ,'fro')^2;
         end
         disp(sprintf('Norm of relative change in data = %e',sqrt(n1/n2)))

         % Panel k of the 2 x 2 grid shows the k-cluster result,
         % one marker style per cluster.
         subplot(2,2,k)
         hold on
         for kk = 1:k,
            ind = find(aclus==kk);
            plot(ast(ind,1,1),ast(ind,1,2),shape(kk).str)
         end
         axis([-1.2 1.2 1.2*scale*ymin 1.2*scale*ymax])
         title(ss)
         drawnow

      end % for k

   end % for mystart

end % for scale