function section2_soft(k_in) %% EM (soft k-means) example % CS224N NLP Section 2 EM 2004.8.18 % Author: Paul Baumstarck global P C k ix lw c1 = [1;2]+1.5; c2 = [5;4]+1.5; c3 = [6.5;1]+1.5; if nargin < 1 k = 2; else k = k_in; end pre = 3; n1 = pre*15; n2 = pre*20; n3 = 0*pre*20; n = n1+n2+n3; ms = 20; ms1 = 16; lw = 5; ix = zeros(k,n); s = 1; P = [bsxfun(@plus,s*randn(2,n1),c1) ... bsxfun(@plus,s*randn(2,n2),c2) ... bsxfun(@plus,s*randn(2,n3),c3)]; figure(1), clf, hold on, grid on, axis equal; plot(P(1,:),P(2,:),'.','MarkerSize',ms,'Color',0.5*ones(1,3)); title('Point cloud we wish to segment into 2 clusters'); z = axis; z([1 3]) = 0; axis(z); pause; C = reshape(P(:,ceil(n*rand(1,k))),[2 1 k]); if k == 2 kcolors = [1 0 0; 0 0 1]; % kcolors = [1 0 0; 0 1 0]; elseif k == 3 kcolors = eye(3); else kcolors = lines(k); end for i=1:10 figure(1), clf, hold on, grid on, axis equal; plot(P(1,:),P(2,:),'.','MarkerSize',ms,'Color',0.5*ones(1,3)); for j=1:k plot(C(1,1,j),C(2,1,j),'^','MarkerSize',ms1,... 'MarkerFaceColor',kcolors(j,:),'Color',kcolors(j,:)); end title(sprintf('Cluster centroids, itertion %d',i-1)); axis(z); pause; E(); % E step: compute expected labels ix figure(1), clf, hold on, grid on, axis equal; plot(P(1,:),P(2,:),'.','MarkerSize',ms,'Color',0.5*ones(1,3)); for j=1:k plot(C(1,1,j),C(2,1,j),'^','MarkerSize',ms1,... 'MarkerFaceColor',kcolors(j,:),'Color',kcolors(j,:)); end plotline(); axis(z); title('Minimum distance to centroid used to decide labels'); pause; figure(1), clf, hold on, grid on, axis equal; colours = sum(bsxfun(@times,reshape(ix,[k 1 n]),kcolors),1); colours = bsxfun(@rdivide,colours,1.3*max(colours,[],2)); for j=1:k plot(C(1,1,j),C(2,1,j),'^','MarkerSize',ms1,... 'MarkerFaceColor',kcolors(j,:),'Color',kcolors(j,:)); end for j=1:n plot(P(1,j),P(2,j),'.','MarkerSize',ms,'Color',colours(1,:,j)); end plotline(); axis(z); title('Labeled points'); pause; Cp = C; M(); figure(1), clf, hold on, grid on, axis equal; for j=1:k plot(Cp(1,1,j),Cp(2,1,j),'^','MarkerSize',ms1,... 'MarkerFaceColor',kcolors(j,:),'Color',kcolors(j,:)); end for j=1:n plot(P(1,j),P(2,j),'.','MarkerSize',ms,'Color',colours(1,:,j)); end ax = zeros(1,n); for j=1:k asg = max(ix,[],1)==ix(j,:); ax(find(asg)) = j; % plot(P(1,asg),P(2,asg),'o','LineWidth',2,'MarkerSize',ms,... % 'Color',colours(1,:,asg)); end % plotline(); axis(z); % plot(C(1,:),C(2,:),'rp'); % new centroids for j=1:n for m=1:k % plot([P(1,j) C(1,1,ax(j))],... % [P(2,j) C(2,1,ax(j))],':','Color',kcolors(ax(j),:)); lcolour = (1-ix(m,j)^0.5)*ones(1,3) + kcolors(m,:)*ix(m,j)^0.5; plot([P(1,j) C(1,1,m)],... [P(2,j) C(2,1,m)],':','Color',lcolour); end end title('New cluster centroids'); pause; end function E() % Compute expected labels from current centroids global P C k ix for i=1:k Cp = C(:,:,(1:k)~=i); c = 0.5*bsxfun(@plus, C(:,:,i), Cp); v = bsxfun(@minus, C(:,:,i), Cp); v = bsxfun(@rdivide,v,sqrt(sum(v.^2,1))); ix(i,:) = 1./(1+exp(-0.5*min(sum(bsxfun(@times,bsxfun(@minus,P,c),v),1),[],3))); end ix = bsxfun(@rdivide,ix,sum(ix,1)); function M() % Compute best centroids from current labels global P C k ix z = permute(ix,[3 2 1]); C = bsxfun(@rdivide,sum(bsxfun(@times,P,z),2),sum(z,2)); % for i=1:k % C(:,:,i) = mean(P(:,ix==i),2); % end function plotline() global C k lw if k == 2 t = [-15 15]; for i=1:k for j=i+1:k c = mean(C(:,:,[i j]),3); n = C(:,:,i)-C(:,:,j); n = n/norm(n); v = [-n(2);n(1)]; plot(c(1)+t*v(1),c(2)+t*v(2),'-','LineWidth',lw,'Color',0.5*ones(1,3)); end end else voronoi(C(1,:,:),C(2,:,:)); end