Skip to content

Commit

Permalink
DOC/TST: minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
brian-lau committed Aug 9, 2017
1 parent 8a30a9c commit b740b5e
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 27 deletions.
3 changes: 1 addition & 2 deletions Examples/exampleMNIST.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

% Sketch object
k = 64;
alpha = 0.2;
sketcher = FrequentDirections(k,'alpha',alpha);
sketcher = FrequentDirections(k);

% Process streamed data samples
tic;
Expand Down
7 changes: 7 additions & 0 deletions Examples/exampleRandomStream.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
count = count + 1;
end

% Retrieve sketch
B = sketcher.get();

% Do something with sketch, e.g., approximate covariance matrix
covA = B'*B;

% To sketch a different matrix, release resources
sketcher.release();

%% Stream blocks of samples
Expand Down
44 changes: 26 additions & 18 deletions Examples/exampleSparseFD.m
Original file line number Diff line number Diff line change
@@ -1,36 +1,44 @@
% Reproduce part of Figure 4 of Teng & Chu using Caltech Birds dataset
% Reproduce part of Figure 4 & 5 of Teng & Chu
%
% Teng & Chu (2017). Low-Rank approximation via sparse frequent directions.
% arXiv preprint arXiv:1705.07140.

%
% TODO
% o average over reps since sparse method is not deterministic
% o runtimes don't seem to match Teng & Chu (their vanilla FD is slow?)
%
clear
% Check that Birds data exists somewhere
if ~exist('image_attribute_labels.txt','file')
help('BirdsReader');
error('Birds data must be downloaded first');
end

% Reader for Birds data
BR = BirdsReader('filename','image_attribute_labels.txt');

% Load entire data set into memory
BR.blockSize = inf;
A = BR();
if 0 % Birds data
DR = BirdsReader();
p = 50; % Approximating rank (k in table 4.1)
k = 50:10:150;
% Load entire data set into memory
DR.blockSize = inf;
A = DR();
else % MNIST data
DR = DigitsReader();
p = 100; % Approximating rank (k in table 4.1)
k = 100:10:200;
% Load entire data set into memory
DR.blockSize = inf;
A = DR();

A = reshape(A,28*28,60000)';
end

n = size(A,1);

k = 50:10:150;
sp = [true true true false];
nbetak = [5 10 50 1];
id = {'SpFD5' 'SpFD10' 'SpFD50' 'FastFD'};
symbol = ['^' 'd' '+' 's'];
color = ['g' 'g' 'g' 'k'];

tic;
[U,S,V] = svd(A);
[U,S,V] = svd(A,'econ');
bruteRuntime = toc;
m = 50;
Am = U(:,1:m)*S(1:m,1:m)*V(:,1:m)'; % For projection error
Am = U(:,1:p)*S(1:m,1:p)*V(:,1:p)'; % For projection error

%% This can take a little time
count = 1;
Expand All @@ -47,7 +55,7 @@

coverr(count,m) = sketcher.coverr(A);

Am_ = sketcher.approx(A,50);
Am_ = sketcher.approx(A,p);
projerr(count,m) = norm(A-Am_,'fro')/norm(A-Am,'fro');

nSVD(count,m) = sketcher.nSVD;
Expand Down
36 changes: 30 additions & 6 deletions FrequentDirections.m
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@
% Parameterized FD: alpha = scalar in (0,1), fast = false
% Fast Parameterized FD: alpha = scalar in (0,1), fast = true
% alpha = 0.2, fast = true produces 'Fast 0.2FD' in Desai et al.
%
%
% Also implements one non-deterministic method of Teng & Chu (2017)
% that uses a sparse subspace embedding as an intermediate step to
% increase efficiency and take advantage of any sparsity in the input
% matrix:
% SpEmb: sparse = true, alpha = 1, fast = true
% beta >= 1 controls the blocksize for sparse embedding,
% which is equal to beta*k
%
% INPUTS
% k - scalar in [1,d], sketch size. Note that this is commonly referred
% to as l (ell) in references and other implementations
Expand All @@ -31,6 +39,11 @@
% fast - boolean, true indicates fast algorithm (default = TRUE)
% alpha - scalar in [0,1], controls fraction of sketch rows zeroed
% on each rank reduction (default = 1)
% sparse - boolean, true indicates sparse algorithm (default = FALSE)
% beta - scalar >= 1, determines the size of sparse embedding.
% beta*k is the number of rows of A that are reduced on
each iteration (default = 10)
% Note that Teng & Chu (2017) use alpha for this parameter
% monitor - boolean, true plots singular values at each rank reduction
% (default = FALSE)
% figureAxis - axis handle for use when monitor = TRUE
Expand All @@ -49,6 +62,7 @@
% Setting the input true (i.e. obj.get(true) as opposed to
% obj.get() or get(obj)) will return a [2k x d] matrix when
% fast = true.
% approx - return a low-rank approximation
% coverr - given [n x d] matrix A, returns covariance error of sketch
% ||A'A - B'B||_2 / ||A||_F^2
% projerr - given [n x d] matrix A, returns projection error of sketch
Expand Down Expand Up @@ -120,7 +134,7 @@
alpha = 1 % [0,1] shrinkage control parameter, 0 = iSVD, 1 = original FD
fast = true % true indicates fast algorithm
sparse = false % true indicates FD with sparse embedding
beta = 1 % scalar >= 1 && <= n/k
beta = 10 % scalar >= 1 && <= n/k
end

properties
Expand Down Expand Up @@ -253,6 +267,15 @@
end

% APPROX Low-rank approximation
%
% INPUT
% A - [n x d] matrix to approximate
%
% OPTIONAL
% k - rank, defaults to sketch size k
%
% OUTPUT
% Ak - [n x d] low-rank approximation using sketch
function Ak = approx(self,A,k)
[~,V] = get(self);
if nargin < 3
Expand Down Expand Up @@ -352,7 +375,7 @@
obj.step(zeros(1,obj.d));
end

% Update count & n
% Update counters
obj.n = sum(cellfun(@(x) x.n,varargin));
obj.nSVD = obj.nSVD + sum(cellfun(@(x) x.nSVD,varargin));
end
Expand All @@ -375,7 +398,6 @@ function setupImpl(self,A)
end

self.B_ = zeros(self.k2_,d);
% TODO : preload first block of data? 1:min(size(A,1),k)

if self.sparse
self.betak_ = fix(self.beta*self.k);
Expand Down Expand Up @@ -411,7 +433,7 @@ function stepImpl(self,A)
indB = find(~any(B,2)); % Index all-zero rows of B
i = 1; % Keep track of data samples appended
while i <= n
% Append data
%% Append data
if ~isempty(indB)
if sparse
if indSA < betak % Space available in buffer
Expand All @@ -438,7 +460,7 @@ function stepImpl(self,A)
end
end

% Update sketch
%% Update sketch
if isempty(indB)
[~,S,V] = svd(B,'econ');
Sprime = reduceRank(S,k,alpha);
Expand Down Expand Up @@ -472,6 +494,8 @@ function stepImpl(self,A)
function releaseImpl(self)
self.B_ = [];
self.d_ = [];
self.betak_ = [];
self.SA_ = [];
if self.monitor
close(self.figureAxis.Parent);
end
Expand Down
2 changes: 1 addition & 1 deletion Testing/TestOutputs.m
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ function bad_merge(testCase)
'FrequentDirections:BadInput');

fd2.k = 16;
fd1(rand(32,16));
fd1(rand(32,24));
fd2(rand(32,32));

testCase.assertError(@() merge(fd1,fd2),...
Expand Down
16 changes: 16 additions & 0 deletions Testing/TestParameters.m
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,22 @@ function bad_fast(testCase)
testCase.assertError(@() FrequentDirections(16,'fast',[1 2]),...
'FrequentDirections:BadInput');
end

function good_sparse(testCase)
k = 16;
fd = FrequentDirections(k,'sparse',true);

testCase.assertEqual(fd.sparse,true);

fd = FrequentDirections(k,'sparse',false);

testCase.assertEqual(fd.sparse,false);
end

function bad_sparse(testCase)
testCase.assertError(@() FrequentDirections(16,'fast',0,'sparse',1),...
'FrequentDirections:BadInput');
end

function good_monitor(testCase)
k = 16;
Expand Down

0 comments on commit b740b5e

Please sign in to comment.