Skip to content

Commit

Permalink
DOC/TST: minor updates
Browse files Browse the repository at this point in the history
  • Loading branch information
brian-lau committed Aug 9, 2017
1 parent 8a30a9c commit b740b5e
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 27 deletions.
3 changes: 1 addition & 2 deletions Examples/exampleMNIST.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

% Sketch object
k = 64;
alpha = 0.2;
sketcher = FrequentDirections(k,'alpha',alpha);
sketcher = FrequentDirections(k);

% Process streamed data samples
tic;
Expand Down
7 changes: 7 additions & 0 deletions Examples/exampleRandomStream.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
count = count + 1;
end

% Retrieve sketch
B = sketcher.get();

% Do something with sketch, e.g., approximate covariance matrix
covA = B'*B;

% To sketch a different matrix, release resources
sketcher.release();

%% Stream blocks of samples
Expand Down
44 changes: 26 additions & 18 deletions Examples/exampleSparseFD.m
Original file line number Diff line number Diff line change
@@ -1,36 +1,44 @@
% Reproduce part of Figure 4 of Teng & Chu using Caltech Birds dataset
% Reproduce part of Figure 4 & 5 of Teng & Chu
%
% Teng & Chu (2017). Low-Rank approximation via sparse frequent directions.
% arXiv preprint arXiv:1705.07140.

%
% TODO
% o average over reps since sparse method is not deterministic
% o runtimes don't seem to match Teng & Chu (their vanilla FD is slow?)
%
clear
% Check that Birds data exists somewhere
if ~exist('image_attribute_labels.txt','file')
help('BirdsReader');
error('Birds data must be downloaded first');
end

% Reader for Birds data
BR = BirdsReader('filename','image_attribute_labels.txt');

% Load entire data set into memory
BR.blockSize = inf;
A = BR();
if 0 % Birds data
DR = BirdsReader();
p = 50; % Approximating rank (k in table 4.1)
k = 50:10:150;
% Load entire data set into memory
DR.blockSize = inf;
A = DR();
else % MNIST data
DR = DigitsReader();
p = 100; % Approximating rank (k in table 4.1)
k = 100:10:200;
% Load entire data set into memory
DR.blockSize = inf;
A = DR();

A = reshape(A,28*28,60000)';
end

n = size(A,1);

k = 50:10:150;
sp = [true true true false];
nbetak = [5 10 50 1];
id = {'SpFD5' 'SpFD10' 'SpFD50' 'FastFD'};
symbol = ['^' 'd' '+' 's'];
color = ['g' 'g' 'g' 'k'];

tic;
[U,S,V] = svd(A);
[U,S,V] = svd(A,'econ');
bruteRuntime = toc;
m = 50;
Am = U(:,1:m)*S(1:m,1:m)*V(:,1:m)'; % For projection error
Am = U(:,1:p)*S(1:m,1:p)*V(:,1:p)'; % For projection error

%% This can take a little time
count = 1;
Expand All @@ -47,7 +55,7 @@

coverr(count,m) = sketcher.coverr(A);

Am_ = sketcher.approx(A,50);
Am_ = sketcher.approx(A,p);
projerr(count,m) = norm(A-Am_,'fro')/norm(A-Am,'fro');

nSVD(count,m) = sketcher.nSVD;
Expand Down
36 changes: 30 additions & 6 deletions FrequentDirections.m
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@
% Parameterized FD: alpha = scalar in (0,1), fast = false
% Fast Parameterized FD: alpha = scalar in (0,1), fast = true
% alpha = 0.2, fast = true produces 'Fast 0.2FD' in Desai et al.
%
%
% Also implements one non-deterministic method of Teng & Chu (2017)
% that uses a sparse subspace embedding as an intermediate step to
% increase efficiency and take advantage of any sparsity in the input
% matrix:
% SpEmb: sparse = true, alpha = 1, fast = true
% beta >= 1 controls the blocksize for sparse embedding,
% which is equal to beta*k
%
% INPUTS
% k - scalar in [1,d], sketch size. Note that this is commonly referred
% to as l (ell) in references and other implementations
Expand All @@ -31,6 +39,11 @@
% fast - boolean, true indicates fast algorithm (default = TRUE)
% alpha - scalar in [0,1], controls fraction of sketch rows zeroed
% on each rank reduction (default = 1)
% sparse - boolean, true indicates sparse algorithm (default = FALSE)
% beta - scalar >= 1, determines the size of sparse embedding.
% beta*k is the number of rows of A that are reduced on
each iteration (default = 10)
% Note that Teng & Chu (2017) use alpha for this parameter
% monitor - boolean, true plots singular values at each rank reduction
% (default = FALSE)
% figureAxis - axis handle for use when monitor = TRUE
Expand All @@ -49,6 +62,7 @@
% Setting the input true (i.e. obj.get(true) as opposed to
% obj.get() or get(obj)) will return a [2k x d] matrix when
% fast = true.
% approx - return a low-rank approximation
% coverr - given [n x d] matrix A, returns covariance error of sketch
% ||A'A - B'B||_2 / ||A||_F^2
% projerr - given [n x d] matrix A, returns projection error of sketch
Expand Down Expand Up @@ -120,7 +134,7 @@
alpha = 1 % [0,1] shrinkage control parameter, 0 = iSVD, 1 = original FD
fast = true % true indicates fast algorithm
sparse = false % true indicates FD with sparse embedding
beta = 1 % scalar >= 1 && <= n/k
beta = 10 % scalar >= 1 && <= n/k
end

properties
Expand Down Expand Up @@ -253,6 +267,15 @@
end

% APPROX Low-rank approximation
%
% INPUT
% A - [n x d] matrix to approximate
%
% OPTIONAL
% k - rank, defaults to sketch size k
%
% OUTPUT
% Ak - [n x d] low-rank approximation using sketch
function Ak = approx(self,A,k)
[~,V] = get(self);
if nargin < 3
Expand Down Expand Up @@ -352,7 +375,7 @@
obj.step(zeros(1,obj.d));
end

% Update count & n
% Update counters
obj.n = sum(cellfun(@(x) x.n,varargin));
obj.nSVD = obj.nSVD + sum(cellfun(@(x) x.nSVD,varargin));
end
Expand All @@ -375,7 +398,6 @@ function setupImpl(self,A)
end

self.B_ = zeros(self.k2_,d);
% TODO : preload first block of data? 1:min(size(A,1),k)

if self.sparse
self.betak_ = fix(self.beta*self.k);
Expand Down Expand Up @@ -411,7 +433,7 @@ function stepImpl(self,A)
indB = find(~any(B,2)); % Index all-zero rows of B
i = 1; % Keep track of data samples appended
while i <= n
% Append data
%% Append data
if ~isempty(indB)
if sparse
if indSA < betak % Space available in buffer
Expand All @@ -438,7 +460,7 @@ function stepImpl(self,A)
end
end

% Update sketch
%% Update sketch
if isempty(indB)
[~,S,V] = svd(B,'econ');
Sprime = reduceRank(S,k,alpha);
Expand Down Expand Up @@ -472,6 +494,8 @@ function stepImpl(self,A)
function releaseImpl(self)
self.B_ = [];
self.d_ = [];
self.betak_ = [];
self.SA_ = [];
if self.monitor
close(self.figureAxis.Parent);
end
Expand Down
2 changes: 1 addition & 1 deletion Testing/TestOutputs.m
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ function bad_merge(testCase)
'FrequentDirections:BadInput');

fd2.k = 16;
fd1(rand(32,16));
fd1(rand(32,24));
fd2(rand(32,32));

testCase.assertError(@() merge(fd1,fd2),...
Expand Down
16 changes: 16 additions & 0 deletions Testing/TestParameters.m
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,22 @@ function bad_fast(testCase)
testCase.assertError(@() FrequentDirections(16,'fast',[1 2]),...
'FrequentDirections:BadInput');
end

function good_sparse(testCase)
k = 16;
fd = FrequentDirections(k,'sparse',true);

testCase.assertEqual(fd.sparse,true);

fd = FrequentDirections(k,'sparse',false);

testCase.assertEqual(fd.sparse,false);
end

function bad_sparse(testCase)
testCase.assertError(@() FrequentDirections(16,'fast',0,'sparse',1),...
'FrequentDirections:BadInput');
end

function good_monitor(testCase)
k = 16;
Expand Down

0 comments on commit b740b5e

Please sign in to comment.