-
Notifications
You must be signed in to change notification settings - Fork 0
/
complex_fixed_rank.py
302 lines (239 loc) · 11.3 KB
/
complex_fixed_rank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
"""
Module containing manifolds of fixed rank matrices.
"""
import numpy as np
from pymanopt.manifolds.manifold import EuclideanEmbeddedSubmanifold
from pymanopt.manifolds.complex_stiefel import ComplexStiefel
from pymanopt.tools import ndarraySequenceMixin
class ComplexFixedRankEmbedded(EuclideanEmbeddedSubmanifold):
"""
Note: Currently not compatible with the second order TrustRegions solver.
Should be fixed soon.
Manifold of m-by-n real matrices of fixed rank k. This follows the
embedded geometry described in Bart Vandereycken's 2013 paper:
"Low-rank matrix completion by Riemannian optimization".
Paper link: http://arxiv.org/pdf/1209.3834.pdf
For efficiency purposes, Pymanopt does not represent points on this
manifold explicitly using m x n matrices, but instead implicitly using
a truncated singular value decomposition. Specifically, a point is
represented by a tuple (u, s, vh) of three numpy arrays. The arrays u,
s and vt have shapes (m, k), (k,) and (k, n) respectively, and the low
rank matrix which they represent can be recovered by the matrix product
u * diag(s) * vt.
For example, to optimize over the space of 5 by 4 matrices with rank 3,
we would need to
>>> import pymanopt.manifolds
>>> manifold = pymanopt.manifolds.FixedRankEmbedded(5, 4, 3)
Then the shapes will be as follows:
>>> x = manifold.rand()
>>> x[0].shape
(5, 3)
>>> x[1].shape
(3,)
>>> x[2].shape
(3, 4)
and the full matrix can be recovered using the matrix product
x[0] * diag(x[1]) * x[2]:
>>> import numpy as np
>>> X = x[0] @ np.diag(x[1]) @ x[2]
Tangent vectors are represented as a tuple (Up, M, Vp). The matrices Up
(mxk) and Vp (nxk) obey Up'*U = 0 and Vp'*V = 0.
The matrix M (kxk) is arbitrary. Such a structure corresponds to the
following tangent vector in the ambient space of mxn matrices:
Z = U*M*V' + Up*V' + U*Vp'
where (U, S, V) is the current point and (Up, M, Vp) is the tangent
vector at that point.
Vectors in the ambient space are best represented as mxn matrices. If
these are low-rank, they may also be represented as structures with
U, S, V fields, such that Z = U*S*V'. There are no restrictions on what
U, S and V are, as long as their product as indicated yields a real, mxn
matrix.
The chosen geometry yields a Riemannian submanifold of the embedding
space R^(mxn) equipped with the usual trace (Frobenius) inner product.
Please cite the Pymanopt paper as well as the research paper:
@Article{vandereycken2013lowrank,
Title = {Low-rank matrix completion by {Riemannian} optimization},
Author = {Vandereycken, B.},
Journal = {SIAM Journal on Optimization},
Year = {2013},
Number = {2},
Pages = {1214--1236},
Volume = {23},
Doi = {10.1137/110845768}
}
This file is based on fixedrankembeddedfactory from Manopt: www.manopt.org.
Ported by: Jamie Townsend, Sebastian Weichwald
Original author: Nicolas Boumal, Dec. 30, 2012.
"""
# TODO(nkoep): Change the signature to (self, dims, k) where dims has to be
# a 2-element iterable specifying m and n.
def __init__(self, m, n, k):
self._m = m
self._n = n
self._k = k
self._stiefel_m = ComplexStiefel(m, k)
self._stiefel_n = ComplexStiefel(n, k)
name = ("Manifold of complex {m}-by-{n} matrices with rank {k} and embedded "
"geometry".format(m=m, n=n, k=k))
dimension = 2*(m + n - k) * k
super().__init__(name, dimension, point_layout=3)
@property
def typicaldist(self):
return self.dim
def inner(self, X, G, H):
return np.sum(np.real(np.tensordot(np.conjugate(a), b)) for (a, b) in zip(G, H))
def _apply_ambient(self, Z, W):
"""
For a given ambient vector Z, given as a tuple (U, S, V) such that
Z = U*S*V', applies it to a matrix W to calculate the matrix product
ZW.
"""
if isinstance(Z, (list, tuple)):
return Z[0] @ Z[1] @ Z[2].conj().T @ W
return Z @ W
def _apply_ambient_transpose(self, Z, W):
"""
Same as apply_ambient, but applies Z' to W.
"""
if isinstance(Z, (list, tuple)):
return Z[2] @ Z[1].conj() @ Z[0].conj().T @ W
return Z.conj().T @ W
def proj(self, X, Z):
"""
Note that Z must either be an m x n matrix from the ambient space, or
else a tuple (Uz, Sz, Vz), where Uz * Sz * Vz is in the ambient space
(of low-rank matrices).
This function then returns a tangent vector parameterized as
(Up, M, Vp), as described in the class docstring.
"""
ZV = self._apply_ambient(Z, X[2].conj().T)
UtZV = X[0].conj().T @ ZV
ZtU = self._apply_ambient_transpose(Z, X[0])
Up = ZV - X[0] @ UtZV
M = UtZV
Vp = ZtU - X[2].conj().T @ UtZV.conj().T
return _FixedRankTangentVector((Up, M, Vp))
def egrad2rgrad(self, x, egrad):
"""
Assuming that the cost function being optimized has been defined
in terms of the low-rank singular value decomposition of X, the
gradient returned by the autodiff backends will have three components
and will be in the form of a tuple egrad = (df/dU, df/dS, df/dV).
This function correctly maps a gradient of this form into the tangent
space. See https://j-towns.github.io/papers/svd-derivative.pdf for a
derivation.
"""
utdu = x[0].conj().T @ egrad[0]
uutdu = x[0] @ utdu
Up = (egrad[0] - uutdu) / x[1]
vtdv = x[2] @ egrad[2].conj().T
vvtdv = x[2].conj().T @ vtdv
Vp = (egrad[2].conj().T - vvtdv) / x[1]
i = np.eye(self._k,dtype=np.complex128)
f = 1 / (x[1][np.newaxis, :]**2 - x[1][:, np.newaxis]**2 + i)
M = (f * (utdu - utdu.conj().T) * x[1] +
x[1][:, np.newaxis] * f * (vtdv - vtdv.conj().T) + np.diag(egrad[1]))
return _FixedRankTangentVector((Up, M, Vp))
# TODO(nkoep): Implement the 'weingarten' method to support the
# trust-region solver, cf.
# https://sites.uclouvain.be/absil/2013-01/Weingarten_07PA_techrep.pdf
# This retraction is second order, following general results from
# Absil, Malick, "Projection-like retractions on matrix manifolds",
# S IAM J. Optim., 22 (2012), pp. 135-158.
def retr(self, X, Z):
'''
Commented out the computation of the metric projection via QR decomposition.
Instead just computed the straightforward rank-k truncated SVD.
'''
# Qu, Ru = np.linalg.qr(Z[0])
# Qv, Rv = np.linalg.qr(Z[2])
# T = np.vstack((np.hstack((np.diag(X[1]) + Z[1], Rv.conj().T)),
# np.hstack((Ru, np.zeros((self._k, self._k),dtype=np.complex128)))))
# # Numpy svd outputs St as a 1d vector, not a matrix.
# Ut, St, Vt = np.linalg.svd(T, full_matrices=False)
# # Transpose because numpy outputs it the wrong way.
# Vt = Vt.T
# U = np.hstack((X[0], Qu)) @ Ut[:, :self._k]
# V = np.hstack((X[2].conj().T, Qv)) @ Vt[:, :self._k]
# S = St[:self._k] + np.spacing(1)
# return (U, S, V.conj().T)
Xf = X[0] @ np.diag(X[1]) @ X[2]
Zf = self.tangent2ambient(X, Z)
Zf = Zf[0] @ Zf[1] @ Zf[2].conj().T
U,S,Vh = np.linalg.svd(Xf+Zf,full_matrices=False)
Ur = U[:,:self._k]
Sr = S[:self._k] + np.spacing(1)
Vrh = Vh[:self._k]
return (Ur,Sr,Vrh)
def norm(self, X, G):
return np.sqrt(self.inner(X, G, G))
def rand(self):
u = self._stiefel_m.rand()
s = np.sort(np.random.rand(self._k))[::-1]
vh = self._stiefel_n.rand().conj().T
return (u, s, vh)
def _tangent(self, X, Z):
"""
Given Z in tangent vector format, projects the components Up and Vp
such that they satisfy the tangent space constraints up to numerical
errors. If Z was indeed a tangent vector at X, this should barely
affect Z (it would not at all if we had infinite numerical accuracy).
"""
Up = Z[0] - X[0] @ X[0].conj().T @ Z[0]
Vp = Z[2] - X[2].conj().T @ X[2] @ Z[2]
return _FixedRankTangentVector((Up, Z[1], Vp))
def randvec(self, X):
Up = np.random.randn(self._m, self._k) + 1j*np.random.randn(self._m, self._k)
Vp = np.random.randn(self._n, self._k) + 1j*np.random.randn(self._n, self._k)
M = np.random.randn(self._k, self._k) + 1j*np.random.randn(self._k, self._k)
Z = self._tangent(X, (Up, M, Vp))
nrm = self.norm(X, Z)
return _FixedRankTangentVector((Z[0]/nrm, Z[1]/nrm, Z[2]/nrm))
def tangent2ambient(self, X, Z):
"""
Transforms a tangent vector Z represented as a structure (Up, M, Vp)
into a structure with fields (U, S, V) that represents that same
tangent vector in the ambient space of mxn matrices, as U*S*V'.
This matrix is equal to X.U*Z.M*X.V' + Z.Up*X.V' + X.U*Z.Vp'. The
latter is an mxn matrix, which could be too large to build
explicitly, and this is why we return a low-rank representation
instead. Note that there are no guarantees on U, S and V other than
that USV' is the desired matrix. In particular, U and V are not (in
general) orthonormal and S is not (in general) diagonal.
(In this implementation, S is identity, but this might change.)
"""
U = np.hstack((X[0] @ Z[1] + Z[0], X[0]))
S = np.eye(2 * self._k)
V = np.hstack(([X[2].conj().T, Z[2]]))
return (U, S, V)
# Comment from Manopt:
# New vector transport on June 24, 2014 (as indicated by Bart)
# Reference: Absil, Mahony, Sepulchre 2008 section 8.1.3:
# For Riemannian submanifolds of a Euclidean space, it is acceptable to
# transport simply by orthogonal projection of the tangent vector
# translated in the ambient space.
def transp(self, X1, X2, G):
return self.proj(X2, self.tangent2ambient(X1, G))
def zerovec(self, X):
return _FixedRankTangentVector((np.zeros((self._m, self._k),dtype=np.complex128),
np.zeros((self._k, self._k),dtype=np.complex128),
np.zeros((self._n, self._k),dtype=np.complex128)))
class _FixedRankTangentVector(tuple, ndarraySequenceMixin):
def __repr__(self):
return "_FixedRankTangentVector: " + super().__repr__()
def to_ambient(self, x):
Z1 = x[0] @ self[1] @ x[2]
Z2 = self[0] @ x[2]
Z3 = x[0] @ self[2].conj().T
return Z1 + Z2 + Z3
def __add__(self, other):
return _FixedRankTangentVector((s + o for (s, o) in zip(self, other)))
def __sub__(self, other):
return _FixedRankTangentVector((s - o for (s, o) in zip(self, other)))
def __mul__(self, other):
return _FixedRankTangentVector((other * s for s in self))
__rmul__ = __mul__
def __div__(self, other):
return _FixedRankTangentVector((val / other for val in self))
def __neg__(self):
return _FixedRankTangentVector((-val for val in self))