diff --git a/README.md b/README.md
index 35eebe9..b6afd92 100644
--- a/README.md
+++ b/README.md
@@ -6,4 +6,4 @@ Fast, GPU-friendly, differentiable computation of Intrinsic Dimension via Maximu
 
 ### References
 - [E. Levina, P. Bickel; "Maximum Likelihood Estimation of Intrinsic Dimension", Advances in Neural Information Processing Systems, 2004](https://papers.nips.cc/paper_files/paper/2004/hash/74934548253bcab8490ebd74afed7031-Abstract.html)
-- [E. Facco, M. d'Errico, A. Rodriguez, A. Laio; "Estimating the intrinsic dimension of datasets by a minimal neighborhood information", Mature Scientific Reports, 2017](https://www.nature.com/articles/s41598-017-11873-y)
+- [E. Facco, M. d'Errico, A. Rodriguez, A. Laio; "Estimating the intrinsic dimension of datasets by a minimal neighborhood information", Nature Scientific Reports, 2017](https://www.nature.com/articles/s41598-017-11873-y)
diff --git a/fastwonn/mle.py b/fastwonn/mle.py
index 4df825f..719bc2e 100755
--- a/fastwonn/mle.py
+++ b/fastwonn/mle.py
@@ -1,69 +1,71 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# ──────────────────────────────────────────────────────────────────────────────
-from contextlib import ExitStack
-
-import torch
-from torch import Tensor
-
-from .impl import call_to_impl_cdist_topk
-
-# ──────────────────────────────────────────────────────────────────────────────
-__all__ = ["mle_id", "mle_id_avg"]
-
-
-# ──────────────────────────────────────────────────────────────────────────────
-def mle_id(
-    x: Tensor,
-    nneigh: int = 2,
-    twonn_fix: bool = False,
-    differentiable: bool = False,
-    impl: str = "torch",
-) -> Tensor:
-
-    with ExitStack() as stack:
-        stack.enter_context(torch.no_grad()) if not differentiable else None
-
-        ks: Tensor = call_to_impl_cdist_topk[impl](x, nneigh, False)[:, 1:]
-
-        if twonn_fix and nneigh == 2:
-            return -2 * ks.size(0) / torch.log(torch.div(*torch.unbind(ks, 1))).sum()
-
-        return (2 * (nneigh - 1) / torch.log(ks[:, -1].view(-1, 1) / ks).sum(1)).mean()
-
-
-# ──────────────────────────────────────────────────────────────────────────────
-
-
-def mle_id_avg(
-    x: Tensor,
-    nneigh_min: int = 2,
-    nneigh_max: int = 10,
-    twonn_fix: bool = False,
-    differentiable: bool = False,
-    impl: str = "torch",
-) -> Tensor:
-
-    with ExitStack() as stack:
-        stack.enter_context(torch.no_grad()) if not differentiable else None
-
-        ks: Tensor = call_to_impl_cdist_topk[impl](x, nneigh_max, False)[:, 1:]
-        runs = [
-            (
-                2
-                * (nneigh_max - 1 - i)
-                / torch.log(
-                    ks[:, -1 - i].view(-1, 1) / (ks[:, :-i] if i != 0 else ks)
-                ).sum(1)
-            ).mean()
-            for i in range(nneigh_max - nneigh_min + (not twonn_fix))
-        ]
-
-        if twonn_fix and nneigh_min == 2:
-            runs.append(
-                -2
-                * ks.size(0)
-                / torch.log(torch.div(*torch.unbind(ks[:, 0:2], 1))).sum()
-            )
-
-        return torch.stack(runs).nanmean()
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ──────────────────────────────────────────────────────────────────────────────
+from contextlib import ExitStack
+
+import torch
+from torch import Tensor
+
+from .impl import call_to_impl_cdist_topk
+
+# ──────────────────────────────────────────────────────────────────────────────
+__all__ = ["mle_id", "mle_id_avg"]
+
+
+# ──────────────────────────────────────────────────────────────────────────────
+def mle_id(
+    x: Tensor,
+    nneigh: int = 2,
+    twonn_fix: bool = False,
+    differentiable: bool = False,
+    impl: str = "torch",
+) -> Tensor:
+
+    with ExitStack() as stack:
+        stack.enter_context(torch.no_grad()) if not differentiable else None
+
+        ks: Tensor = call_to_impl_cdist_topk[impl](x, nneigh, False)[:, 1:]
+
+        if twonn_fix and nneigh == 2:
+            return -2 * ks.size(0) / torch.log(torch.div(*torch.unbind(ks, 1))).sum()
+
+        return (2 * (nneigh - 1) / torch.log(ks[:, -1].view(-1, 1) / ks).sum(1)).mean()
+
+
+# ──────────────────────────────────────────────────────────────────────────────
+
+
+def mle_id_avg(
+    x: Tensor,
+    nneigh_min: int = 2,
+    nneigh_max: int = 10,
+    twonn_fix: bool = False,
+    differentiable: bool = False,
+    impl: str = "torch",
+) -> Tensor:
+
+    with ExitStack() as stack:
+        stack.enter_context(torch.no_grad()) if not differentiable else None
+
+        twonn_sep: bool = twonn_fix and nneigh_min == 2
+
+        ks: Tensor = call_to_impl_cdist_topk[impl](x, nneigh_max, False)[:, 1:]
+        runs = [
+            (
+                2
+                * (nneigh_max - 1 - i)
+                / torch.log(
+                    ks[:, -1 - i].view(-1, 1) / (ks[:, :-i] if i != 0 else ks)
+                ).sum(1)
+            ).mean()
+            for i in range(nneigh_max - nneigh_min + (not twonn_sep))
+        ]
+
+        if twonn_sep:
+            runs.append(
+                -2
+                * ks.size(0)
+                / torch.log(torch.div(*torch.unbind(ks[:, 0:2], 1))).sum()
+            )
+
+        return torch.stack(runs).nanmean()
diff --git a/setup.py b/setup.py
index dd2f71a..8d5de1d 100755
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@ def read(fname):
 
 setup(
     name=PACKAGENAME,
-    version="0.0.7",
+    version="0.0.8",
    author="Emanuele Ballarin",
     author_email="emanuele@ballarin.cc",
     url="https://github.com/emaballarin/fastwonn",
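
Note (not part of the patch): below is a minimal usage sketch of the two estimators touched by this diff. It assumes the package is installed as `fastwonn`, that `mle_id` and `mle_id_avg` can be imported from `fastwonn.mle` (as per its `__all__`), and that the default `impl="torch"` backend is available; the synthetic dataset is illustrative only.

# Minimal usage sketch; assumptions as stated above.
import torch

from fastwonn.mle import mle_id, mle_id_avg

# 2048 points sampled from a 5-dimensional linear subspace embedded in 20 dimensions.
x = torch.randn(2048, 5) @ torch.randn(5, 20)

d_k2 = mle_id(x, nneigh=2)                          # Levina-Bickel MLE with 2 neighbours
d_avg = mle_id_avg(x, nneigh_min=2, nneigh_max=10)  # MLE averaged over neighbourhood sizes 2..10

print(float(d_k2), float(d_avg))  # both estimates should land near 5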