Source code for dynsight._internal.analysis.dim_reduction
from __future__ import annotations
from typing import TYPE_CHECKING, Callable
if TYPE_CHECKING:
from numpy.typing import NDArray
import numpy as np
from scipy.spatial.distance import cdist
[docs]
def two_nn_estimator(
data: NDArray[np.float64],
metric: Callable[[NDArray[np.float64], NDArray[np.float64]], float],
) -> float:
"""Computes the intrinsic dimension with Two-NN estimator.
See for reference: Facco, E., d'Errico, M., Rodriguez, A. et al.,
Sci Rep 7, 12140 (2017). https://doi.org/10.1038/s41598-017-11873-y
Parameters:
data:
Has shape (n_atoms, n_frames, n_dims)
metric:
Distance metric for scipy.spatial.distance.cdist.
Example:
.. testcode:: 2nn-test
from sklearn.datasets import make_swiss_roll
import numpy as np
from dynsight.analysis import two_nn_estimator
from scipy.spatial.distance import euclidean
# Swiss roll is intrinsically 2-dimensional
frame, _ = make_swiss_roll(
n_samples=500,
noise=0.05,
random_state=42,
)
# Create artificial trajectory with 3 identical frames
data = np.stack([frame, frame, frame], axis=1)
intr_dim = two_nn_estimator(data, euclidean)
.. testcode:: 2nn-test
:hide:
assert np.isclose(intr_dim, 2.0, rtol=0.02)
"""
list_of_ratios = []
for time in range(data.shape[1]):
frame = data[:, time, :]
distances = cdist(frame, frame, metric=metric)
sorted_dist = np.sort(distances, axis=1)
d1 = sorted_dist[:, 1]
d2 = sorted_dist[:, 2]
mask = d1 > 0
mu = d2[mask] / d1[mask]
list_of_ratios.append(mu)
ratios = np.concatenate(list_of_ratios)
return ratios.size / np.sum(np.log(ratios))