UPDATE: So it looked that still the answer I wrote was not feasible enough. Leveraging the idea from @Onyymbu, here is the updated code that seems working well with very small memory usage.
import numpy as np
from sklearn.gaussian_process.kernels import Matern
from tqdm import tqdm
from joblib import Parallel, delayed
from scipy.spatial.distance import squareform
def G_Batchwise_optimized(M, gamma, R, Z, nu=1.5, length_scale=1.0, batch_size=100):
"""
Optimized computation of G[i,j] = sum_{l,l'} M[l]*M[l'] * k(r[l] + gamma*z[i], r[l'] + gamma*z[j]).
Reduces memory overhead by computing pairwise differences on the fly.
"""
Z = np.array(Z)
R = np.array(R)
kernel = Matern(length_scale=length_scale, nu=nu)
n = R.shape[0]
mm = M.ravel()
### We Generate unique i, j pairs to avoid redundant calculations ###
a = np.arange(n - 1)
b = np.arange(n - 1, 0, -1)
i = np.repeat(a, b)
j = np.concatenate([np.arange(i, n) for i in a + 1])
m = mm[i] * mm[j] # Weight products
### Compute diagonal elements first (where RD = 0, and where ZD = 0) ###
zero = np.zeros((1, R.shape[1])) # Placeholder for 0-difference
diagR = kernel(gamma * (Z[i] - Z[j]), zero).ravel() # R == 0
diagZ = kernel(zero, R[i] - R[j]) # Z == 0
diagM = mm @ mm # Sum over all M elements
### This is Function to process a batch of kernel evaluations ###
def process_batch(start_idx, end_idx):
"""Computes kernel for a batch of pairwise differences."""
RD_batch = R[i[start_idx:end_idx]] - R[j[start_idx:end_idx]]
ZD_batch = gamma * (Z[i[start_idx:end_idx]] - Z[j[start_idx:end_idx]])
### Compute kernel for both directions ###
kernel_vals = kernel(RD_batch, ZD_batch) + kernel(RD_batch, -ZD_batch)
### Sum weighted kernel values ###
return m[start_idx:end_idx] @ kernel_vals
### Compute off-diagonal elements in batches to reduce memory usage ###
num_pairs = len(i)
num_batches = (num_pairs + batch_size - 1) // batch_size # Ensure full coverage
results = Parallel(n_jobs=-1)(
delayed(process_batch)(start, min(start + batch_size, num_pairs))
for start in tqdm(range(0, num_pairs, batch_size), desc="Computing G matrix")
)
### Construct the G matrix ###
G = squareform(np.concatenate(results) + diagR * diagM)
### Fill diagonal elements ###
G[np.diag_indices_from(G)] = 2 * diagZ @ m + diagM
### Optional validation checks. ###
is_symmetric = np.allclose(G, G.T, atol=1e-8)
eigenvalues = np.linalg.eigvalsh(G)
is_semi_positive_definite = np.all(eigenvalues >= -1e-8)
print(f"G is semi-positive definite: {is_semi_positive_definite}")
print(f"G is symmetric: {is_symmetric}")
return G
and toy example:
#%%
# Example usage:
if __name__ == "__main__":
n = 500 # Large-scale test
d = 10
gamma = 0.9
R = np.random.rand(n, d)
Z = np.random.rand(n, d)
M = np.random.rand(n)
G1_optimized = G_Batchwise_optimized(M, gamma, R, Z, nu=1.5, length_scale=1.0, batch_size=1000)
print(f"G computed with shape: {G1_optimized.shape}, Memory usage: {G1_optimized.nbytes / 1e6:.1f} MB")