79704959

Date: 2025-07-17 14:22:36
Score: 2
Natty:
Report link

It is working now thanks to @sidhartthhhhh idea. The corrected code see below:

import numpy as np
from scipy.signal import correlate2d
def fast_corr2d_pearson(a, b):
    """
    Fast 2D Pearson cross-correlation of two arrays using convolution.
    Output shape is (2*rows - 1, 2*cols - 1) like correlate2d with mode=full.
    """
    assert a.shape == b.shape
    a = a.astype(np.float64)
    b = b.astype(np.float64)
    
    rows, cols = a.shape
    
    ones = np.ones_like(a) # These are used to calculate the number of overlapping elements at each lag
    n = correlate2d(ones, ones) # number of overlapping bins for each offset
    sum_a = correlate2d(a, ones) # sum of a in overlapping region
    sum_b = correlate2d(ones, b) # sum of b in overlapping region
    sum_ab = correlate2d(a, b)
    sum_a2 = correlate2d(a**2, ones)
    sum_b2 = correlate2d(ones, b**2)
    
    numerator = sum_ab-sum_a*sum_b/n
    s_a = sum_a2 - sum_a**2/n
    s_b = sum_b2 - sum_b**2/n
    denominator = np.sqrt(s_a*s_b)
    
    with np.errstate(invalid='ignore', divide='ignore'):
        corr = numerator / denominator
    corr[np.isnan(corr)] = 0
    return corr 

Each step can be verified to be correct:

lag_row,lag_col=3,7 # any test case
row_idx, col_idx = rows-1+lag_row, cols-1+lag_col # 2d index in resulting corr matrix
a_lagged = a[lag_row:,lag_col:] # only works for lag_row>0, lag_col>0
sum_a[row_idx,col_idx] == np.sum(a_lagged)

Slow code for comparison:

from scipy.stats import pearsonr

rows,cols = data.shape
row_lags = range(-rows + 1, rows)
col_lags = range(-cols + 1, cols)
autocorr = np.empty((len(row_lags), len(col_lags)))
autocorr.shape

for lag_row in tqdm(row_lags):
    for lag_col in col_lags:
        # Create a lagged version of the data
        # todo: implement logic for lag=0
        if lag_row >= 0 and lag_col >= 0:
            data_0 = data[lag_row:, lag_col:]
            data_1 = data[:-lag_row, :-lag_col]
        elif lag_row < 0 and lag_col < 0:
            data_0 = data[:lag_row, :lag_col]
            data_1 = data[-lag_row:, -lag_col:]
        elif lag_row >= 0:
            data_0 = data[lag_row:, :lag_col]
            data_1 = data[:-lag_row, -lag_col:]
        else:
            data_0 = data[:lag_row, lag_col:]
            data_1 = data[-lag_row:, :-lag_col]
            
        try:
            corr=pearsonr(data_0.flatten(),data_1.flatten()).statistic
        except:
            corr=np.nan
        
        autocorr[lag_row+rows-1, lag_col+cols-1] = corr

This example is for autocorrelation but works the same.

Related discussion https://stackoverflow.com/a/51168178

Reasons:
  • Blacklisted phrase (0.5): thanks
  • Blacklisted phrase (1): stackoverflow
  • Long answer (-1):
  • Has code block (-0.5):
  • User mentioned (1): @sidhartthhhhh
  • Self-answer (0.5):
  • Low reputation (0.5):
Posted by: pas-calc