It is working now thanks to @sidhartthhhhh idea. The corrected code see below:
import numpy as np
from scipy.signal import correlate2d
def fast_corr2d_pearson(a, b):
"""
Fast 2D Pearson cross-correlation of two arrays using convolution.
Output shape is (2*rows - 1, 2*cols - 1) like correlate2d with mode=full.
"""
assert a.shape == b.shape
a = a.astype(np.float64)
b = b.astype(np.float64)
rows, cols = a.shape
ones = np.ones_like(a) # These are used to calculate the number of overlapping elements at each lag
n = correlate2d(ones, ones) # number of overlapping bins for each offset
sum_a = correlate2d(a, ones) # sum of a in overlapping region
sum_b = correlate2d(ones, b) # sum of b in overlapping region
sum_ab = correlate2d(a, b)
sum_a2 = correlate2d(a**2, ones)
sum_b2 = correlate2d(ones, b**2)
numerator = sum_ab-sum_a*sum_b/n
s_a = sum_a2 - sum_a**2/n
s_b = sum_b2 - sum_b**2/n
denominator = np.sqrt(s_a*s_b)
with np.errstate(invalid='ignore', divide='ignore'):
corr = numerator / denominator
corr[np.isnan(corr)] = 0
return corr
Each step can be verified to be correct:
lag_row,lag_col=3,7 # any test case
row_idx, col_idx = rows-1+lag_row, cols-1+lag_col # 2d index in resulting corr matrix
a_lagged = a[lag_row:,lag_col:] # only works for lag_row>0, lag_col>0
sum_a[row_idx,col_idx] == np.sum(a_lagged)
Slow code for comparison:
from scipy.stats import pearsonr
rows,cols = data.shape
row_lags = range(-rows + 1, rows)
col_lags = range(-cols + 1, cols)
autocorr = np.empty((len(row_lags), len(col_lags)))
autocorr.shape
for lag_row in tqdm(row_lags):
for lag_col in col_lags:
# Create a lagged version of the data
# todo: implement logic for lag=0
if lag_row >= 0 and lag_col >= 0:
data_0 = data[lag_row:, lag_col:]
data_1 = data[:-lag_row, :-lag_col]
elif lag_row < 0 and lag_col < 0:
data_0 = data[:lag_row, :lag_col]
data_1 = data[-lag_row:, -lag_col:]
elif lag_row >= 0:
data_0 = data[lag_row:, :lag_col]
data_1 = data[:-lag_row, -lag_col:]
else:
data_0 = data[:lag_row, lag_col:]
data_1 = data[-lag_row:, :-lag_col]
try:
corr=pearsonr(data_0.flatten(),data_1.flatten()).statistic
except:
corr=np.nan
autocorr[lag_row+rows-1, lag_col+cols-1] = corr
This example is for autocorrelation but works the same.
Related discussion https://stackoverflow.com/a/51168178