I ran your original script and it is still running. Patience is not one of my virtues, so is this what you are looking for?
import pandas as pd
import numpy as np
# Build a random sample DataFrame, walk it once per PLU via groupby, and show
# the vectorized way to get the last ArticleName per PLU.


def random_int_column(name, low, high, n_rows):
    """Return a one-column DataFrame of random integers.

    Args:
        name: Label of the single column.
        low: Smallest value that can be drawn (inclusive).
        high: Upper bound of the draw (exclusive).
        n_rows: Number of rows to generate.

    Returns:
        A DataFrame of shape (n_rows, 1) whose only column is ``name``.
    """
    values = np.random.randint(low, high, size=(n_rows, 1))
    return pd.DataFrame(values, columns=[name])


def print_group_summaries(df):
    """Print the shape and the last ArticleName of every PLU group.

    Args:
        df: DataFrame with 'PLU' and 'ArticleName' columns.
    """
    # Loop per PLU in one pass via groupby.
    for plu_value, group in df.groupby('PLU')['ArticleName']:
        # group is a Series of ArticleName values for this PLU
        print(f"PLU={plu_value}: group shape = {group.shape}")
        print(f" last ArticleName = {group.iloc[-1]}")


def last_article_per_plu(df):
    """Return the last ArticleName of each PLU without a Python-level loop.

    Args:
        df: DataFrame with 'PLU' and 'ArticleName' columns.

    Returns:
        A Series indexed by PLU, with the last ArticleName as value.
        NOTE: ``GroupBy.last`` skips missing values by default, so on data
        containing NaN it can differ from ``group.iloc[-1]``.
    """
    return df.groupby('PLU')['ArticleName'].last()


if __name__ == "__main__":
    # Generate the sample DataFrame.
    dflength = 7_303_787

    # "PLU" column of random integers (100000-199999)
    PLUs = random_int_column('PLU', 100_000, 200_000, dflength)

    # "ArticleName" column of random integers (1-99)
    ArticleNames = random_int_column('ArticleName', 1, 100, dflength)

    # Concatenate into one DataFrame of shape (dflength, 2)
    df = pd.concat([PLUs, ArticleNames], axis=1)

    # Up to 100,000 distinct PLU values are possible here, so this prints two
    # lines for each of them.
    print_group_summaries(df)

    # Optional: if you only need the last ArticleName per PLU, skip the loop.
    last_per_plu = last_article_per_plu(df)

    print("\nVectorized result (last ArticleName per PLU):")
    print(last_per_plu.head())  # show first few entries