I improved the script by @abdo OUHNA a bit. The main problem with it was that when the Y coordinates of words on the same line differ slightly, the sorting worked incorrectly. The improved version uses clustering and works OK for me on receipts.
Change the eps parameter if needed. Its meaning is the maximum difference in pixels between words for them to be treated as being on the same line.
# Run OCR on the image; each entry is unpacked below as
# (corner coordinates, text, confidence).
result = reader.readtext(temp_image_path)

# Flatten every detection into one row: text, the four box corners, confidence.
data = []
for entry in result:
    coordinates, text, confidence = entry
    (tl_x, tl_y), (tr_x, tr_y), (br_x, br_y), (bl_x, bl_y) = coordinates
    data.append([text, tl_x, tl_y, tr_x, tr_y, bl_x, bl_y, br_x, br_y, confidence])
df = pd.DataFrame(data, columns=['text', 'tl_x', 'tl_y', 'tr_x', 'tr_y', 'bl_x', 'bl_y', 'br_x', 'br_y', 'confidence'])

# Vertical centre of each box's left edge; this is the value words are
# clustered on to decide which ones share a text line.
df['mid_y'] = (df['tl_y'] + df['bl_y']) / 2

# DBSCAN Clustering algo
eps = 5  # !!!!!!! CHANGE IT IF NEEDED !!!!!! max distance to be on one line

if df.empty:
    # Nothing was detected. DBSCAN rejects an empty sample set, so skip
    # clustering and produce an empty result instead of raising.
    df['line_cluster'] = pd.Series(dtype='int64')
    df_sorted = df
    grouped_texts = []
else:
    # min_samples=1 means every word belongs to some cluster (no noise label).
    dbscan = DBSCAN(eps=eps, min_samples=1, metric='euclidean')
    df['line_cluster'] = dbscan.fit_predict(df[['mid_y']])

    # DBSCAN labels are just cluster ids and are not guaranteed to increase
    # from the top of the image to the bottom. Relabel each cluster by the
    # rank of its smallest mid_y so that sorting by 'line_cluster' yields
    # lines in top-to-bottom order regardless of the OCR result order.
    line_top = df.groupby('line_cluster')['mid_y'].transform('min')
    df['line_cluster'] = line_top.rank(method='dense').astype(int) - 1

    # Within a line, order words left to right by their top-left x.
    df_sorted = df.sort_values(by=['line_cluster', 'tl_x'])
    grouped_texts = df_sorted.groupby('line_cluster')['text'].apply(lambda words: " ".join(words)).tolist()

# One output line per cluster, words separated by single spaces.
extracted_text = "\n".join(grouped_texts)