Thanks furas for the explanation, it helped me understand the problem itself. I'll leave a version that yields in batches; by adjusting the batch size you can tune the trade-off between memory usage and response time.
from flask import Flask, Response, stream_with_context
from memory_profiler import memory_usage
import json
import time

app = Flask(__name__)

BIG_SIZE = 1_000_000  # same constant as in the original example; adjust as needed

@app.route("/streamed_batches")
def streamed_response_batches():
    start_time = time.time()
    mem_before = memory_usage()[0]
    BATCH_SIZE = 20

    def generate():
        yield "["
        first = True
        batch = []
        for i in range(BIG_SIZE):
            batch.append({"id": i, "value": f"Item-{i}"})
            if len(batch) >= BATCH_SIZE or i == BIG_SIZE - 1:
                # Flush this batch: strip the "[" and "]" that json.dumps
                # adds so the chunks concatenate into a single JSON array
                chunk = json.dumps(batch)
                if not first:
                    yield ","
                yield chunk[1:-1]
                batch = []
                first = False
        yield "]"
        # Measure here, after the last yield: the generator body only runs
        # while the response is being consumed, so measuring outside of it
        # would report the state from before any item was produced
        mem_after = memory_usage()[0]
        elapsed = time.time() - start_time
        print(f"[STREAMED_BATCHES] Memory Before: {mem_before:.2f} MB, "
              f"After: {mem_after:.2f} MB, Elapsed: {elapsed:.2f}s")

    return Response(stream_with_context(generate()), mimetype="application/json")
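For completeness, here is a minimal sketch of how to watch the chunks arrive on the client side (the URL and port are assumptions for a local run; the chunk handling is just for illustration):

import requests

# stream=True keeps requests from buffering the whole body in memory;
# chunk_size=None yields data as it arrives from the server
with requests.get("http://localhost:5000/streamed_batches", stream=True) as r:
    for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
        print(f"received {len(chunk)} characters")

A bigger BATCH_SIZE means fewer json.dumps calls and yields (faster, closer to a plain non-streamed response), while a smaller one keeps fewer items in memory at a time, which is exactly the knob mentioned above.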