There is no LIMIT
clause in the SQL UPDATE
statement.
Even if there was, LIMIT
/ OFFSET
for "pagination" does not scale well.
Something like this could work:
CREATE OR REPLACE PROCEDURE upd_in_batches(_batch_size int = 1000)
LANGUAGE plpgsql AS
$proc$
DECLARE
_id_bound int = 0; -- or whatever?
BEGIN
LOOP
WITH sel AS (
SELECT a.id -- id = PK!
FROM tablea a
WHERE a.id > _id_bound
-- AND <some other conditions in Table A>
ORDER BY a.id
LIMIT _batch_size
FOR UPDATE
)
, upd AS (
UPDATE tablea a
SET target_col = b.b_source_col
FROM sel s
JOIN tableb b USING (id)
WHERE a.id = s.id
AND a.target_col IS DISTINCT FROM b.b_source_col
)
SELECT max(id) -- always returns a row
FROM sel
INTO _id_bound;
IF _id_bound IS NULL THEN
EXIT; -- no more rows found; we're done, exit loop
ELSE
COMMIT;
END IF;
END LOOP;
END
$proc$
See: