throttled-py can help you achieve fixed-rate calls.
You can specify a `timeout` to enable wait-and-retry behavior: the rate limiter
waits according to the `retry_after` value in `RateLimitState` and then retries
automatically.
from throttled import RateLimiterType, Throttled, rate_limiter, utils
# Shared limiter for the example: token-bucket algorithm, 1_000 requests per
# second with a burst capacity of 1_000.
throttle = Throttled(
    # ⏳ timeout=1 turns on wait-and-retry (wait at most 1 second)
    timeout=1,
    quota=rate_limiter.per_sec(1_000, burst=1_000),
    using=RateLimiterType.TOKEN_BUCKET.value,
)
def call_api() -> bool:
    """Consume one unit of the "/ping" quota and report whether it was limited.

    The ``timeout`` passed here is the function-level timeout, which overrides
    the global timeout configured on ``throttle``.

    Returns:
        The ``limited`` attribute of the result returned by ``throttle.limit``.
    """
    return throttle.limit("/ping", cost=1, timeout=1).limited
if __name__ == "__main__":
    # 👇 Sample run — the measured QPS lands close to the configured quota
    #    (1_000 req/s):
    #    ✅ Total: 10000, 🕒 Latency: 14.7883 ms/op, 🚀Throughput: 1078 req/s (--)
    #    ❌ Denied: 54 requests
    benchmark: utils.Benchmark = utils.Benchmark()
    # Run call_api 10_000 times across 16 workers; each outcome is the
    # boolean "limited" flag, so summing them counts the denied requests.
    outcomes = benchmark.concurrent(call_api, 10_000, workers=16)
    denied_num: int = sum(outcomes)
    print(f"❌ Denied: {denied_num} requests")