I found the solution: a WSGI middleware inside my main.py,
based on this class. The full code of my main.py
is now as follows:
import os

from flask import Flask, render_template, request
from groq import Groq
# App configuration
model_id = "qwen/qwen3-32b"
# Prefer the GROQ_API_KEY environment variable so the secret never has to be
# committed with the source; the original literal is kept only as a
# backward-compatible fallback for setups that still edit this file.
groq_api_key = os.environ.get("GROQ_API_KEY", "<--my API key :)-->")
# URL prefix under which the application is exposed.
PREFIX = "/qwen"
# PrefixMiddleware auxiliary class
class PrefixMiddleware:
    """WSGI middleware that serves the wrapped application under a URL prefix.

    When the request path is the prefix itself or lies below it, the prefix
    is moved from ``PATH_INFO`` to ``SCRIPT_NAME`` before the wrapped
    application is called. Every other request is forwarded with its environ
    untouched.
    """

    def __init__(self, app, prefix):
        """Remember the wrapped WSGI callable and the prefix to strip.

        Args:
            app: The WSGI application to wrap.
            prefix: URL prefix such as ``"/qwen"``. A trailing slash is
                ignored when matching.
        """
        self.app = app
        self.prefix = prefix

    def __call__(self, environ, start_response):
        """Rewrite the environ for prefixed requests, then delegate.

        Args:
            environ: The WSGI environ dict; modified in place on a match.
            start_response: The WSGI ``start_response`` callable, passed
                through unchanged.

        Returns:
            Whatever the wrapped application returns.
        """
        # Normalise here (not in __init__) so ``self.prefix`` keeps the exact
        # value the caller supplied.
        prefix = self.prefix.rstrip('/')
        path = environ.get('PATH_INFO', '')
        # Match only on a path-segment boundary: a bare startswith() would
        # also accept "/qwenfoo" and leave a PATH_INFO without a leading "/".
        if path == prefix or path.startswith(prefix + '/'):
            # Append instead of overwrite so an outer mount point survives.
            environ['SCRIPT_NAME'] = environ.get('SCRIPT_NAME', '') + prefix
            environ['PATH_INFO'] = path[len(prefix):] or '/'
        return self.app(environ, start_response)
# App definition
# Groq API client shared by the request handlers.
client = Groq(api_key=groq_api_key)
app = Flask(__name__)
# Wrap the WSGI entry point so requests arriving under PREFIX are routed as
# if the application were mounted at the root.
app.wsgi_app = PrefixMiddleware(app=app.wsgi_app, prefix=PREFIX)
# Flask routes
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/answer', methods=['POST'])
def answer():
    """Answer the submitted prompt with the configured Groq chat model.

    Reads ``input_text`` from the POSTed form, requests a streamed chat
    completion for it and renders the concatenated reply.

    Returns:
        The rendered ``index.html`` (with ``input_text`` and ``result``) on
        success; a plain-text message with status 400 when ``input_text`` is
        missing or empty; a plain-text message with status 500 when the
        completion request or the reading of its stream raises.
    """
    input_text = request.form.get('input_text')
    if not input_text:
        return "Please provide input text.", 400
    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": "User chatbot"},
                {"role": "user", "content": input_text},
            ],
            temperature=1,
            max_tokens=1024,
            top_p=1,
            stream=True,
            stop=None,
        )
        # The stream is consumed inside the try block because iterating it
        # can raise as well. Chunks with an empty ``choices`` list are
        # skipped instead of failing the whole request with an IndexError.
        pieces = [
            chunk.choices[0].delta.content or ""
            for chunk in completion
            if chunk.choices
        ]
    except Exception as e:
        # Keep a server-side traceback; previously the failure was only
        # echoed back to the client and left no trace in the logs.
        app.logger.exception("Groq chat completion failed")
        return f"An error occurred: {e}", 500
    result = "".join(pieces)
    return render_template('index.html', input_text=input_text, result=result)
# __main__
if __name__ == "__main__":
    # Start Flask's built-in server on port 5000, bound to 0.0.0.0, when
    # this file is executed directly.
    app.run(port=5000, host="0.0.0.0")
In the Kubernetes Ingress manifest, I also removed the annotation. The other files are unchanged.