I'm answering my own question in order to post the final solution to my problem, but it's mainly thanks to Lewis's answers and comments below.
Thank you, Lewis!
The input .wav file must be 16-bit PCM mono, which can be obtained with `ffmpeg -i "speech3.wav" "outfile.wav"`
in the Windows command prompt (cmd) after installing ffmpeg.
import wave
import json
from vosk import Model, KaldiRecognizer, SetLogLevel
# The .wav file must be 16-bit PCM mono!
def vosk(wavFile):
    """Transcribe a WAV file to text with the small French Vosk model.

    Args:
        wavFile: Path (or binary file object) of the audio to transcribe.
            It must be a mono, 16-bit PCM ``.wav`` file.

    Returns:
        The recognised text as a single string: every non-empty segment
        produced by the recogniser, joined by single spaces.

    Raises:
        ValueError: If the audio is not mono 16-bit PCM.
    """
    # The context manager guarantees the file is closed even if model
    # loading or recognition raises.
    with wave.open(wavFile, "rb") as wf:
        # Validate the format up front: feeding anything else to the
        # recogniser yields garbage rather than an error.
        if (
            wf.getnchannels() != 1
            or wf.getsampwidth() != 2
            or wf.getcomptype() != "NONE"
        ):
            raise ValueError("Audio file must be WAV format, mono, 16-bit PCM.")

        SetLogLevel(0)
        model = Model(model_path="voskSmallFr", model_name="vosk-model-small-fr-0.22")
        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)
        rec.SetPartialWords(True)

        segments = []
        while True:
            data = wf.readframes(4000)
            if not data:
                break
            # A truthy return means the recogniser closed an utterance
            # (silence detected), so its result can be collected now.
            if rec.AcceptWaveform(data):
                segments.append(json.loads(rec.Result())["text"])
        # Flush whatever is still buffered after the last chunk.
        segments.append(json.loads(rec.FinalResult())["text"])

    # Join the segments directly instead of slicing the list's repr, which
    # leaked quote/comma separators into the text whenever there was more
    # than one segment.
    return " ".join(segment for segment in segments if segment)
# Transcribe the ffmpeg-converted file and print the recognised text.
print(vosk(wavFile="outfile.wav"))