"""Run FunASR speech recognition on an example clip and print the transcript.

The script builds an ``AutoModel`` for ``FunAudioLLM/SenseVoiceSmall`` with
an ``fsmn-vad`` VAD model attached, transcribes the ``example/en.mp3`` file
located under the loaded model's path, post-processes the first result with
``rich_transcription_postprocess`` and prints the resulting text.
"""

from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

# Model identifier handed to AutoModel; with hub="hf" below it is presumably
# resolved against the Hugging Face hub -- TODO confirm against FunASR docs.
model_dir = "FunAudioLLM/SenseVoiceSmall"

model = AutoModel(
    model=model_dir,
    vad_model="fsmn-vad",
    # NOTE(review): the value looks like milliseconds (30 s cap per VAD
    # segment) -- confirm the unit in the FunASR documentation.
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0",
    hub="hf",
)

# The input path is built from the loaded model's own `model_path`, so the
# clip is read from wherever AutoModel placed the model files.
res = model.generate(
    input=f"{model.model_path}/example/en.mp3",
    cache={},
    language="auto",
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)

# Only the first result entry is used; its "text" field is cleaned up by the
# FunASR post-processing helper before being printed.
text = rich_transcription_postprocess(res[0]["text"])
print(text)