@echo off
rem ---------------------------------------------------------------------------
rem Vocal extraction + transcription pipeline.
rem
rem Usage:
rem   <this_script> <input_file>
rem
rem Steps:
rem   1. If the input has an .avi extension, extract its audio track to an
rem      MP3 next to the input using ffmpeg; otherwise use the input as-is.
rem   2. Run Demucs (model htdemucs_ft, --two-stems vocals) on the audio,
rem      writing into "<input dir>\vocal_extracted".
rem   3. Convert the separated vocals to 16 kHz mono 16-bit PCM with an
rem      ffmpeg filter chain (lowpass, compressor, two EQ bands).
rem   4. Run transcribe_japanese_with_diarization.py, located in the same
rem      directory as this script, on the enhanced WAV.
rem   5. Delete the intermediate files created by steps 1-3.
rem
rem Exit code:
rem   0 on success; 1 on a usage error, a missing input file, or when any
rem   pipeline stage returns a non-zero exit status.
rem ---------------------------------------------------------------------------

rem Delayed expansion is deliberately disabled: the script never uses !var!,
rem and with it enabled any "!" inside the input path would be stripped when
rem %~1 / %~n1 / %~dp1 are expanded.
setlocal enableextensions disabledelayedexpansion

if "%~1"=="" (
    echo Usage: %0 ^<input_file^>
    exit /b 1
)

if not exist "%~1" (
    >&2 echo Error: input file not found: "%~1"
    exit /b 1
)

set "input_file=%~1"
set "file_extension=%~x1"
set "file_name=%~n1"
set "file_dir=%~dp1"

rem Intermediate artefacts, named once so every stage and the cleanup agree.
set "extracted_mp3=%file_dir%%file_name%.mp3"
set "demucs_out_dir=%file_dir%vocal_extracted"
set "enhanced_wav=%file_dir%%file_name%_vocals_16k_mono_enhanced.wav"

rem Becomes 1 as soon as any stage fails; checked after cleanup.
set "exit_code=0"

rem --- Step 1: optional audio extraction ------------------------------------
if /i "%file_extension%"==".avi" (
    echo Extracting audio from video...
    ffmpeg -i "%input_file%" -q:a 0 "%extracted_mp3%"
    rem "if errorlevel" is evaluated at run time, so it is safe inside a block.
    if errorlevel 1 (
        >&2 echo Error: ffmpeg failed to extract audio from the input video.
        set "exit_code=1"
        goto cleanup
    )
    set "audio_file=%extracted_mp3%"
) else (
    set "audio_file=%input_file%"
)

rem --- Step 2: vocal separation ---------------------------------------------
echo Running Demucs for vocal extraction...
demucs -n htdemucs_ft "%audio_file%" --two-stems vocals -o "%demucs_out_dir%" --filename "{track}-{stem}.{ext}"
if errorlevel 1 (
    >&2 echo Error: Demucs vocal extraction failed.
    set "exit_code=1"
    goto cleanup
)

rem --- Step 3: resample / enhance the vocals --------------------------------
echo Processing extracted vocals...
ffmpeg -i "%demucs_out_dir%\htdemucs_ft\%file_name%-vocals.wav" -ar 16000 -ac 1 -acodec pcm_s16le -af "aresample=resampler=soxr, lowpass=f=7500, acompressor=threshold=-12dB:ratio=2:attack=5:release=50, equalizer=f=1000:width_type=o:width=1:g=2, equalizer=f=3000:width_type=o:width=1:g=3" -b:a 128k "%enhanced_wav%"
if errorlevel 1 (
    >&2 echo Error: ffmpeg failed to process the extracted vocals.
    set "exit_code=1"
    goto cleanup
)

rem --- Step 4: transcription ------------------------------------------------
echo Running transcription...
rem %~dp0 is this script's own directory, so the Python helper is found
rem regardless of the caller's current directory.
python "%~dp0transcribe_japanese_with_diarization.py" "%enhanced_wav%"
if errorlevel 1 (
    >&2 echo Error: transcription script failed.
    set "exit_code=1"
)

rem --- Step 5: cleanup (runs on success and on failure) ---------------------
:cleanup
echo Cleaning up temporary files...
rem Existence checks avoid spurious "could not find" messages when an earlier
rem stage failed before producing its output.
if /i "%file_extension%"==".avi" if exist "%extracted_mp3%" del "%extracted_mp3%"
if exist "%demucs_out_dir%" rmdir /s /q "%demucs_out_dir%"
if exist "%enhanced_wav%" del "%enhanced_wav%"

if not "%exit_code%"=="0" exit /b %exit_code%

echo Processing complete!
exit /b 0