import { randomUUID } from 'node:crypto';
import { createReadStream } from 'node:fs';
import { extname, posix } from 'node:path';
import { Client } from 'ssh2';
import type { ClientChannel, ConnectConfig, SFTPWrapper } from 'ssh2';

/** Settings for reaching the remote machine and running the tools on it. */
interface WhisperConfig {
  audioDir: string;
  command: string;
  envActivatePath: string;
  envName: string;
  ffmpegCommand: string;
  host: string;
  language: string;
  model: string;
  password: string;
  port: number;
  sshReadyTimeoutMs: number;
  timeoutMs: number;
  transcriptDir: string;
  username: string;
}

/** A local recording plus the file name it was uploaded under. */
interface TranscriptionInput {
  localFilePath: string;
  originalName: string;
}

/** Transcript text and the remote paths that are left in place after the run. */
interface TranscriptionOutput {
  remoteAudioPath: string;
  remoteTranscriptPath: string;
  transcript: string;
}

/**
 * Reads an environment variable, falling back to `fallback` when it is unset.
 *
 * @throws Error when the resulting value is missing or an empty string.
 */
const requiredEnv = (name: string, fallback?: string) => {
  const value = process.env[name] ?? fallback;

  if (!value) {
    throw new Error(`Missing required environment variable: ${name}`);
  }

  return value;
};

/**
 * Reads a numeric environment variable, falling back to `fallback` when it is unset.
 *
 * @throws Error when the value is not a finite number greater than zero, so a
 *   typo cannot reach the SSH port or a timer as NaN/0.
 */
const positiveNumberEnv = (name: string, fallback: number) => {
  const raw = process.env[name];
  const value = raw === undefined ? fallback : Number(raw);

  if (!Number.isFinite(value) || value <= 0) {
    throw new Error(`Invalid numeric environment variable: ${name}`);
  }

  return value;
};

/** Builds the configuration from the environment; only the password has no default. */
const getConfig = (): WhisperConfig => ({
  audioDir: requiredEnv('WHISPER_VM_AUDIO_DIR', '/home/kevin/mom_audio'),
  command: requiredEnv('WHISPER_COMMAND', 'whisper'),
  envActivatePath: requiredEnv(
    'WHISPER_ENV_ACTIVATE',
    '/home/kevin/whisper-env/bin/activate'
  ),
  envName: requiredEnv('WHISPER_ENV_NAME', 'whisper-env'),
  ffmpegCommand: requiredEnv('FFMPEG_COMMAND', 'ffmpeg'),
  host: requiredEnv('WHISPER_VM_HOST', '172.16.10.51'),
  language: requiredEnv('WHISPER_LANGUAGE', 'English'),
  model: requiredEnv('WHISPER_MODEL', 'medium'),
  password: requiredEnv('WHISPER_VM_PASSWORD'),
  port: positiveNumberEnv('WHISPER_VM_PORT', 22),
  sshReadyTimeoutMs: positiveNumberEnv('WHISPER_SSH_READY_TIMEOUT_MS', 60_000),
  timeoutMs: positiveNumberEnv('WHISPER_TIMEOUT_MS', 1_800_000),
  transcriptDir: requiredEnv(
    'WHISPER_VM_TRANSCRIPT_DIR',
    '/home/kevin/mom_transcripts'
  ),
  username: requiredEnv('WHISPER_VM_USER', 'kevin')
});

/** Removes trailing slashes from a remote directory path. */
const trimRemoteDir = (dir: string) => dir.replace(/\/+$/, '');

/** Wraps a value in single quotes for a POSIX shell, escaping embedded single quotes. */
const shellQuote = (value: string) => `'${value.replace(/'/g, "'\\''")}'`;

/** Normalises a stream chunk to a Buffer so chunks can be joined before decoding. */
const toBuffer = (chunk: Buffer | string) =>
  typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk;

/**
 * Decodes collected chunks in one pass. Decoding chunk by chunk would corrupt
 * any multi-byte character that happens to be split across two chunks.
 */
const decodeChunks = (chunks: Buffer[]) => Buffer.concat(chunks).toString('utf8');

/**
 * Wraps `command` in a `bash -lc` script that first activates the environment:
 * the activation file when it exists, otherwise `conda activate`. The script
 * exits with status 127 when neither is available.
 */
const runInsideWhisperEnv = (config: WhisperConfig, command: string) => {
  const script = [
    'set -e',
    `if [ -f ${shellQuote(config.envActivatePath)} ]; then`,
    ` . ${shellQuote(config.envActivatePath)}`,
    'elif command -v conda >/dev/null 2>&1; then',
    ' eval "$(conda shell.bash hook)"',
    ` conda activate ${shellQuote(config.envName)}`,
    'else',
    ` echo ${shellQuote(
      'Unable to activate the transcription environment. Check the remote activation path.'
    )} >&2`,
    ' exit 127',
    'fi',
    command
  ].join('\n');

  return `bash -lc ${shellQuote(script)}`;
};

/**
 * Derives a file-name-safe stem from an uploaded file name: drops the last
 * extension, replaces runs of other characters with `-`, trims dashes from both
 * ends and caps the length at 80. Falls back to `meeting-audio` when nothing is left.
 */
const safeBaseName = (fileName: string) => {
  const withoutExtension = fileName.replace(/\.[^.]+$/, '');
  const safe = withoutExtension
    .replace(/[^a-zA-Z0-9._-]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 80);

  return safe || 'meeting-audio';
};

/**
 * Opens a password-authenticated SSH connection.
 *
 * @returns The connected client once it reports `ready`.
 * @throws Error with the target host and port plus the underlying error message.
 */
const connectSsh = (config: WhisperConfig) =>
  new Promise<Client>((resolve, reject) => {
    const client = new Client();
    const connection: ConnectConfig = {
      host: config.host,
      keepaliveInterval: 15_000,
      password: config.password,
      port: config.port,
      readyTimeout: config.sshReadyTimeoutMs,
      username: config.username
    };

    client
      .on('ready', () => resolve(client))
      .on('error', (error) => {
        const message =
          error instanceof Error ? error.message : 'Unknown SSH connection error.';

        // The "\n" before "Details" reproduces the message exactly as it is emitted today.
        reject(
          new Error(
            `Unable to connect to the transcription service at ${config.host}:${config.port}. ` +
              `Check that the VM is powered on, reachable from this machine, and accepting SSH. \nDetails: ${message}`
          )
        );
      })
      .connect(connection);
  });

/** Opens an SFTP session on an already connected client. */
const getSftp = (client: Client) =>
  new Promise<SFTPWrapper>((resolve, reject) => {
    client.sftp((error, sftp) => {
      if (error) {
        reject(error);
        return;
      }

      resolve(sftp);
    });
  });

/**
 * Runs a command over SSH and resolves with its trimmed stdout followed by stderr.
 *
 * @param timeoutMs Time allowed before the channel is closed and the call rejects.
 * @throws Error when the command cannot be started, times out, exits with a
 *   non-zero status or is terminated by a signal. The message is the command's
 *   stderr, else its stdout, else a generic description.
 */
const execCommand = (client: Client, command: string, timeoutMs: number) =>
  new Promise<string>((resolve, reject) => {
    let stream: ClientChannel | null = null;
    let settled = false;
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];

    const finish = (error?: Error, output?: string) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);

      if (error) {
        reject(error);
        return;
      }

      resolve(output ?? '');
    };

    const timeout = setTimeout(() => {
      stream?.close();
      finish(new Error('Transcription timed out on the processing machine.'));
    }, timeoutMs);

    client.exec(command, (error, commandStream) => {
      if (error) {
        finish(error);
        return;
      }

      // The timeout fired before the channel opened; there was nothing to close
      // at that point, so close it now instead of leaving the command running.
      if (settled) {
        commandStream.close();
        return;
      }

      stream = commandStream;

      commandStream.on('data', (chunk: Buffer | string) => {
        stdoutChunks.push(toBuffer(chunk));
      });

      commandStream.stderr.on('data', (chunk: Buffer | string) => {
        stderrChunks.push(toBuffer(chunk));
      });

      commandStream.on('close', (code: number | null, signal?: string | null) => {
        const stdout = decodeChunks(stdoutChunks);
        const stderr = decodeChunks(stderrChunks);
        const exitedWithError = typeof code === 'number' && code !== 0;

        // A process killed by a signal reports no exit code, so the signal has
        // to be checked as well or the kill would be mistaken for success.
        if (exitedWithError || signal) {
          const fallbackMessage = signal
            ? `Transcription was terminated by signal ${signal}.`
            : `Transcription failed with exit code ${code}.`;

          finish(new Error(stderr.trim() || stdout.trim() || fallbackMessage));
          return;
        }

        finish(undefined, `${stdout}${stderr}`.trim());
      });
    });
  });

/**
 * Streams a local file to `remoteFilePath` over SFTP (mode 0o640), resolving
 * once the remote stream has closed.
 */
const uploadFile = (
  sftp: SFTPWrapper,
  localFilePath: string,
  remoteFilePath: string
) =>
  new Promise<void>((resolve, reject) => {
    let settled = false;
    const source = createReadStream(localFilePath);
    const target = sftp.createWriteStream(remoteFilePath, {
      flags: 'w',
      mode: 0o640
    });

    const finish = (error?: Error) => {
      if (settled) return;
      settled = true;

      if (error) {
        // pipe() does not tear down the other side when one side fails, so
        // release both explicitly.
        source.destroy();
        target.destroy();
        reject(error);
        return;
      }

      resolve();
    };

    source.on('error', finish);
    target.on('error', finish);
    target.on('close', () => finish());
    source.pipe(target);
  });

/** Reads a remote file over SFTP and resolves with its trimmed UTF-8 content. */
const readRemoteFile = (sftp: SFTPWrapper, remoteFilePath: string) =>
  new Promise<string>((resolve, reject) => {
    let settled = false;
    const chunks: Buffer[] = [];
    const source = sftp.createReadStream(remoteFilePath);

    const finish = (error?: Error) => {
      if (settled) return;
      settled = true;

      if (error) {
        reject(error);
        return;
      }

      resolve(decodeChunks(chunks).trim());
    };

    source.on('data', (chunk: Buffer | string) => {
      chunks.push(toBuffer(chunk));
    });
    source.on('error', finish);
    source.on('close', () => finish());
  });

/**
 * Best-effort removal of a remote file. The unlink error is ignored on purpose:
 * this runs during cleanup, where a failed delete must not replace the outcome
 * of the transcription itself.
 */
const deleteRemoteFile = (sftp: SFTPWrapper, remoteFilePath: string) =>
  new Promise<void>((resolve) => {
    sftp.unlink(remoteFilePath, () => resolve());
  });

/**
 * Uploads a recording to the remote machine, converts it to 16 kHz mono WAV,
 * runs the transcription command on it and returns the transcript text.
 *
 * The uploaded source file is removed afterwards; the WAV and transcript files
 * stay on the remote machine and their paths are returned.
 *
 * @param input.localFilePath Path of the recording on this machine.
 * @param input.originalName Uploaded file name; supplies the remote file stem
 *   and extension (`.webm` when it has none).
 * @returns The transcript together with the remote audio and transcript paths.
 * @throws Error when configuration is invalid, the connection fails, a remote
 *   command fails or times out, or the transcript is empty.
 */
export const transcribeOnWhisperVm = async ({
  localFilePath,
  originalName
}: TranscriptionInput): Promise<TranscriptionOutput> => {
  const config = getConfig();
  const audioDir = trimRemoteDir(config.audioDir);
  const transcriptDir = trimRemoteDir(config.transcriptDir);
  const extension = extname(originalName).toLowerCase() || '.webm';
  // Timestamp plus UUID keeps concurrent uploads of the same file name apart.
  const remoteBaseName = `${Date.now()}-${randomUUID()}-${safeBaseName(
    originalName
  )}`;
  const remoteSourcePath = posix.join(
    audioDir,
    `${remoteBaseName}.source${extension}`
  );
  const remoteAudioPath = posix.join(audioDir, `${remoteBaseName}.wav`);
  // NOTE(review): assumes the transcription command writes "<audio stem>.txt"
  // into --output_dir; confirm against the installed version.
  const remoteTranscriptPath = posix.join(transcriptDir, `${remoteBaseName}.txt`);
  const client = await connectSsh(config);
  let sftp: SFTPWrapper | null = null;

  try {
    await execCommand(
      client,
      `mkdir -p ${shellQuote(audioDir)} ${shellQuote(transcriptDir)}`,
      30_000
    );

    sftp = await getSftp(client);
    await uploadFile(sftp, localFilePath, remoteSourcePath);

    // Browser recordings are commonly WebM/Opus; the remote job is more reliable with WAV.
    await execCommand(
      client,
      [
        config.ffmpegCommand,
        '-y',
        '-hide_banner',
        '-loglevel',
        'error',
        '-i',
        shellQuote(remoteSourcePath),
        '-vn',
        '-ac',
        '1',
        '-ar',
        '16000',
        '-c:a',
        'pcm_s16le',
        shellQuote(remoteAudioPath)
      ].join(' '),
      config.timeoutMs
    );

    // Transcription runs remotely so the Node process stays lightweight.
    await execCommand(
      client,
      runInsideWhisperEnv(
        config,
        [
          shellQuote(config.command),
          shellQuote(remoteAudioPath),
          '--model',
          shellQuote(config.model),
          '--language',
          shellQuote(config.language),
          '--task',
          'transcribe',
          '--output_dir',
          shellQuote(transcriptDir),
          '--output_format',
          'txt',
          '--fp16',
          'False'
        ].join(' ')
      ),
      config.timeoutMs
    );

    const transcript = await readRemoteFile(sftp, remoteTranscriptPath);

    if (!transcript) {
      throw new Error('The transcription service returned an empty transcript.');
    }

    return {
      remoteAudioPath,
      remoteTranscriptPath,
      transcript
    };
  } finally {
    if (sftp) {
      // Only the uploaded source is removed; the WAV and transcript paths are
      // handed back to the caller.
      await deleteRemoteFile(sftp, remoteSourcePath);
      sftp.end();
    }

    client.end();
  }
};