357 lines
9.2 KiB
TypeScript
357 lines
9.2 KiB
TypeScript
import { randomUUID } from 'node:crypto';
|
|
import { createReadStream } from 'node:fs';
|
|
import { extname, posix } from 'node:path';
|
|
import { Client } from 'ssh2';
|
|
import type { ClientChannel, ConnectConfig, SFTPWrapper } from 'ssh2';
|
|
|
|
/**
 * Settings for one transcription run against the remote processing machine.
 * Values are resolved from environment variables by `getConfig`.
 */
interface WhisperConfig {
  // --- SSH connection ---

  /** Host the SSH client connects to. */
  host: string;
  /** TCP port of the SSH server. */
  port: number;
  /** SSH login name. */
  username: string;
  /** SSH password used for authentication. */
  password: string;
  /** Passed to ssh2 as `readyTimeout`, in milliseconds. */
  sshReadyTimeoutMs: number;

  // --- Remote directories ---

  /** Remote directory that receives the uploaded source file and the converted WAV. */
  audioDir: string;
  /** Remote directory given to the transcription command as `--output_dir`. */
  transcriptDir: string;

  // --- Remote tooling ---

  /** Command used to convert the uploaded recording to WAV. */
  ffmpegCommand: string;
  /** Transcription command executed inside the activated environment. */
  command: string;
  /** Activation script that is sourced when it exists on the remote machine. */
  envActivatePath: string;
  /** Conda environment name used when the activation script is absent. */
  envName: string;

  // --- Transcription options ---

  /** Value passed as `--model`. */
  model: string;
  /** Value passed as `--language`. */
  language: string;
  /** Time limit, in milliseconds, applied to the conversion and transcription commands. */
  timeoutMs: number;
}
|
|
|
|
/** Describes the recording handed to `transcribeOnWhisperVm`. */
interface TranscriptionInput {
  /** Name the file was originally given; used for the extension and the remote base name. */
  originalName: string;
  /** Path of the recording on the local filesystem; this file is uploaded over SFTP. */
  localFilePath: string;
}
|
|
|
|
/** Result returned by `transcribeOnWhisperVm` after a successful run. */
interface TranscriptionOutput {
  /** Trimmed text read back from the remote transcript file. */
  transcript: string;
  /** Remote path of the WAV file produced by the conversion step. */
  remoteAudioPath: string;
  /** Remote path of the transcript text file. */
  remoteTranscriptPath: string;
}
|
|
|
|
/**
 * Reads an environment variable, using `fallback` when the variable is unset
 * or blank.
 *
 * @param name - Name of the environment variable.
 * @param fallback - Default used when the variable is unset, empty, or whitespace only.
 * @returns The variable's value (unmodified) or the fallback.
 * @throws Error when neither the variable nor the fallback provides a non-empty value.
 */
const requiredEnv = (name: string, fallback?: string): string => {
  const raw = process.env[name];

  // `??` alone only covers an unset variable. A variable that is exported but
  // empty (`FOO=`) must also fall through to the default instead of failing.
  const isBlank = raw === undefined || raw.trim() === '';
  const value = isBlank ? fallback : raw;

  if (!value) {
    throw new Error(`Missing required environment variable: ${name}`);
  }

  return value;
};
|
|
|
|
/**
 * Parses an optional integer environment variable.
 *
 * @param name - Name of the environment variable.
 * @param fallback - Value used when the variable is unset or blank.
 * @param min - Smallest accepted value (inclusive).
 * @param max - Largest accepted value (inclusive).
 * @returns The parsed integer, or `fallback`.
 * @throws Error when the variable is set but is not an integer within `[min, max]`.
 */
const integerEnv = (
  name: string,
  fallback: number,
  min: number,
  max: number = Number.MAX_SAFE_INTEGER
): number => {
  const raw = process.env[name];

  // A bare `Number('')` would yield 0; treat blank the same as unset.
  if (raw === undefined || raw.trim() === '') {
    return fallback;
  }

  const parsed = Number(raw);

  // Reject NaN, fractions, and out-of-range values here rather than letting
  // them reach the SSH client or a timer.
  if (!Number.isInteger(parsed) || parsed < min || parsed > max) {
    throw new Error(
      `Invalid value for environment variable ${name}: expected an integer between ${min} and ${max}, received "${raw}".`
    );
  }

  return parsed;
};

/**
 * Builds the transcription configuration from environment variables.
 *
 * Every string setting except `WHISPER_VM_PASSWORD` has a built-in default.
 *
 * @returns The resolved configuration.
 * @throws Error when a required variable is missing or a numeric variable is invalid.
 */
const getConfig = (): WhisperConfig => ({
  audioDir: requiredEnv('WHISPER_VM_AUDIO_DIR', '/home/kevin/mom_audio'),
  command: requiredEnv('WHISPER_COMMAND', 'whisper'),
  envActivatePath: requiredEnv(
    'WHISPER_ENV_ACTIVATE',
    '/home/kevin/whisper-env/bin/activate'
  ),
  envName: requiredEnv('WHISPER_ENV_NAME', 'whisper-env'),
  ffmpegCommand: requiredEnv('FFMPEG_COMMAND', 'ffmpeg'),
  host: requiredEnv('WHISPER_VM_HOST', '172.16.10.51'),
  language: requiredEnv('WHISPER_LANGUAGE', 'English'),
  model: requiredEnv('WHISPER_MODEL', 'medium'),
  // No default: the password must always come from the environment.
  password: requiredEnv('WHISPER_VM_PASSWORD'),
  port: integerEnv('WHISPER_VM_PORT', 22, 1, 65_535),
  // 0 is accepted and handed to ssh2 unchanged.
  sshReadyTimeoutMs: integerEnv('WHISPER_SSH_READY_TIMEOUT_MS', 60_000, 0),
  // Must be positive: this value is used directly as a setTimeout delay for remote commands.
  timeoutMs: integerEnv('WHISPER_TIMEOUT_MS', 1_800_000, 1),
  transcriptDir: requiredEnv(
    'WHISPER_VM_TRANSCRIPT_DIR',
    '/home/kevin/mom_transcripts'
  ),
  username: requiredEnv('WHISPER_VM_USER', 'kevin')
});
|
|
|
|
/**
 * Removes trailing slashes from a remote directory path.
 *
 * @param dir - Remote directory path using `/` separators.
 * @returns The path without trailing slashes; a path made only of slashes
 *   is returned as `/`.
 */
const trimRemoteDir = (dir: string): string => {
  const trimmed = dir.replace(/\/+$/, '');

  // Stripping every slash from "/" would leave "", and joining "" with a file
  // name produces a relative path. Keep the root directory as "/".
  return trimmed === '' && dir.startsWith('/') ? '/' : trimmed;
};
|
|
|
|
/**
 * Wraps a value in single quotes for use as one shell word.
 * Each embedded single quote is rewritten as `'\''` (close quote, escaped
 * quote, reopen quote).
 */
const shellQuote = (value: string) => {
  const escaped = value.split("'").join("'\\''");

  return "'" + escaped + "'";
};
|
|
|
|
/**
 * Builds a `bash -lc` invocation that activates the transcription environment
 * and then runs `command`.
 *
 * The generated script sources the activation file when it exists, otherwise
 * activates the named conda environment when `conda` is on the PATH, and
 * otherwise prints an error and exits with status 127.
 *
 * @param config - Supplies the activation script path and conda environment name.
 * @param command - Shell text appended verbatim as the script's last line.
 * @returns The full command line, with the script passed as one quoted argument.
 */
const runInsideWhisperEnv = (config: WhisperConfig, command: string) => {
  const activateScript = shellQuote(config.envActivatePath);
  const condaEnvironment = shellQuote(config.envName);
  const activationFailure = shellQuote(
    'Unable to activate the transcription environment. Check the remote activation path.'
  );

  const lines: string[] = [];

  // Abort the script as soon as any step fails.
  lines.push('set -e');
  lines.push(`if [ -f ${activateScript} ]; then`, ` . ${activateScript}`);
  lines.push(
    'elif command -v conda >/dev/null 2>&1; then',
    ' eval "$(conda shell.bash hook)"',
    ` conda activate ${condaEnvironment}`
  );
  lines.push('else', ` echo ${activationFailure} >&2`, ' exit 127', 'fi');
  lines.push(command);

  return 'bash -lc ' + shellQuote(lines.join('\n'));
};
|
|
|
|
/**
 * Derives a filesystem-friendly base name from an uploaded file name.
 *
 * The final extension is dropped, every run of characters outside
 * `[a-zA-Z0-9._-]` becomes a single `-`, leading and trailing dashes are
 * removed, and the result is capped at 80 characters.
 *
 * @param fileName - Original file name.
 * @returns The sanitized name, or `meeting-audio` when nothing usable remains.
 */
const safeBaseName = (fileName: string): string => {
  const withoutExtension = fileName.replace(/\.[^.]+$/, '');
  const safe = withoutExtension
    .replace(/[^a-zA-Z0-9._-]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 80)
    // Truncation can cut right after a separator, so strip trailing dashes again.
    .replace(/-+$/, '');

  return safe || 'meeting-audio';
};
|
|
|
|
/**
 * Opens an SSH connection to the transcription host using password
 * authentication.
 *
 * @param config - Supplies host, port, credentials, and the ready timeout.
 * @returns A promise that resolves with the connected client once it reports
 *   `ready`, and rejects with a descriptive error when the client emits `error`.
 */
const connectSsh = (config: WhisperConfig) => {
  const { host, password, port, sshReadyTimeoutMs, username } = config;

  return new Promise<Client>((resolve, reject) => {
    const client = new Client();
    const connection: ConnectConfig = {
      host,
      keepaliveInterval: 15_000,
      password,
      port,
      readyTimeout: sshReadyTimeoutMs,
      username
    };

    client.on('ready', () => {
      resolve(client);
    });

    client.on('error', (error) => {
      let details = 'Unknown SSH connection error.';

      if (error instanceof Error) {
        details = error.message;
      }

      // Wrap the low-level failure in guidance about what to check.
      const failure = new Error(
        `Unable to connect to the transcription service at ${host}:${port}. ` +
          `Check that the VM is powered on, reachable from this machine, and accepting SSH. Details: ${details}`
      );

      reject(failure);
    });

    client.connect(connection);
  });
};
|
|
|
|
/**
 * Promise wrapper around `client.sftp`.
 *
 * @param client - Connected SSH client.
 * @returns A promise for the SFTP session; rejects with the error reported by the callback.
 */
const getSftp = (client: Client) => {
  return new Promise<SFTPWrapper>((resolve, reject) => {
    client.sftp((error, session) => {
      if (!error) {
        resolve(session);
      } else {
        reject(error);
      }
    });
  });
};
|
|
|
|
/**
 * Runs a command on the remote host and collects its output.
 *
 * @param client - Connected SSH client.
 * @param command - Command line handed to `client.exec`.
 * @param timeoutMs - Milliseconds to wait before closing the channel and failing.
 * @returns A promise for the trimmed stdout followed by stderr when the command succeeds.
 *   It rejects when the command cannot be started, the channel errors, the
 *   command exits with a non-zero status or is terminated by a signal, or the
 *   timeout elapses.
 */
const execCommand = (client: Client, command: string, timeoutMs: number) =>
  new Promise<string>((resolve, reject) => {
    let stream: ClientChannel | null = null;
    let settled = false;

    // Keep raw bytes and decode once at the end: decoding chunk by chunk would
    // corrupt a multi-byte UTF-8 character split across two chunks.
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];
    const collect = (chunks: Buffer[]) => (chunk: Buffer | string) => {
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk);
    };
    const decode = (chunks: Buffer[]) => Buffer.concat(chunks).toString('utf8');

    const finish = (error?: Error, output?: string) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);

      if (error) {
        reject(error);
        return;
      }

      resolve(output ?? '');
    };

    const timeout = setTimeout(() => {
      stream?.close();
      finish(new Error('Transcription timed out on the processing machine.'));
    }, timeoutMs);

    client.exec(command, (error, commandStream) => {
      if (error) {
        finish(error);
        return;
      }

      if (settled) {
        // The timeout fired before the channel opened; close it now so it is
        // not left open with nobody listening.
        commandStream.close();
        return;
      }

      stream = commandStream;
      commandStream.on('data', collect(stdoutChunks));
      commandStream.stderr.on('data', collect(stderrChunks));
      // Without a listener, a channel-level error would be thrown as an
      // uncaught exception.
      commandStream.on('error', (streamError: Error) => finish(streamError));
      commandStream.on('close', (code: number | null, signal?: string | null) => {
        const stdout = decode(stdoutChunks);
        const stderr = decode(stderrChunks);

        // A command killed by a signal reports no exit code; that is a
        // failure, not a clean exit.
        const failed = (typeof code === 'number' && code !== 0) || Boolean(signal);

        if (failed) {
          const fallbackMessage = signal
            ? `Transcription was terminated by signal ${signal}.`
            : `Transcription failed with exit code ${code}.`;

          finish(new Error(stderr.trim() || stdout.trim() || fallbackMessage));
          return;
        }

        finish(undefined, `${stdout}${stderr}`.trim());
      });
    });
  });
|
|
|
|
/**
 * Streams a local file to the remote host over SFTP.
 *
 * @param sftp - Open SFTP session.
 * @param localFilePath - Path of the file to read locally.
 * @param remoteFilePath - Destination path; opened with flags `w` and mode `0o640`.
 * @returns A promise that resolves once the remote write stream has closed,
 *   and rejects with the first error raised by either stream.
 */
const uploadFile = (
  sftp: SFTPWrapper,
  localFilePath: string,
  remoteFilePath: string
) =>
  new Promise<void>((resolve, reject) => {
    let settled = false;
    const source = createReadStream(localFilePath);
    const target = sftp.createWriteStream(remoteFilePath, {
      flags: 'w',
      mode: 0o640
    });

    const finish = (error?: Error) => {
      if (settled) return;
      settled = true;

      if (error) {
        // pipe() neither forwards errors nor tears down the other side, so
        // release both the local file handle and the remote write stream.
        source.destroy();
        target.destroy();
        reject(error);
        return;
      }

      resolve();
    };

    source.on('error', finish);
    target.on('error', finish);
    target.on('close', () => finish());
    source.pipe(target);
  });
|
|
|
|
/**
 * Reads a remote file over SFTP and returns its text.
 *
 * @param sftp - Open SFTP session.
 * @param remoteFilePath - Path of the file on the remote host.
 * @returns A promise for the file content decoded as UTF-8 and trimmed,
 *   resolved when the read stream closes; rejects on a stream error.
 */
const readRemoteFile = (sftp: SFTPWrapper, remoteFilePath: string) =>
  new Promise<string>((resolve, reject) => {
    // Keep raw bytes and decode once: decoding each chunk separately would
    // corrupt a multi-byte UTF-8 character that straddles a chunk boundary.
    const chunks: Buffer[] = [];
    let settled = false;
    const source = sftp.createReadStream(remoteFilePath);

    const finish = (error?: Error) => {
      if (settled) return;
      settled = true;

      if (error) {
        reject(error);
        return;
      }

      resolve(Buffer.concat(chunks).toString('utf8').trim());
    };

    source.on('data', (chunk: Buffer | string) => {
      chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : chunk);
    });
    source.on('error', finish);
    source.on('close', () => finish());
  });
|
|
|
|
/**
 * Best-effort removal of a remote file.
 *
 * @param sftp - Open SFTP session.
 * @param remoteFilePath - Path of the file to unlink.
 * @returns A promise that always resolves; any unlink error is deliberately ignored.
 */
const deleteRemoteFile = (sftp: SFTPWrapper, remoteFilePath: string) => {
  return new Promise<void>((done) => {
    sftp.unlink(remoteFilePath, function ignoreUnlinkResult() {
      done();
    });
  });
};
|
|
|
|
/**
 * Transcribes a local recording on the remote processing machine.
 *
 * Steps: create the remote directories, upload the recording over SFTP,
 * convert it to 16 kHz mono PCM WAV with ffmpeg, run the transcription command
 * inside its environment, then read the resulting `.txt` file back.
 *
 * The uploaded source file is always removed afterwards and the connection is
 * closed; the converted WAV and the transcript file are left on the remote
 * host and their paths are returned.
 *
 * @param input - Local path of the recording and its original file name.
 * @returns The transcript text together with the remote WAV and transcript paths.
 * @throws Error when any remote step fails or the transcript is empty.
 */
export const transcribeOnWhisperVm = async ({
  localFilePath,
  originalName
}: TranscriptionInput): Promise<TranscriptionOutput> => {
  const config = getConfig();
  const audioDir = trimRemoteDir(config.audioDir);
  const transcriptDir = trimRemoteDir(config.transcriptDir);

  // Recordings without an extension are assumed to be WebM.
  const sourceExtension = extname(originalName).toLowerCase() || '.webm';

  // Timestamp plus UUID keeps remote names unique across uploads.
  const jobName = [Date.now(), randomUUID(), safeBaseName(originalName)].join('-');

  const remoteSourcePath = posix.join(audioDir, `${jobName}.source${sourceExtension}`);
  const remoteAudioPath = posix.join(audioDir, `${jobName}.wav`);
  const remoteTranscriptPath = posix.join(transcriptDir, `${jobName}.txt`);

  const client = await connectSsh(config);
  let sftp: SFTPWrapper | null = null;

  try {
    const prepareDirectories = `mkdir -p ${shellQuote(audioDir)} ${shellQuote(transcriptDir)}`;
    await execCommand(client, prepareDirectories, 30_000);

    sftp = await getSftp(client);
    await uploadFile(sftp, localFilePath, remoteSourcePath);

    // Browser recordings are commonly WebM/Opus; the remote job is more reliable with WAV.
    const ffmpegArguments = [
      '-y',
      '-hide_banner',
      '-loglevel error',
      `-i ${shellQuote(remoteSourcePath)}`,
      '-vn',
      '-ac 1',
      '-ar 16000',
      '-c:a pcm_s16le',
      shellQuote(remoteAudioPath)
    ];
    const convertCommand = `${config.ffmpegCommand} ${ffmpegArguments.join(' ')}`;
    await execCommand(client, convertCommand, config.timeoutMs);

    // Transcription runs remotely so the Node process stays lightweight.
    const whisperOptions: Array<[string, string]> = [
      ['--model', shellQuote(config.model)],
      ['--language', shellQuote(config.language)],
      ['--task', 'transcribe'],
      ['--output_dir', shellQuote(transcriptDir)],
      ['--output_format', 'txt'],
      ['--fp16', 'False']
    ];
    const whisperCommand = [
      shellQuote(config.command),
      shellQuote(remoteAudioPath),
      ...whisperOptions.map(([flag, value]) => `${flag} ${value}`)
    ].join(' ');
    await execCommand(
      client,
      runInsideWhisperEnv(config, whisperCommand),
      config.timeoutMs
    );

    const transcript = await readRemoteFile(sftp, remoteTranscriptPath);

    if (transcript.length === 0) {
      throw new Error('The transcription service returned an empty transcript.');
    }

    return { remoteAudioPath, remoteTranscriptPath, transcript };
  } finally {
    // Cleanup runs on success and failure alike.
    if (sftp !== null) {
      await deleteRemoteFile(sftp, remoteSourcePath);
      sftp.end();
    }

    client.end();
  }
};
|