Your Name e4959d584f feat: 完善代理商端业务逻辑与前后端框架
主要更新:
- 更新代理商端文档,明确项目由品牌方分配流程
- 新增Brief配置详情页(已配置)设计稿
- 完善工作台紧急待办中品牌新任务功能
- 整理Pencil设计文件中代理商端页面顺序
- 新增后端FastAPI框架及核心API
- 新增前端Next.js页面和组件库
- 添加.gitignore排除构建和缓存文件

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 19:27:31 +08:00

311 lines
8.4 KiB
Python

"""
ASR 语音转写服务
集成 Whisper API 实现音频转写
"""
import asyncio
import os
import tempfile
from dataclasses import dataclass, field
from typing import Optional
import httpx
@dataclass
class TranscriptSegment:
    """One timed slice of a transcript.

    Attributes are documented inline; times are offsets from the start of
    the audio, expressed in seconds.
    """

    # Transcribed text of this slice.
    text: str
    # Offset (seconds) at which the slice begins.
    start: float
    # Offset (seconds) at which the slice ends.
    end: float
    # Confidence score for the slice; 1.0 when none was supplied.
    confidence: float = 1.0
@dataclass
class TranscriptionResult:
    """Outcome of a transcription attempt.

    Only ``success`` is required; every other field has a default so that
    a failure can be reported with just ``success=False`` and ``error``.
    """

    # True when the transcription completed, False otherwise.
    success: bool
    # Full transcript text.
    text: str = ""
    # Timed segments; each instance gets its own fresh list.
    segments: list[TranscriptSegment] = field(default_factory=list)
    # Language code associated with the transcript.
    language: str = "zh"
    # Audio duration in seconds.
    duration: float = 0.0
    # Human-readable failure description; None when nothing went wrong.
    error: Optional[str] = None
class ASRService:
    """Speech-to-text service that talks to a Whisper-style HTTP API."""

    # Response formats that the transcription endpoint returns as plain text
    # rather than as a JSON document; these must not be passed to .json().
    _PLAIN_TEXT_FORMATS = frozenset({"text", "srt", "vtt"})

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.openai.com/v1",
        model: str = "whisper-1",
        timeout: float = 300.0,
    ):
        """
        Initialise the ASR service.

        Args:
            api_key: API key sent as a Bearer token.
            base_url: API base URL; a trailing slash is stripped.
            model: Model name sent with every transcription request.
            timeout: Timeout (seconds) for the transcription request.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.timeout = timeout

    async def transcribe_file(
        self,
        audio_path: str,
        language: str = "zh",
        response_format: str = "verbose_json",
    ) -> TranscriptionResult:
        """
        Transcribe a local audio file.

        Never raises for ordinary failures: a missing file, a non-200
        response or any exception is reported through
        ``TranscriptionResult(success=False, error=...)``.

        Args:
            audio_path: Path of the audio file to upload.
            language: Language code forwarded to the API.
            response_format: Response format forwarded to the API. For the
                plain-text formats ("text", "srt", "vtt") the raw response
                body becomes the transcript text; any other format is
                parsed as JSON.

        Returns:
            TranscriptionResult: The transcription outcome.
        """
        if not os.path.exists(audio_path):
            return TranscriptionResult(
                success=False,
                error=f"文件不存在: {audio_path}",
            )

        try:
            async with httpx.AsyncClient(
                timeout=httpx.Timeout(self.timeout)
            ) as client:
                with open(audio_path, "rb") as f:
                    # NOTE(review): the part is always labelled audio/mpeg,
                    # whatever the real container is — confirm the backend
                    # relies on the file name/content rather than this.
                    files = {
                        "file": (os.path.basename(audio_path), f, "audio/mpeg")
                    }
                    data = {
                        "model": self.model,
                        "language": language,
                        "response_format": response_format,
                    }
                    response = await client.post(
                        f"{self.base_url}/audio/transcriptions",
                        headers={"Authorization": f"Bearer {self.api_key}"},
                        files=files,
                        data=data,
                    )

            if response.status_code != 200:
                return TranscriptionResult(
                    success=False,
                    error=f"API 错误 {response.status_code}: {response.text[:200]}",
                )

            if response_format in self._PLAIN_TEXT_FORMATS:
                # The body is not JSON for these formats; wrap it so the
                # common parser can build a single-segment result.
                payload = {"text": response.text}
            else:
                payload = response.json()
            return self._parse_response(payload, language)
        except Exception as e:
            # Deliberate catch-all: callers get failures as a result object.
            return TranscriptionResult(
                success=False,
                error=str(e),
            )

    async def transcribe_url(
        self,
        audio_url: str,
        language: str = "zh",
    ) -> TranscriptionResult:
        """
        Download remote audio and transcribe it.

        The audio is fetched (following redirects, 60 s timeout), written
        to a temporary ``.mp3`` file, transcribed with ``transcribe_file``
        and the temporary file is always removed afterwards.

        Args:
            audio_url: URL of the audio to download.
            language: Language code forwarded to the API.

        Returns:
            TranscriptionResult: The transcription outcome; download errors
            and exceptions are reported with ``success=False``.
        """
        temp_path = None
        try:
            async with httpx.AsyncClient(
                timeout=httpx.Timeout(60.0),
                follow_redirects=True,
            ) as client:
                response = await client.get(audio_url)

            if response.status_code != 200:
                return TranscriptionResult(
                    success=False,
                    error=f"下载音频失败: HTTP {response.status_code}",
                )

            with tempfile.NamedTemporaryFile(
                suffix=".mp3",
                delete=False,
            ) as f:
                # Record the path BEFORE writing: with delete=False the file
                # already exists, so a failed write must still be cleaned up.
                temp_path = f.name
                f.write(response.content)

            return await self.transcribe_file(temp_path, language)
        except Exception as e:
            return TranscriptionResult(
                success=False,
                error=str(e),
            )
        finally:
            # Best-effort cleanup; a file that is already gone is fine.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except OSError:
                    pass

    def _parse_response(
        self,
        response: dict,
        language: str,
    ) -> TranscriptionResult:
        """
        Convert a decoded API payload into a TranscriptionResult.

        Missing or null "text", "duration" and "segments" entries are
        treated as empty. When the payload has text but no segments, one
        segment spanning 0.0 to the reported duration is synthesised.

        Args:
            response: Decoded API payload.
            language: Language code recorded on the result.

        Returns:
            TranscriptionResult: A successful result built from the payload.
        """
        # `or` (not a .get default) so that explicit nulls are tolerated too.
        text = response.get("text") or ""
        duration = response.get("duration") or 0.0

        segments = [
            TranscriptSegment(
                text=(seg.get("text") or "").strip(),
                start=seg.get("start", 0.0),
                end=seg.get("end", 0.0),
                confidence=seg.get("confidence", 1.0),
            )
            for seg in response.get("segments") or []
        ]

        # No segment information: fall back to one segment for the full text.
        if not segments and text:
            segments = [TranscriptSegment(text=text, start=0.0, end=duration)]

        return TranscriptionResult(
            success=True,
            text=text,
            segments=segments,
            language=language,
            duration=duration,
        )
class AudioExtractor:
    """Extract the audio track of a video by invoking FFmpeg."""

    def __init__(self, ffmpeg_path: str = "ffmpeg"):
        """
        Args:
            ffmpeg_path: FFmpeg executable name or path.
        """
        self.ffmpeg_path = ffmpeg_path

    async def extract_audio(
        self,
        video_path: str,
        output_path: Optional[str] = None,
        format: str = "mp3",
        sample_rate: int = 16000,
    ) -> Optional[str]:
        """
        Extract mono audio from a video file.

        Args:
            video_path: Path of the input video.
            output_path: Destination path; when omitted a temporary file
                with the ``.{format}`` suffix is created.
            format: Output format. "mp3" is encoded with libmp3lame; any
                other value is encoded with pcm_s16le.
            sample_rate: Output sample rate in Hz.

        Returns:
            The audio file path, or None when FFmpeg cannot be found, exits
            with a non-zero status, or cannot be started. A temporary file
            created by this call is removed again on failure.
        """
        import shutil

        if not shutil.which(self.ffmpeg_path):
            return None

        created_temp = output_path is None
        if created_temp:
            # mkstemp creates the file atomically (mktemp is deprecated and
            # race-prone); FFmpeg overwrites it thanks to "-y".
            fd, output_path = tempfile.mkstemp(suffix=f".{format}")
            os.close(fd)

        cmd = [
            self.ffmpeg_path,
            "-i", video_path,
            "-vn",  # drop the video stream
            "-acodec", "libmp3lame" if format == "mp3" else "pcm_s16le",
            "-ar", str(sample_rate),
            "-ac", "1",  # mono
            "-y",
            output_path,
        ]

        succeeded = False
        try:
            # Output is never inspected, so discard it instead of buffering
            # it through pipes; stdin is detached so FFmpeg cannot consume
            # the parent's input.
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdin=asyncio.subprocess.DEVNULL,
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL,
            )
            succeeded = (await process.wait()) == 0
        except Exception:
            # Contract: any failure to run FFmpeg is reported as None.
            succeeded = False
        finally:
            # Runs on cancellation too: never leave our own partial output.
            if not succeeded and created_temp:
                try:
                    os.remove(output_path)
                except OSError:
                    pass

        return output_path if succeeded else None
class VideoASRService:
    """Video transcription service combining audio extraction and ASR."""

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.openai.com/v1",
        model: str = "whisper-1",
        timeout: float = 300.0,
        ffmpeg_path: str = "ffmpeg",
    ):
        """
        Args:
            api_key: API key for the ASR backend.
            base_url: API base URL for the ASR backend.
            model: Model name for the ASR backend.
            timeout: Transcription request timeout (seconds), forwarded to
                ASRService.
            ffmpeg_path: FFmpeg executable name or path, forwarded to
                AudioExtractor.
        """
        self.asr = ASRService(api_key, base_url, model, timeout)
        self.audio_extractor = AudioExtractor(ffmpeg_path)

    async def transcribe_video(
        self,
        video_path: str,
        language: str = "zh",
    ) -> TranscriptionResult:
        """
        Transcribe the speech contained in a video.

        The audio track is extracted to an intermediate file, transcribed,
        and the intermediate file is removed whether or not transcription
        succeeds.

        Args:
            video_path: Path of the video file.
            language: Language code forwarded to the ASR backend.

        Returns:
            TranscriptionResult: The transcription outcome. When extraction
            yields no audio path, a failed result is returned instead.
        """
        audio_path = await self.audio_extractor.extract_audio(video_path)
        if not audio_path:
            return TranscriptionResult(
                success=False,
                error="音频提取失败,请确保 FFmpeg 已安装",
            )

        try:
            return await self.asr.transcribe_file(audio_path, language)
        finally:
            # Best-effort removal of the intermediate audio; attempting the
            # delete directly avoids an exists-then-remove race.
            try:
                os.remove(audio_path)
            except OSError:
                pass