function script

This commit is contained in:
2025-09-05 00:43:20 +08:00
parent 96c7f5b347
commit 6c6b19e03e
5 changed files with 1864 additions and 0 deletions

View File

@@ -0,0 +1,211 @@
"""
优化后的数字人生成API
按照新的文件管理架构设计
"""
import requests
import os
import time
import logging
from typing import Dict, Any
from api import Config, APIException, _make_request, _copy_file
logger = logging.getLogger(__name__)
def generate_digital_human_v2(speech_text: str, sample_video: str, sample_voice: str, uuid: str) -> Dict[str, Any]:
    """
    Generate a digital human video (optimized version).

    The pipeline runs five steps in order:

    1. Copy the sample voice into the TTS service directory via
       ``file_manager.copy_audio_for_tts``.
    2. POST to ``/v1/preprocess_and_tran`` on the voice service.
    3. POST to ``/v1/invoke`` on the voice service and save the returned
       bytes as ``{uuid}output.wav`` under ``Config.RESOURCE_DIR``.
    4. Stage that audio in the uploads directory and hand it, together with
       the sample video, to ``file_manager.copy_files_for_face2face``.
    5. POST the task to ``/easy/submit`` on the video service.

    The staged copy created in step 4 is temporary and is removed whether the
    remaining steps succeed or fail.

    Args:
        speech_text: Text to be spoken.
        sample_video: Sample video file name (in the upload directory).
        sample_voice: Sample voice file name (in the upload directory).
        uuid: Unique identifier; used as the speaker name and in file names.

    Returns:
        The value returned by ``_make_request`` for the submit call.

    Raises:
        APIException: If any step fails. Exceptions that are not already an
            ``APIException`` are wrapped with status code 500 and chained.
    """
    logger.info(f"Generating digital human for UUID: {uuid}")

    # Set once the temporary staging path is known, so that ``finally`` can
    # clean it up no matter where the pipeline stops.
    upload_audio_path = None

    try:
        # Imported inside the ``try`` (as in the original) so an import
        # failure is reported as an APIException too.
        from file_upload import file_manager

        # Step 1: copy the audio file into the TTS service directory.
        tts_audio_filename = file_manager.copy_audio_for_tts(sample_voice, f"dh_{uuid}")

        # Step 2: preprocess the audio and train the voice model.
        url = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        request_body_one = {
            "format": "wav",
            "reference_audio": tts_audio_filename,  # file name inside the TTS directory
            "lang": "zh",
        }
        response_one = _make_request(url, request_body_one)

        # The synthesis step cannot run without the preprocessed audio URL.
        if 'asr_format_audio_url' not in response_one:
            logger.error(f"Voice preprocessing response missing asr_format_audio_url: {response_one}")
            raise APIException("Voice preprocessing failed: missing asr_format_audio_url in response", 500)

        asr_format_audio_url = response_one['asr_format_audio_url']
        # The reference text is optional; fall back to an empty string.
        reference_audio_text = response_one.get('reference_audio_text', '')

        # Step 3: synthesize the speech.
        url = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        request_body_two = {
            "speaker": uuid,
            "text": speech_text,
            "reference_audio": asr_format_audio_url,
            "reference_text": reference_audio_text,
            **Config.DEFAULT_VOICE_PARAMS,
        }
        # requests is called directly because the response body is raw audio
        # bytes that get written to disk below.
        response = requests.post(url, json=request_body_two, timeout=60)
        logger.info(f"Voice generation response status: {response.status_code}")

        if response.status_code != 200:
            logger.error(f"Voice generation failed: {response.text}")
            raise APIException(f"Voice generation failed: {response.text}", response.status_code)

        # Save the generated audio into the resource directory.
        generated_audio_filename = f"{uuid}output.wav"
        generated_audio_path = os.path.join(Config.RESOURCE_DIR, generated_audio_filename)
        with open(generated_audio_path, "wb") as f:
            f.write(response.content)
        logger.info(f"Generated audio saved: {generated_audio_path}")

        # Step 4: copy the video and the generated audio into the Face2Face
        # service directory. copy_files_for_face2face is given a bare file
        # name, so the generated audio is first staged in the uploads
        # directory.
        # NOTE(review): this path is hard-coded; consider moving it to Config.
        upload_audio_dir = "/mnt/docker/resource/uploads/audio"
        os.makedirs(upload_audio_dir, exist_ok=True)
        upload_audio_path = os.path.join(upload_audio_dir, generated_audio_filename)
        _copy_file(generated_audio_path, upload_audio_path)

        face2face_video, face2face_audio = file_manager.copy_files_for_face2face(
            sample_video, generated_audio_filename, uuid
        )

        # Step 5: submit the video generation task.
        url = f"{Config.VIDEO_SERVICE_URL}/easy/submit"
        request_body = {
            "audio_url": face2face_audio,
            "video_url": face2face_video,
            # Timestamp used as the task ID.
            # NOTE(review): second resolution means two submissions within
            # the same second share a code - confirm this is acceptable.
            "code": str(int(time.time())),
            "chaofen": 0,
            "watermark_switch": 0,
            "pn": 1,
        }
        result = _make_request(url, request_body)
        logger.info(f"Digital human generation submitted successfully: {result}")
        return result

    except APIException as e:
        # Already in the shape callers expect: log and propagate unchanged.
        logger.error(f"Failed to generate digital human: {str(e)}")
        raise
    except Exception as e:
        # Record the traceback and keep the original error as the cause.
        logger.exception(f"Failed to generate digital human: {str(e)}")
        raise APIException(f"Failed to generate digital human: {str(e)}", 500) from e
    finally:
        # Best-effort removal of the temporary staged audio. This runs on the
        # failure paths as well, so an aborted run does not leave it behind.
        if upload_audio_path is not None:
            try:
                os.remove(upload_audio_path)
            except FileNotFoundError:
                # Never created (copy failed) or already gone: nothing to do.
                pass
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove temporary audio {upload_audio_path}: {cleanup_error}"
                )
def generate_voice_v2(text: str, reference_audio: str, reference_text: str, uuid: str) -> str:
    """
    Synthesize speech for a piece of text (optimized version).

    The reference audio is copied into the TTS service directory, the voice
    service ``/v1/invoke`` endpoint is called, and the returned bytes are
    written to ``{uuid}output.wav`` under ``Config.RESOURCE_DIR``.

    Args:
        text: Text to convert to speech.
        reference_audio: Reference audio file name (in the upload directory).
        reference_text: Transcript of the reference audio.
        uuid: Unique identifier; used as the speaker name and in the output
            file name.

    Returns:
        Path of the generated audio file inside the resource directory.

    Raises:
        APIException: If the service answers with a non-200 status (that
            status code is carried over) or any other error occurs (500).
    """
    logger.info(f"Generating voice for UUID: {uuid}")

    try:
        from file_upload import file_manager

        # Stage the reference audio where the TTS service can read it.
        staged_reference = file_manager.copy_audio_for_tts(reference_audio, uuid)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/invoke"
        payload = {
            "speaker": uuid,
            "text": text,
            "reference_audio": staged_reference,  # file name inside the TTS directory
            "reference_text": reference_text,
            **Config.DEFAULT_VOICE_PARAMS,
        }

        reply = requests.post(endpoint, json=payload, timeout=60)
        logger.info(f"Voice generation response status: {reply.status_code}")

        # Guard clause: anything other than 200 is a failure.
        if reply.status_code != 200:
            logger.error(f"Voice generation failed: {reply.text}")
            raise APIException(f"Voice generation failed: {reply.text}", reply.status_code)

        # Persist the synthesized audio in the resource directory.
        saved_path = os.path.join(Config.RESOURCE_DIR, f"{uuid}output.wav")
        with open(saved_path, "wb") as audio_file:
            audio_file.write(reply.content)

        logger.info(f"Generated voice saved to: {saved_path}")
        return saved_path

    except APIException:
        # Already the expected exception type: propagate untouched.
        raise
    except Exception as exc:
        logger.error(f"Network error during voice generation: {exc}")
        raise APIException(f"Voice generation error: {exc}", 500)
def train_voice_v2(voice_file_name: str) -> Dict[str, Any]:
    """
    Train a voice model from an uploaded audio file (optimized version).

    The audio is copied into the TTS service directory under a
    timestamp-based ``train_<epoch seconds>`` identifier, then posted to the
    voice service ``/v1/preprocess_and_tran`` endpoint.

    Args:
        voice_file_name: Voice file name (in the upload directory).

    Returns:
        The value returned by ``_make_request`` for the training call.

    Raises:
        APIException: If training fails; errors that are not already an
            ``APIException`` are wrapped with status code 500.
    """
    logger.info(f"Training voice model with file: {voice_file_name}")

    try:
        from file_upload import file_manager

        # No caller-supplied identifier here, so derive one from the clock.
        staging_tag = f"train_{int(time.time())}"
        staged_audio = file_manager.copy_audio_for_tts(voice_file_name, staging_tag)

        endpoint = f"{Config.VOICE_SERVICE_URL}/v1/preprocess_and_tran"
        payload = dict(
            format="wav",
            reference_audio=staged_audio,  # file name inside the TTS directory
            lang="zh",
        )
        return _make_request(endpoint, payload)

    except APIException:
        # Already the expected exception type: propagate untouched.
        raise
    except Exception as exc:
        logger.error(f"Voice training error: {exc}")
        raise APIException(f"Voice training error: {exc}", 500)