import hashlib
import os
import time
from typing import Callable, List, Optional

import requests
from docx import Document


class TranslationError(Exception):
    """Raised when the translation API reports an error or an empty result."""


class DocxTranslator:
    """Translate the text of a .docx document from Chinese with Baidu Translate.

    Body paragraphs and table cells are translated run by run, so the
    formatting carried by each run stays attached to its translated text.
    """

    # NOTE(security): credentials embedded in source control are kept only as
    # a backward-compatible last-resort default. Prefer the constructor
    # arguments or the BAIDU_TRANSLATE_APPID / BAIDU_TRANSLATE_SECRET_KEY
    # environment variables, and rotate these values.
    _DEFAULT_APPID = "20241112002200806"
    _DEFAULT_SECRET_KEY = "preM0becByYCdotRTP_a"

    def __init__(
        self,
        appid: Optional[str] = None,
        secret_key: Optional[str] = None,
        timeout: float = 10,
    ):
        """Configure the translator.

        Args:
            appid: Baidu Translate application id. Falls back to the
                ``BAIDU_TRANSLATE_APPID`` environment variable, then to the
                built-in default.
            secret_key: Baidu Translate secret key. Falls back to the
                ``BAIDU_TRANSLATE_SECRET_KEY`` environment variable, then to
                the built-in default.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.appid = (
            appid
            or os.environ.get("BAIDU_TRANSLATE_APPID")
            or self._DEFAULT_APPID
        )
        self.secret_key = (
            secret_key
            or os.environ.get("BAIDU_TRANSLATE_SECRET_KEY")
            or self._DEFAULT_SECRET_KEY
        )
        self.api_url = "https://api.fanyi.baidu.com/api/trans/vip/translate"
        # Maximum number of characters sent to the API in one request.
        self.max_length = 2000
        self.timeout = timeout
        # Caller-facing language code -> API language code. Unknown codes are
        # passed through unchanged by translate_text().
        self.lang_map = {
            'en': 'en',    # English
            'jp': 'jp',    # Japanese
            'cht': 'cht',  # Traditional Chinese
        }

    def _get_sign(self, text, salt):
        """Return the request signature: MD5 hex of appid + text + salt + key."""
        sign_str = f"{self.appid}{text}{salt}{self.secret_key}"
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def _split_text(self, text: str) -> List[str]:
        """Split *text* into chunks of at most ``max_length`` characters.

        Chunks break after a '。' where possible; a single sentence longer
        than the limit is cut at the limit. Concatenating the returned chunks
        reproduces *text* exactly (no delimiter is added or dropped).
        """
        limit = max(1, self.max_length)  # guard against a non-positive limit

        # Keep every '。' attached to the sentence it terminates.
        pieces = text.split('。')
        sentences = [piece + '。' for piece in pieces[:-1]]
        if pieces[-1]:
            sentences.append(pieces[-1])

        chunks = []
        current = ''
        for sentence in sentences:
            if len(current) + len(sentence) <= limit:
                current += sentence
                continue
            if current:
                chunks.append(current)
            # The sentence alone may still exceed the limit: hard-split it.
            while len(sentence) > limit:
                chunks.append(sentence[:limit])
                sentence = sentence[limit:]
            current = sentence
        if current:
            chunks.append(current)
        return chunks

    def translate_text(self, text: str, to_lang: str) -> str:
        """Translate *text* from Chinese into *to_lang*.

        Text longer than ``max_length`` is translated chunk by chunk and the
        translated chunks are concatenated.

        Args:
            text: Source text.
            to_lang: Target language code; mapped through ``lang_map`` and
                passed through unchanged when not present there.

        Returns:
            The translated text.

        Raises:
            Exception: Whatever ``_translate_segment`` raises once its
                retries are exhausted.
        """
        target_lang = self.lang_map.get(to_lang, to_lang)
        if len(text) <= self.max_length:
            return self._translate_segment(text, target_lang)
        return ''.join(
            self._translate_segment(chunk, target_lang)
            for chunk in self._split_text(text)
        )

    def _translate_segment(self, text, to_lang):
        """Translate one segment with a single API request, retrying on failure.

        Whitespace-only text is returned unchanged without calling the API.

        Raises:
            TranslationError: The API returned an error or an empty result on
                the final attempt.
            Exception: Any network or decoding error raised on the final
                attempt.
        """
        if not text.strip():
            return text

        salt = str(int(time.time()))
        params = {
            'q': text,
            'from': 'zh',
            'to': to_lang,
            'appid': self.appid,
            'salt': salt,
            'sign': self._get_sign(text, salt),
        }

        max_retries = 3
        for attempt in range(max_retries):
            try:
                # POST keeps long segments out of the URL; the timeout stops a
                # stalled connection from blocking the whole document.
                response = requests.post(
                    self.api_url, data=params, timeout=self.timeout
                )
                response.raise_for_status()
                result = response.json()
                if 'error_code' in result:
                    detail = result.get('error_msg', result['error_code'])
                    raise TranslationError(f"Translation error: {detail}")
                entries = result.get('trans_result') or []
                if not entries:
                    raise TranslationError("Translation error: empty result")
                # Multi-line input yields one entry per line; keep them all.
                return '\n'.join(entry['dst'] for entry in entries)
            except Exception:
                if attempt == max_retries - 1:
                    raise
                time.sleep(1)

    def _translate_paragraph(self, paragraph, target_lang):
        """Translate the text of every run of *paragraph* in place.

        All runs are translated before the paragraph is modified, so when a
        translation fails the exception propagates and the paragraph is left
        exactly as it was. Runs that hold no visible text are not touched.
        """
        line_end_chars = '\n\r\v'
        pending = []
        for run in paragraph.runs:
            text = run.text
            body = text.rstrip(line_end_chars)
            if not body.strip():
                continue
            trailing_breaks = len(text) - len(body)
            translated = self.translate_text(body, target_lang)
            pending.append((run, translated, trailing_breaks))

        for run, translated, trailing_breaks in pending:
            # Assigning run.text keeps the run's own formatting properties.
            run.text = translated
            # Restore the line breaks that ended the original run.
            for _ in range(trailing_breaks):
                run.add_break()

    @staticmethod
    def _report_progress(progress_callback, done, total):
        """Invoke *progress_callback* with the integer percentage done."""
        if progress_callback and total:
            progress_callback(int((done / total) * 100))

    def translate_document(
        self,
        input_path,
        target_lang: str = 'en',
        progress_callback: Optional[Callable[[int], None]] = None,
    ):
        """Translate a .docx file and save the result next to it.

        Args:
            input_path: Path of the source document.
            target_lang: Target language code (see ``lang_map``).
            progress_callback: Optional callable receiving an integer
                percentage after each body paragraph and each table cell.

        Returns:
            Path of the saved document: the input path with ``_translated``
            inserted before the extension.

        A paragraph whose translation fails is reported on stdout and keeps
        its original content; processing continues with the next item.
        """
        doc = Document(input_path)

        # Only the final extension is rewritten, so the output can never be
        # the input file itself and directory names are left alone.
        root, ext = os.path.splitext(os.fspath(input_path))
        output_path = f"{root}_translated{ext or '.docx'}"

        paragraphs = doc.paragraphs
        cells = [
            cell
            for table in doc.tables
            for row in table.rows
            for cell in row.cells
        ]
        total_items = len(paragraphs) + len(cells)
        current_item = 0

        # Body paragraphs.
        for paragraph in paragraphs:
            if paragraph.text.strip():
                try:
                    self._translate_paragraph(paragraph, target_lang)
                except Exception as e:
                    print(f"Error translating paragraph: {e}")
            current_item += 1
            self._report_progress(progress_callback, current_item, total_items)

        # Table cells. A merged cell is listed once per grid position it
        # covers; translate it only the first time it is seen. The mapping
        # keeps a reference to each element so its id() stays unique.
        seen_cells = {}
        for cell in cells:
            element = getattr(cell, '_tc', None)
            first_visit = element is None or id(element) not in seen_cells
            if element is not None:
                seen_cells[id(element)] = element
            if first_visit:
                for paragraph in cell.paragraphs:
                    if not paragraph.text.strip():
                        continue
                    try:
                        self._translate_paragraph(paragraph, target_lang)
                    except Exception as e:
                        print(f"Error translating cell: {e}")
            current_item += 1
            self._report_progress(progress_callback, current_item, total_items)

        doc.save(output_path)
        return output_path