# ruff: noqa import os from openai import OpenAI from concurrent.futures import ThreadPoolExecutor # Define the source and target directories source_dir = "docs" languages = { "ja": "Japanese", # Add more languages here, e.g., "fr": "French" } # Initialize OpenAI client openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) # Define dictionaries for translation control do_not_translate = [ "OpenAI", "Agents SDK", "Hello World", "Model Context Protocol", "structured outputs", # Add more terms here ] eng_to_non_eng_mapping = { "ja": { "agents": "エージェント", "computer use": "コンピュータ操作", "OAI hosted tools": "OpenAI がホストするツール", "well formed data": "適切な形式のデータ", "guardrail": "ガードレール", "handoffs": "ハンドオフ", "function tools": "関数ツール", "tracing": "トレーシング", "code examples": "コード例", "vector store": "ベクトルストア", # Add more Japanese mappings here }, # Add more languages here } eng_to_non_eng_instructions = { "ja": { "The term 'result' in the Runner guide context must be translated like 'execution results'", "The term 'raw' in 'raw response events' must be kept as is", "The term 'examples' must be code examples when the page mentions the code examples in the repo, it can be translated as either 'code exmaples' or 'sample code'.", "The term 'primitives' can be translated as basic components or building blocks.", "When the terms 'instructions' and 'tools' are mentioned as API parameter names, they must be kept as is.", # Add more Japanese mappings here }, # Add more languages here } def built_instructions(target_language: str, lang_code: str) -> str: do_not_translate_terms = "\n".join(do_not_translate) specific_terms = "\n".join( [f"{k} -> {v}" for k, v in eng_to_non_eng_mapping.get(lang_code, {}).items()] ) specific_instructions = "\n".join(eng_to_non_eng_instructions.get(lang_code, {})) return f"""You are a professional translator with extensive experience in translating technical documents. You are assigned to translate markdown text written in English into {target_language}. The tone and voice must be concise, consistent, and most importantly professional. You must return only the generated markdown text. Don't include any additional comments. When you're unable to complete full translation, return an error message indicating the reason instead of returning partial results. # Do not translate {do_not_translate_terms} # Specific term mappings When you convert these terms, do not append whitespaces before/after the terms. {specific_terms} {specific_instructions} # Other Rules - When translating into Japanese, ensure there are spaces before and after alphanumeric terms and markdown special characters like italic and bold. - When translating very uncommon technical terms, include both the translated term and the original term in parentheses. That said, the section titles should be as simple as possible. - You must skip translating any parts of code snippets and code comments - "./assets/*" needs to be converted to "../assets/*"; markdown files like ./tracing.md can be kept as is. """ # Function to translate and save files def translate_file(file_path: str, target_path: str, lang_code: str) -> None: print(f"Translating {file_path} into a different language: {lang_code}") with open(file_path, encoding="utf-8") as f: content = f.read() # Split content into lines lines: list[str] = content.splitlines() chunks: list[str] = [] current_chunk: list[str] = [] # Split content into chunks of up to 120 lines, ensuring splits occur before section titles for line in lines: if len(current_chunk) >= 120 and line.startswith("#"): chunks.append("\n".join(current_chunk)) current_chunk = [] current_chunk.append(line) if current_chunk: chunks.append("\n".join(current_chunk)) # Translate each chunk separately and combine results translated_content: list[str] = [] for chunk in chunks: response = openai_client.responses.create( model="gpt-4.5-preview", temperature=0.0, instructions=built_instructions(languages[lang_code], lang_code), input=chunk, ) translated_content.append(response.output_text) # Save the combined translated content with open(target_path, "w", encoding="utf-8") as f: f.write("\n".join(translated_content)) def translate_single_source_file(file_path: str) -> None: relative_path = os.path.relpath(file_path, source_dir) if "ref/" in relative_path or not file_path.endswith(".md"): return for lang_code in languages: target_dir = os.path.join(source_dir, lang_code) target_path = os.path.join(target_dir, relative_path) # Ensure the target directory exists os.makedirs(os.path.dirname(target_path), exist_ok=True) # Translate and save the file translate_file(file_path, target_path, lang_code) def main(): # Traverse the source directory for root, _, file_names in os.walk(source_dir): # Skip the target directories if any(lang in root for lang in languages): continue with ThreadPoolExecutor(max_workers=20) as executor: futures = [ executor.submit( translate_single_source_file, os.path.join(root, file_name), ) for file_name in file_names ] for future in futures: future.result() print("Translation completed.") if __name__ == "__main__": # translate_single_source_file("docs/tools.md") main()