# You are on page 1 of 2

import io
import os
import re
import string

import streamlit as st
from docx import Document
from transformers import MarianMTModel, MarianTokenizer

def extract_names(text):
    """
    Extract potential names from the given text.

    This is a simple capitalization heuristic: a whitespace-delimited token
    is kept when ``str.istitle()`` is true for it, and surrounding
    punctuation is then stripped. Capitalized words that are not names
    (for example the first word of a sentence) are therefore included too.

    Args:
        text (str): Input text to extract names from.

    Returns:
        set: Set of potential names.
    """
    return {
        word.strip(string.punctuation)
        for word in text.split()
        if word.istitle()
    }

# Loaded (model, tokenizer) pairs keyed by model name, so that translating a
# document with many paragraphs loads each model once per script run instead
# of once per paragraph.
_TRANSLATION_MODELS = {}


def translate_to_language(text, target_language, max_length=512):
    """
    Translate the given text to the target language using the specified
    model.

    The model used is ``Helsinki-NLP/opus-mt-en-<target_language>``.

    Args:
        text (str): Input text to translate.
        target_language (str): Target language code.
        max_length (int): Upper bound passed to ``model.generate`` for the
            generated sequence. The original code hard-coded 100, which cuts
            off longer paragraphs. NOTE(review): 512 is assumed to suit the
            Marian models in use -- confirm.

    Returns:
        str: Translated text, or an empty string when ``text`` is empty or
        whitespace-only.
    """
    # Nothing to translate; avoid loading a model for blank paragraphs.
    if not text.strip():
        return ""

    model_name = f"Helsinki-NLP/opus-mt-en-{target_language}"
    if model_name not in _TRANSLATION_MODELS:
        _TRANSLATION_MODELS[model_name] = (
            MarianMTModel.from_pretrained(model_name),
            MarianTokenizer.from_pretrained(model_name),
        )
    model, tokenizer = _TRANSLATION_MODELS[model_name]

    # truncation=True caps over-long input at the tokenizer's configured
    # maximum instead of handing the model a sequence longer than it accepts.
    inputs = tokenizer.encode(text, return_tensors="pt", truncation=True)
    translated_ids = model.generate(inputs, max_length=max_length)[0]
    return tokenizer.decode(translated_ids, skip_special_tokens=True)

def translate_and_replace_names(input_docx, target_language, name_replacements):
    """
    Translate and replace names in a DOCX file.

    For every paragraph of the input document, the requested names are
    replaced first and the resulting text is then translated. Each
    translated paragraph is appended to a brand-new document, so only
    paragraph text is carried over.

    Args:
        input_docx (BytesIO): Input DOCX file.
        target_language (str): Target language code.
        name_replacements (dict): Dictionary of name replacements
            (old name -> new name).

    Returns:
        Document: Translated and replaced DOCX document.
    """
    doc = Document(input_docx)
    translated_doc = Document()
    # Compile once; the pattern does not change between paragraphs.
    name_pattern = _build_name_pattern(name_replacements)

    for para in doc.paragraphs:
        text = para.text
        if name_pattern is not None:
            # Single pass: a replacement value is never re-scanned, so one
            # substitution cannot be rewritten by a later dictionary entry.
            text = name_pattern.sub(
                lambda match: name_replacements[match.group(0)], text
            )
        translated_doc.add_paragraph(
            translate_to_language(text, target_language)
        )

    return translated_doc


def _build_name_pattern(name_replacements):
    """Compile a regex matching any non-empty key as a whole word, or None."""
    # Longest names first so "Mary Ann" wins over "Mary" at the same position.
    names = sorted(
        (name for name in name_replacements if name), key=len, reverse=True
    )
    if not names:
        return None
    alternation = "|".join(re.escape(name) for name in names)
    # Look-arounds instead of \b so names that end in punctuation still match,
    # while "Ann" inside "Annual" does not.
    return re.compile(rf"(?<!\w)(?:{alternation})(?!\w)")

# Streamlit UI
st.title("DOCX Translation App")

# Dropdown for target language selection.
# Add more languages as needed.
target_language = st.selectbox("Select Target Language", ["es", "fr"])

# Upload input DOCX file
uploaded_file = st.file_uploader("Upload Input DOCX", type=["docx"])

if uploaded_file:
    doc = Document(uploaded_file)
    doc_text = "\n".join(para.text for para in doc.paragraphs)

    # Extract and display potential names from input DOCX.
    # Sorted so the options appear in a stable order on every rerun.
    potential_names = extract_names(doc_text)
    st.sidebar.header("Potential Names from Demo Script")
    selected_names = st.sidebar.multiselect(
        "Select Names to Replace:", sorted(potential_names)
    )

    # Dictionary to store name replacements
    name_replacement_dict = {}
    for selected_name in selected_names:
        new_name = st.sidebar.text_input(f"Replace '{selected_name}' with:", "")
        if new_name:
            name_replacement_dict[selected_name] = new_name

    # Translate button. The result is kept in session state because the
    # script reruns on every widget interaction; a value that only exists
    # inside this `if` would be gone by the time the user clicks download.
    if st.button("Translate and Download"):
        translated_doc = translate_and_replace_names(
            uploaded_file, target_language, name_replacement_dict
        )
        # Serialize in memory rather than writing files on the server.
        buffer = io.BytesIO()
        translated_doc.save(buffer)
        st.session_state["translated_docx"] = buffer.getvalue()
        st.success("Translation and Name Replacement complete!")

    if "translated_docx" in st.session_state:
        # Input field for custom filename
        custom_filename = st.text_input(
            "Enter the filename:", "translated_output.docx"
        )

        # Download button: `data` is the document bytes, `file_name` is the
        # name offered to the browser.
        st.download_button(
            "Download",
            data=st.session_state["translated_docx"],
            file_name=custom_filename or "translated_output.docx",
            mime=(
                "application/vnd.openxmlformats-officedocument"
                ".wordprocessingml.document"
            ),
            key='download_button',
        )
elif "translated_docx" in st.session_state:
    # No file is uploaded any more; drop the previous result so it is not
    # offered for download alongside a different upload later.
    del st.session_state["translated_docx"]

# You might also like