commit 40ad24f27c9431088dd774e814be888733255973 Author: McElwain Date: Wed Apr 1 08:30:39 2026 -0500 initial import of transcriber diff --git a/audiotext.spec b/audiotext.spec new file mode 100644 index 0000000..f27ab45 --- /dev/null +++ b/audiotext.spec @@ -0,0 +1,126 @@ +# -*- mode: python ; coding: utf-8 -*- +from os.path import join +from platform import system +from PyInstaller.utils.hooks import copy_metadata +from PyInstaller.utils.hooks import collect_data_files +from shutil import copyfile + +datas = [ + (r'venv/Lib/site-packages/customtkinter', 'customtkinter'), + (r'venv/Lib/site-packages/transformers', 'transformers'), + (r'venv/Lib/site-packages/lightning', 'lightning'), + (r'venv/Lib/site-packages/lightning_fabric', 'lightning_fabric'), + (r'venv/Lib/site-packages/speechbrain', 'speechbrain'), + (r'venv/Lib/site-packages/pyannote', 'pyannote'), + (r'venv/Lib/site-packages/asteroid_filterbanks', 'asteroid_filterbanks'), + (r'venv/Lib/site-packages/whisperx', 'whisperx'), + ('res', 'res') +] + +datas += copy_metadata('torch') +datas += copy_metadata('tqdm', recursive=True) +datas += copy_metadata('regex') +datas += copy_metadata('requests') +datas += copy_metadata('packaging') +datas += copy_metadata('filelock') +datas += copy_metadata('numpy') +datas += copy_metadata('tokenizers') +datas += copy_metadata('pillow') +datas += copy_metadata('huggingface_hub') +datas += copy_metadata('safetensors') +datas += copy_metadata('pyyaml') +datas += collect_data_files('librosa') + +block_cipher = None + +a = Analysis( + ['src/app.py'], + pathex=[], + binaries=[], + datas=datas, + hiddenimports=['huggingface_hub.repository', 'pytorch', 'sklearn.utils._cython_blas', 'sklearn.neighbors.typedefs', 'sklearn.neighbors.quad_tree', 'sklearn.tree', 'sklearn.tree._utils'], + hookspath=[], + hooksconfig={}, + runtime_hooks=[], + excludes=[], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False, +) + +# Filter out unused 
and/or duplicate shared libs +torch_lib_paths = { + join('torch', 'lib', 'libtorch_cuda.so'), + join('torch', 'lib', 'libtorch_cpu.so'), +} +a.datas = [entry for entry in a.datas if not entry[0] in torch_lib_paths] + +os_path_separator = '\\' if system() == 'Windows' else '/' +a.datas = [entry for entry in a.datas if not f'torch{os_path_separator}_C.cp' in entry[0]] +a.datas = [entry for entry in a.datas if not f'torch{os_path_separator}_dl.cp' in entry[0]] + +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) + +if system() == 'Darwin': # macOS + exe = EXE( + pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + [], + name='Audiotext', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + upx_exclude=[], + runtime_tmpdir=None, + console=False, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch='x86_64', + codesign_identity=None, + entitlements_file=None, + icon=['res/img/icon.icns'], + ) + + # BUNDLE statement is used to create a macOS application bundle (.app) for the program + app = BUNDLE( + exe, + name='Audiotext.app', + icon=['res/img/icon.icns'], + bundle_identifier=None, + ) +else: + exe = EXE( + pyz, + a.scripts, + [], + exclude_binaries=True, + name='Audiotext', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + console=False, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch='x86_64', + codesign_identity=None, + entitlements_file=None, + icon=['res/img/icon.ico'], + ) + coll = COLLECT( + exe, + a.binaries, + a.zipfiles, + a.datas, + strip=False, + upx=True, + upx_exclude=[], + name='audiotext', + ) + +copyfile('config.ini', '{0}/audiotext/config.ini'.format(DISTPATH)) \ No newline at end of file diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..650fd09 --- /dev/null +++ b/config.ini @@ -0,0 +1,15 @@ +[whisperx] +model_size = large-v2 +batch_size = 8 +compute_type = int8 +use_cpu = True +can_use_gpu = False + +[google_api] 
+api_key = + +[subtitles] +highlight_words = False +max_line_width = 42 +max_line_count = 2 + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f502edf Binary files /dev/null and b/requirements.txt differ diff --git a/res/img/file-explorer.png b/res/img/file-explorer.png new file mode 100644 index 0000000..f758f5c Binary files /dev/null and b/res/img/file-explorer.png differ diff --git a/res/img/icon-dark.png b/res/img/icon-dark.png new file mode 100644 index 0000000..bdc10fd Binary files /dev/null and b/res/img/icon-dark.png differ diff --git a/res/img/icon-light.png b/res/img/icon-light.png new file mode 100644 index 0000000..1ea0b38 Binary files /dev/null and b/res/img/icon-light.png differ diff --git a/res/img/icon.icns b/res/img/icon.icns new file mode 100644 index 0000000..64dbf57 Binary files /dev/null and b/res/img/icon.icns differ diff --git a/res/img/icon.ico b/res/img/icon.ico new file mode 100644 index 0000000..784a78c Binary files /dev/null and b/res/img/icon.ico differ diff --git a/res/locales/en/LC_MESSAGES/app.mo b/res/locales/en/LC_MESSAGES/app.mo new file mode 100644 index 0000000..244ea64 Binary files /dev/null and b/res/locales/en/LC_MESSAGES/app.mo differ diff --git a/res/locales/en/LC_MESSAGES/app.po b/res/locales/en/LC_MESSAGES/app.po new file mode 100644 index 0000000..ef28afd --- /dev/null +++ b/res/locales/en/LC_MESSAGES/app.po @@ -0,0 +1,117 @@ +# Copyright (C) 2023 HenestrosaConH +# This file is distributed under the same license as the PACKAGE package. +# José Carlos López Henestrosa , 2023.
+# +msgid "" +msgstr "" +"Project-Id-Version: 1.3.0\n" +"Report-Msgid-Bugs-To: José Carlos López Henestrosa \n" +"POT-Creation-Date: 2023-02-08 18:22+0100\n" +"PO-Revision-Date: 2023-02-08 18:22+0100\n" +"Last-Translator: José Carlos López Henestrosa \n" +"Language-Team: José Carlos López Henestrosa \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: src\controller\main_controller.py:35 +msgid "Select a file" +msgstr "Select a file" + +#: src\controller\main_controller.py:37 +msgid "Audio files" +msgstr "Audio files" + +#: src\controller\main_controller.py:38 +msgid "Video files" +msgstr "Video files" + +#: src\controller\main_controller.py:93 +msgid "Error: No audio file selected, please select one before generating text." +msgstr "Error: No audio file selected, please select one before generating text." + +#: src\controller\main_controller.py:116 +msgid "Error: The selected audio language is not valid." +msgstr "Error: The selected audio language is not valid." + +#: src\controller\main_controller.py:120 +#: src\controller\main_controller.py:224 +msgid "Error generating the file transcription. Please try again." +msgstr "Error generating the file transcription. Please try again." + +#: src\controller\main_controller.py:254 +msgid "Error: No microphone detected." +msgstr "Error: No microphone detected." + +#: src\controller\main_controller.py:257 +msgid "Error: Listening timed out while waiting for phrase to start." +msgstr "Error: Listening timed out while waiting for phrase to start." + +#: src\controller\main_controller.py:262 +msgid "Sorry, I cannot clarify what you are saying. Please try again." +msgstr "Sorry, I cannot clarify what you are saying. Please try again." + +#: src\controller\main_controller.py:267 +msgid "Unexpected error. Please try again." +msgstr "Unexpected error. Please try again." 
+ +#: src\controller\main_controller.py:280 +msgid "Save as" +msgstr "Save as" + +#: src\controller\main_controller.py:282 +msgid "Text file" +msgstr "Text file" + +#: src\controller\main_controller.py:282 +msgid "All Files" +msgstr "All Files" + + +#: src\view\main_window.py:62 +#: src\view\main_window.py:204 +msgid "Select file" +msgstr "Select file" + +#: src\view\main_window.py:69 +#: src\view\main_window.py:205 +msgid "Transcribe from microphone" +msgstr "Transcribe from microphone" + +#: src\view\main_window.py:78 +msgid "Generate transcription" +msgstr "Generate transcription" + +#: src\view\main_window.py:86 +msgid "App language" +msgstr "App language" + +#: src\view\main_window.py:102 +msgid "Appearance mode" +msgstr "Appearance mode" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:210 +#: src\view\main_window.py:219 +msgid "System" +msgstr "System" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:218 +msgid "Light" +msgstr "Light" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:217 +msgid "Dark" +msgstr "Dark" + +#: src\view\main_window.py:133 +#: src\view\main_window.py:211 +msgid "Save transcription" +msgstr "Save transcription" diff --git a/res/locales/en/LC_MESSAGES/main_controller.po b/res/locales/en/LC_MESSAGES/main_controller.po new file mode 100644 index 0000000..4172350 --- /dev/null +++ b/res/locales/en/LC_MESSAGES/main_controller.po @@ -0,0 +1,69 @@ +# Copyright (C) 2023 HenestrosaConH +# This file is distributed under the same license as the PACKAGE package. +# José Carlos López Henestrosa , 2023. 
+# +msgid "" +msgstr "" +"Project-Id-Version: 1.3.0\n" +"Report-Msgid-Bugs-To: José Carlos López Henestrosa \n" +"POT-Creation-Date: 2023-02-08 18:22+0100\n" +"PO-Revision-Date: 2023-02-08 18:22+0100\n" +"Last-Translator: José Carlos López Henestrosa \n" +"Language-Team: José Carlos López Henestrosa \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:35 +msgid "Select a file" +msgstr "Select a file" + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:37 +msgid "Audio files" +msgstr "Audio files" + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:38 +msgid "Video files" +msgstr "Video files" + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:93 +msgid "Error: No audio file selected, please select one before generating text." +msgstr "Error: No audio file selected, please select one before generating text." + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:116 +msgid "Error: The selected audio language is not valid." +msgstr "Error: The selected audio language is not valid." + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:120 +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:224 +msgid "Error generating the file transcription. Please try again." +msgstr "Error generating the file transcription. Please try again." + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:254 +msgid "Error: No microphone detected." +msgstr "Error: No microphone detected." + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:257 +msgid "Error: Listening timed out while waiting for phrase to start." +msgstr "Error: Listening timed out while waiting for phrase to start." 
+ +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:262 +msgid "Sorry, I cannot clarify what you are saying. Please try again." +msgstr "Sorry, I cannot clarify what you are saying. Please try again." + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:267 +msgid "Unexpected error. Please try again." +msgstr "Unexpected error. Please try again." + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:280 +msgid "Save as" +msgstr "Save as" + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:282 +msgid "Text file" +msgstr "Text file" + +#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:282 +msgid "All Files" +msgstr "All Files" diff --git a/res/locales/en/LC_MESSAGES/main_window.po b/res/locales/en/LC_MESSAGES/main_window.po new file mode 100644 index 0000000..3168691 --- /dev/null +++ b/res/locales/en/LC_MESSAGES/main_window.po @@ -0,0 +1,62 @@ +# Copyright (C) 2023 HenestrosaConH +# This file is distributed under the same license as the PACKAGE package. +# José Carlos López Henestrosa , 2023. 
+# +msgid "" +msgstr "" +"Project-Id-Version: 1.3.0\n" +"Report-Msgid-Bugs-To: José Carlos López Henestrosa \n" +"POT-Creation-Date: 2023-02-08 18:22+0100\n" +"PO-Revision-Date: 2023-02-08 18:22+0100\n" +"Last-Translator: José Carlos López Henestrosa \n" +"Language-Team: José Carlos López Henestrosa \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + +#: src\view\main_window.py:62 +#: src\view\main_window.py:204 +msgid "Select file" +msgstr "Select file" + +#: src\view\main_window.py:69 +#: src\view\main_window.py:205 +msgid "Transcribe from microphone" +msgstr "Transcribe from microphone" + +#: src\view\main_window.py:78 +msgid "Generate transcription" +msgstr "Generate transcription" + +#: src\view\main_window.py:86 +msgid "App language" +msgstr "App language" + +#: src\view\main_window.py:102 +msgid "Appearance mode" +msgstr "Appearance mode" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:210 +#: src\view\main_window.py:219 +msgid "System" +msgstr "System" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:218 +msgid "Light" +msgstr "Light" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:217 +msgid "Dark" +msgstr "Dark" + +#: src\view\main_window.py:133 +#: src\view\main_window.py:211 +msgid "Save transcription" +msgstr "Save transcription" diff --git a/res/locales/es/LC_MESSAGES/app.mo b/res/locales/es/LC_MESSAGES/app.mo new file mode 100644 index 0000000..1db1a0c Binary files /dev/null and b/res/locales/es/LC_MESSAGES/app.mo differ diff --git a/res/locales/es/LC_MESSAGES/app.po b/res/locales/es/LC_MESSAGES/app.po new file mode 100644 index 0000000..19a0aec --- /dev/null +++ b/res/locales/es/LC_MESSAGES/app.po @@ -0,0 +1,116 @@ +# Copyright (C) 2023 HenestrosaConH +# This file is distributed under the same license as the PACKAGE 
package. +# José Carlos López Henestrosa , 2023. +# +msgid "" +msgstr "" +"Project-Id-Version: 1.3.0\n" +"Report-Msgid-Bugs-To: José Carlos López Henestrosa \n" +"POT-Creation-Date: 2023-02-08 18:22+0100\n" +"PO-Revision-Date: 2023-02-08 18:22+0100\n" +"Last-Translator: José Carlos López Henestrosa \n" +"Language-Team: José Carlos López Henestrosa \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + +#: src\view\main_window.py:62 +#: src\view\main_window.py:204 +msgid "Select file" +msgstr "Selecciona archivo" + +#: src\view\main_window.py:69 +#: src\view\main_window.py:205 +msgid "Transcribe from microphone" +msgstr "Transcribir del micrófono" + +#: src\view\main_window.py:78 +msgid "Generate transcription" +msgstr "Generar transcripción" + +#: src\view\main_window.py:86 +msgid "App language" +msgstr "Lenguaje del programa" + +#: src\view\main_window.py:102 +msgid "Appearance mode" +msgstr "Modo de apariencia" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:210 +#: src\view\main_window.py:219 +msgid "System" +msgstr "Sistema" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:218 +msgid "Light" +msgstr "Claro" + +#: src\view\main_window.py:108 +#: src\view\main_window.py:209 +#: src\view\main_window.py:217 +msgid "Dark" +msgstr "Oscuro" + +#: src\view\main_window.py:133 +#: src\view\main_window.py:211 +msgid "Save transcription" +msgstr "Guardar transcripción" + + +#: src\controller\main_controller.py:35 +msgid "Select a file" +msgstr "Seleccionar archivo" + +#: src\controller\main_controller.py:37 +msgid "Audio files" +msgstr "Archivos de audio" + +#: src\controller\main_controller.py:38 +msgid "Video files" +msgstr "Archivos de vídeo" + +#: src\controller\main_controller.py:93 +msgid "Error: No audio file selected, please select one before generating text." 
+msgstr "Error: No se ha seleccionado un archivo de audio. Por favor, seleccione uno antes de generar el texto." + +#: src\controller\main_controller.py:116 +msgid "Error: The selected audio language is not valid." +msgstr "Error: El idioma del audio seleccionado no es válido." + +#: src\controller\main_controller.py:120 +#: src\controller\main_controller.py:224 +msgid "Error generating the file transcription. Please try again." +msgstr "Error generando la transcripción. Por favor, inténtelo de nuevo." + +#: src\controller\main_controller.py:254 +msgid "Error: No microphone detected." +msgstr "Error: Micrófono no disponible." + +#: src\controller\main_controller.py:257 +msgid "Error: Listening timed out while waiting for phrase to start." +msgstr "Error: No se ha detectado audio disponible para transcribir." + +#: src\controller\main_controller.py:262 +msgid "Sorry, I cannot clarify what you are saying. Please try again." +msgstr "Lo siento, no puedo entender lo que está diciendo. Por favor, inténtelo de nuevo." + +#: src\controller\main_controller.py:267 +msgid "Unexpected error. Please try again." +msgstr "Error inesperado. Por favor, inténtelo de nuevo." + +#: src\controller\main_controller.py:280 +msgid "Save as" +msgstr "Guardar como" + +#: src\controller\main_controller.py:282 +msgid "Text file" +msgstr "Archivo de texto" + +#: src\controller\main_controller.py:282 +msgid "All Files" +msgstr "Todos los archivos" diff --git a/res/locales/es/LC_MESSAGES/main_controller.po b/res/locales/es/LC_MESSAGES/main_controller.po new file mode 100644 index 0000000..69b7baa --- /dev/null +++ b/res/locales/es/LC_MESSAGES/main_controller.po @@ -0,0 +1,69 @@ +# Copyright (C) 2023 HenestrosaConH +# This file is distributed under the same license as the PACKAGE package. +# José Carlos López Henestrosa , 2023. 
+# +msgid "" +msgstr "" +"Project-Id-Version: 1.3.0\n" +"Report-Msgid-Bugs-To: José Carlos López Henestrosa \n" +"POT-Creation-Date: 2023-02-08 18:22+0100\n" +"PO-Revision-Date: 2023-02-08 18:22+0100\n" +"Last-Translator: José Carlos López Henestrosa \n" +"Language-Team: José Carlos López Henestrosa \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + +#: src\controller\main_controller.py:35 +msgid "Select a file" +msgstr "Seleccionar archivo" + +#: src\controller\main_controller.py:37 +msgid "Audio files" +msgstr "Archivos de audio" + +#: src\controller\main_controller.py:38 +msgid "Video files" +msgstr "Archivos de vídeo" + +#: src\controller\main_controller.py:93 +msgid "Error: No audio file selected, please select one before generating text." +msgstr "Error: No se ha seleccionado un archivo de audio. Por favor, seleccione uno antes de generar el texto." + +#: src\controller\main_controller.py:116 +msgid "Error: The selected audio language is not valid." +msgstr "Error: El idioma del audio seleccionado no es válido." + +#: src\controller\main_controller.py:120 +#: src\controller\main_controller.py:224 +msgid "Error generating the file transcription. Please try again." +msgstr "Error generando la transcripción. Por favor, inténtelo de nuevo." + +#: src\controller\main_controller.py:254 +msgid "Error: No microphone detected." +msgstr "Error: Micrófono no disponible." + +#: src\controller\main_controller.py:257 +msgid "Error: Listening timed out while waiting for phrase to start." +msgstr "Error: No se ha detectado audio disponible para transcribir." + +#: src\controller\main_controller.py:262 +msgid "Sorry, I cannot clarify what you are saying. Please try again." +msgstr "Lo siento, no puedo entender lo que está diciendo. Por favor, inténtelo de nuevo." + +#: src\controller\main_controller.py:267 +msgid "Unexpected error. Please try again." +msgstr "Error inesperado. 
Por favor, inténtelo de nuevo." + +#: src\controller\main_controller.py:280 +msgid "Save as" +msgstr "Guardar como" + +#: src\controller\main_controller.py:282 +msgid "Text file" +msgstr "Archivo de texto" + +#: src\controller\main_controller.py:282 +msgid "All Files" +msgstr "Todos los archivos" diff --git a/res/locales/es/LC_MESSAGES/main_window.po b/res/locales/es/LC_MESSAGES/main_window.po new file mode 100644 index 0000000..bb1ab00 Binary files /dev/null and b/res/locales/es/LC_MESSAGES/main_window.po differ diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..78522cf --- /dev/null +++ b/src/app.py @@ -0,0 +1,71 @@ +import customtkinter as ctk +import torch +import utils.config_manager as cm +import utils.constants as c +import utils.path_helper as ph +from controller.main_controller import MainController +from model.config.config_whisperx import ConfigWhisperX +from model.transcription import Transcription +from utils.enums import ComputeType +from view.main_window import MainWindow + + +class App(ctk.CTk): + def __init__(self): + super().__init__() + + # Modes: "System" (standard), "Dark", "Light" + ctk.set_appearance_mode("System") + # Themes: "blue" (standard), "green", "dark-blue" + ctk.set_default_color_theme("blue") + + self.title(c.APP_NAME) + self.wm_iconbitmap(ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "icon.ico") + + # Initial size of the window + width = 1000 + height = 760 + self.geometry(f"{width}x{height}") + + # Min size of the window + min_width = 500 + min_height = 250 + self.minsize(min_width, min_height) + + # Check GPU + cm.ConfigManager.modify_value( + section=ConfigWhisperX.Key.SECTION, + key=ConfigWhisperX.Key.CAN_USE_GPU, + new_value=str(torch.cuda.is_available()), + ) + + if not torch.cuda.is_available(): + cm.ConfigManager.modify_value( + section=ConfigWhisperX.Key.SECTION, + key=ConfigWhisperX.Key.COMPUTE_TYPE, + new_value=ComputeType.INT8.value, + ) + + # Initialize configs + config_whisperx = 
cm.ConfigManager.get_config_whisperx() + config_google_api = cm.ConfigManager.get_config_google_api() + config_subtitles = cm.ConfigManager.get_config_subtitles() + + # Create the view and place it on the root window + view = MainWindow(self, config_whisperx, config_google_api, config_subtitles) + view.pack(fill="both", expand=True) + + # Create the model for the controller + transcription = Transcription() + + # Create the controller + controller = MainController(transcription, view) + + # Set the controller to view + view.set_controller(controller) + + +if __name__ == "__main__": + app = App() + app.eval("tk::PlaceWindow . center") + app.mainloop() diff --git a/src/controller/__init__.py b/src/controller/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/controller/__pycache__/__init__.cpython-313.pyc b/src/controller/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..0c016ce Binary files /dev/null and b/src/controller/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/controller/__pycache__/main_controller.cpython-313.pyc b/src/controller/__pycache__/main_controller.cpython-313.pyc new file mode 100644 index 0000000..1db6af2 Binary files /dev/null and b/src/controller/__pycache__/main_controller.cpython-313.pyc differ diff --git a/src/controller/main_controller.py b/src/controller/main_controller.py new file mode 100644 index 0000000..934ebd6 --- /dev/null +++ b/src/controller/main_controller.py @@ -0,0 +1,354 @@ +import asyncio +import os +import shutil +import threading +import traceback +from pathlib import Path +from tkinter import filedialog + +import speech_recognition as sr +import utils.audio_utils as au +import utils.config_manager as cm +import whisperx +from model.transcription import Transcription +from moviepy.video.io.VideoFileClip import VideoFileClip +from pydub import AudioSegment +from pydub.silence import split_on_silence +from pytube import YouTube +from pytube.exceptions import 
RegexMatchError +from utils import constants as c +from utils.enums import AudioSource, TranscriptionMethod +from utils.i18n import _ +from utils.path_helper import ROOT_PATH + + +class MainController: + def __init__(self, transcription: Transcription, view): + self.view = view + self.transcription = transcription + self._is_mic_recording = False + self._whisperx_result = None + + # PUBLIC METHODS + + def select_file(self): + """ + Prompts a file explorer to determine the audio/video file path to transcribe. + Stores the filepath in the class variable filepath_to_transcribe. + """ + filepath = filedialog.askopenfilename( + initialdir="/", + title=_("Select a file"), + filetypes=[ + ( + _("All supported files"), + c.AUDIO_FILE_EXTENSIONS + c.VIDEO_FILE_EXTENSIONS, + ), + (_("Audio files"), c.AUDIO_FILE_EXTENSIONS), + (_("Video files"), c.VIDEO_FILE_EXTENSIONS), + ], + ) + + if filepath: + self.view.on_select_file_success(filepath) + + def prepare_for_transcription(self, transcription: Transcription): + """ + Prepares the transcription process based on provided parameters. + + :raises: IndexError if the selected language code is not valid. 
+ """ + self.transcription = transcription + + try: + self.view.on_processing_transcription() + + if transcription.source == AudioSource.FILE: + self._prepare_for_file_transcription(transcription.source_file_path) + elif transcription.source == AudioSource.MIC: + self._prepare_for_mic_transcription() + elif transcription.source == AudioSource.YOUTUBE: + self._prepare_for_yt_transcription() + + except Exception as e: + self._handle_exception(e) + + async def handle_transcription_process(self): + try: + # Get transcription + if self.transcription.method == TranscriptionMethod.WHISPERX.value: + await self._transcribe_using_whisperx() + elif self.transcription.method == TranscriptionMethod.GOOGLE_API.value: + await self._transcribe_using_google_api() + + if self.transcription.source in (AudioSource.MIC, AudioSource.YOUTUBE): + self.transcription.source_file_path.unlink() # Remove tmp file + + except Exception as e: + self._handle_exception(e) + + finally: + is_transcription_empty = not self.transcription.text + self.view.on_processed_transcription(success=not is_transcription_empty) # Success only when some text was produced + + def stop_recording_from_mic(self): + self._is_mic_recording = False + + def save_transcription(self): + """ + Prompts a file explorer to determine the file to save the + generated transcription.
+ """ + file_path = Path(self.transcription.source_file_path) + + file = filedialog.asksaveasfile( + mode="w", + initialdir=file_path.parent, + initialfile=f"{file_path.stem}.txt", + title=_("Save as"), + defaultextension=".txt", + filetypes=[(_("Text file"), "*.txt"), (_("All Files"), "*.*")], + ) + + if file: + file.write(self.transcription.text) + file.close() + + if self.transcription.should_subtitle: + self._generate_subtitles(Path(file.name)) + + # PRIVATE METHODS + + def _prepare_for_file_transcription(self, source_file_path: str): + if self._is_file_valid(source_file_path): + self.transcription.source_file_path = Path(source_file_path) + + threading.Thread( + target=lambda loop: loop.run_until_complete( + self.handle_transcription_process() + ), + args=(asyncio.new_event_loop(),), + ).start() + else: + raise ValueError("Error: No valid file selected.") + + def _prepare_for_mic_transcription(self): + threading.Thread(target=self._record_from_mic).start() + + def _prepare_for_yt_transcription(self): + threading.Thread(target=self._download_audio_from_yt_video).start() + + def _handle_exception(self, e: Exception): + print(traceback.format_exc()) + self.view.on_processed_transcription(success=False) + self.view.display_text(repr(e)) + + @staticmethod + def _is_file_valid(source_file_path: str): + filepath = Path(source_file_path) + is_audio = filepath.suffix in c.AUDIO_FILE_EXTENSIONS + is_video = filepath.suffix in c.VIDEO_FILE_EXTENSIONS + + return filepath.is_file() and (is_audio or is_video) + + async def _transcribe_using_whisperx(self): + config_whisperx = cm.ConfigManager.get_config_whisperx() + + device = "cpu" if config_whisperx.use_cpu else "cuda" + task = "translate" if self.transcription.should_translate else "transcribe" + + try: + model = whisperx.load_model( + config_whisperx.model_size, + device, + compute_type=config_whisperx.compute_type, + task=task, + language=self.transcription.language_code, + ) + + audio_path = 
async def _transcribe_using_google_api(self):
    """
    Transcribe the source file with the Google Speech Recognition API.

    The audio (or the audio track extracted from a video file) is split on
    silence into chunks. Every chunk is exported to a temporary WAV file
    inside ``ROOT_PATH / "audio-chunks"`` and sent to the recognizer. The
    recognized chunks are capitalized, terminated with ". " and joined into
    ``self.transcription.text``, which is then shown in the view.

    If anything fails, the traceback is shown in the view instead and the
    previous value of ``self.transcription.text`` is left untouched. The
    temporary directory is always removed.
    """
    file_path = self.transcription.source_file_path

    # Temporary directory for the extracted audio track and the chunks
    chunks_directory = ROOT_PATH / "audio-chunks"
    chunks_directory.mkdir(exist_ok=True)

    try:
        # The extension lists are lowercase, so normalize the suffix
        # to accept files such as "TALK.MP3" as well.
        content_type = Path(file_path).suffix.lower()

        if content_type in c.AUDIO_FILE_EXTENSIONS:
            sound = AudioSegment.from_file(file_path)

        elif content_type in c.VIDEO_FILE_EXTENSIONS:
            video_audio_path = chunks_directory / f"{Path(file_path).stem}.wav"
            clip = VideoFileClip(str(file_path))
            try:
                clip.audio.write_audiofile(video_audio_path)
            finally:
                # Release the reader processes/handles held by the clip
                clip.close()
            sound = AudioSegment.from_wav(video_audio_path)

        else:
            # Fail with a readable message instead of dereferencing None below
            raise ValueError(f"Unsupported file extension: {content_type!r}")

        audio_chunks = split_on_silence(
            sound,
            # Minimum duration of silence required to consider a segment as a split point
            min_silence_len=500,
            # Audio with a level -X decibels below the original audio level will be considered as silence
            silence_thresh=sound.dBFS - 40,
            # Adds a buffer of silence before and after each split point
            keep_silence=100,
        )

        recognizer = sr.Recognizer()

        # Get Google API key (if any); an empty value is passed as None
        api_key = cm.ConfigManager.get_config_google_api().api_key or None

        chunk_texts = []
        for idx, audio_chunk in enumerate(audio_chunks):
            # Export audio chunk and save it in the `chunks_directory` directory.
            # `os.path.join` yields a plain string for `sr.AudioFile`.
            chunk_filename = os.path.join(chunks_directory, f"chunk{idx}.wav")
            audio_chunk.export(chunk_filename, bitrate="192k", format="wav")

            with sr.AudioFile(chunk_filename) as source:
                recognizer.adjust_for_ambient_noise(source)
                audio_listened = recognizer.record(source)

            try:
                chunk_text = recognizer.recognize_google(
                    audio_listened,
                    language=self.transcription.language_code,
                    key=api_key,
                )
            except sr.UnknownValueError:
                # Unintelligible chunk: skip it. Any other failure (e.g. the
                # API being unreachable) is reported by the outer handler
                # instead of silently producing an empty transcription.
                continue

            chunk_texts.append(f"{chunk_text.capitalize()}. ")

    except Exception:
        self.view.display_text(traceback.format_exc())

    else:
        # Only a successful run may update and display the transcription;
        # otherwise a stale text would overwrite the error shown above.
        self.transcription.text = "".join(chunk_texts)
        if self.transcription.text:
            self.view.display_text(self.transcription.text)

    finally:
        # Delete temporary directory and files
        shutil.rmtree(chunks_directory, ignore_errors=True)


def _start_transcription_thread(self):
    """
    Run ``self.handle_transcription_process()`` to completion on a new
    thread that owns (and finally closes) its own asyncio event loop.
    """

    def run():
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(self.handle_transcription_process())
        finally:
            loop.close()

    threading.Thread(target=run).start()


def _record_from_mic(self):
    """
    Record from the microphone until ``self._is_mic_recording`` is cleared,
    save the recording to "mic-output.wav" and start the transcription.

    Any error stops the recording in the view and is passed to
    ``self._handle_exception``; an empty recording is reported as
    ``ValueError("No audio detected")``.
    """
    self._is_mic_recording = True
    audio_data = []

    try:
        recognizer = sr.Recognizer()

        with sr.Microphone() as mic:
            while self._is_mic_recording:
                try:
                    audio_data.append(recognizer.listen(mic, timeout=5))
                except sr.WaitTimeoutError:
                    # No phrase started within the timeout. Keep what was
                    # recorded so far and re-check the stop flag instead of
                    # aborting the whole recording.
                    continue

        if audio_data:
            filename = "mic-output.wav"
            au.save_audio_data(audio_data, filename=filename)
            self.transcription.source_file_path = Path(filename)
            self._start_transcription_thread()
        else:
            self._handle_exception(ValueError("No audio detected"))

    except Exception as e:
        self.view.stop_recording_from_mic()
        self._handle_exception(e)


def _generate_subtitles(self, file_path):
    """
    Write "srt" and "vtt" subtitle files for the current WhisperX result
    next to ``file_path``.

    :param file_path: Path of the transcribed file; its parent directory is
        used as the output directory.
    :type file_path: Path
    """
    config_subtitles = cm.ConfigManager.get_config_subtitles()

    output_dir = file_path.parent
    # The writer options are the same for every output format
    writer_args = {
        "highlight_words": config_subtitles.highlight_words,
        "max_line_count": config_subtitles.max_line_count,
        "max_line_width": config_subtitles.max_line_width,
    }

    # The writers need a "language" entry in the result, see
    # https://github.com/m-bain/whisperX/issues/455#issuecomment-1707547704
    # Use the language of this transcription rather than always "en".
    self._whisperx_result["language"] = (
        self._whisperx_result.get("language")
        or self.transcription.language_code
        or "en"
    )

    for output_format in ("srt", "vtt"):
        writer = whisperx.transcribe.get_writer(output_format, output_dir)
        writer(self._whisperx_result, file_path, writer_args)


def _download_audio_from_yt_video(self):
    """
    Download the first audio-only stream of ``self.transcription.youtube_url``
    as "yt-audio.mp3" in the working directory and start the transcription.

    Errors are passed to ``self._handle_exception``; a malformed URL is
    reported as ``ValueError("The URL is not correct.")``.
    """
    try:
        yt = YouTube(self.transcription.youtube_url)
        stream = yt.streams.filter(only_audio=True).first()
        if stream is None:
            raise ValueError("The video has no audio stream.")

        output_file = stream.download(output_path=".", filename="yt-audio.mp3")

        if output_file:
            self.transcription.source_file_path = Path(output_file)
            self._start_transcription_thread()

    except RegexMatchError:
        self._handle_exception(ValueError("The URL is not correct."))

    except Exception as e:
        self._handle_exception(e)
differ diff --git a/src/model/config/__pycache__/config_google_api.cpython-313.pyc b/src/model/config/__pycache__/config_google_api.cpython-313.pyc new file mode 100644 index 0000000..e2fc288 Binary files /dev/null and b/src/model/config/__pycache__/config_google_api.cpython-313.pyc differ diff --git a/src/model/config/__pycache__/config_subtitles.cpython-313.pyc b/src/model/config/__pycache__/config_subtitles.cpython-313.pyc new file mode 100644 index 0000000..b9b9515 Binary files /dev/null and b/src/model/config/__pycache__/config_subtitles.cpython-313.pyc differ diff --git a/src/model/config/__pycache__/config_whisperx.cpython-313.pyc b/src/model/config/__pycache__/config_whisperx.cpython-313.pyc new file mode 100644 index 0000000..354842f Binary files /dev/null and b/src/model/config/__pycache__/config_whisperx.cpython-313.pyc differ diff --git a/src/model/config/config_google_api.py b/src/model/config/config_google_api.py new file mode 100644 index 0000000..326a30a --- /dev/null +++ b/src/model/config/config_google_api.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +@dataclass +class ConfigGoogleApi: + api_key: str + + class Key(Enum): + """ + Enum class for keys associated with the Google API configuration. + """ + + SECTION = "google_api" + API_KEY = "api_key" + + def value_type(self) -> Optional[str]: + """ + Get the value type associated with the ConfigKey. 
+ """ + type_mapping = { + self.API_KEY: "str", + } + + return type_mapping.get(self, None) diff --git a/src/model/config/config_subtitles.py b/src/model/config/config_subtitles.py new file mode 100644 index 0000000..88af1c9 --- /dev/null +++ b/src/model/config/config_subtitles.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +@dataclass +class ConfigSubtitles: + highlight_words: bool + max_line_count: int + max_line_width: int + + class Key(Enum): + """ + Enum class for keys associated with the subtitles configuration. + """ + + SECTION = "subtitles" + HIGHLIGHT_WORDS = "highlight_words" + MAX_LINE_COUNT = "max_line_count" + MAX_LINE_WIDTH = "max_line_width" + + def value_type(self) -> Optional[str]: + """Get the value type associated with the ConfigKey.""" + type_mapping = { + self.HIGHLIGHT_WORDS: "bool", + self.MAX_LINE_COUNT: "int", + self.MAX_LINE_WIDTH: "int", + } + + return type_mapping.get(self, None) diff --git a/src/model/config/config_whisperx.py b/src/model/config/config_whisperx.py new file mode 100644 index 0000000..5cf1261 --- /dev/null +++ b/src/model/config/config_whisperx.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +@dataclass +class ConfigWhisperX: + model_size: str + batch_size: int + compute_type: str + use_cpu: bool + can_use_gpu: bool + + class Key(Enum): + """ + Enum class for keys associated with the WhisperX configuration. + """ + + SECTION = "whisperx" + MODEL_SIZE = "model_size" + BATCH_SIZE = "batch_size" + COMPUTE_TYPE = "compute_type" + USE_CPU = "use_cpu" + CAN_USE_GPU = "can_use_gpu" + + def value_type(self) -> Optional[str]: + """ + Get the value type associated with the ConfigKey. 
+ + :return + :rtype: str + """ + type_mapping = { + self.MODEL_SIZE: "str", + self.BATCH_SIZE: "int", + self.COMPUTE_TYPE: "str", + self.USE_CPU: "bool", + self.CAN_USE_GPU: "bool", + } + + return type_mapping.get(self, None) diff --git a/src/model/transcription.py b/src/model/transcription.py new file mode 100644 index 0000000..16eb493 --- /dev/null +++ b/src/model/transcription.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + +from utils.enums import AudioSource + + +@dataclass +class Transcription: + text: Optional[str] = None + language_code: Optional[str] = None + source: Optional[AudioSource] = None + source_file_path: Optional[Path | str] = Path("/") + method: Optional[int] = None + should_translate: bool = False + should_subtitle: bool = False + youtube_url: str = None diff --git a/src/utils/__init__.py b/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/__pycache__/__init__.cpython-313.pyc b/src/utils/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..2cc4313 Binary files /dev/null and b/src/utils/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/utils/__pycache__/audio_utils.cpython-313.pyc b/src/utils/__pycache__/audio_utils.cpython-313.pyc new file mode 100644 index 0000000..4b859d4 Binary files /dev/null and b/src/utils/__pycache__/audio_utils.cpython-313.pyc differ diff --git a/src/utils/__pycache__/config_manager.cpython-313.pyc b/src/utils/__pycache__/config_manager.cpython-313.pyc new file mode 100644 index 0000000..c242718 Binary files /dev/null and b/src/utils/__pycache__/config_manager.cpython-313.pyc differ diff --git a/src/utils/__pycache__/constants.cpython-313.pyc b/src/utils/__pycache__/constants.cpython-313.pyc new file mode 100644 index 0000000..4b85a42 Binary files /dev/null and b/src/utils/__pycache__/constants.cpython-313.pyc differ diff --git a/src/utils/__pycache__/path_helper.cpython-313.pyc 
def save_audio_data(audio_data, filename):
    """
    Concatenate recorded audio chunks and save them as one WAV file.

    Does nothing when ``audio_data`` is empty. Sample width and frame rate
    are taken from the first chunk and the audio is written as one channel.
    Errors raised while exporting propagate to the caller.

    :param audio_data: Recorded chunks exposing ``get_raw_data``,
        ``sample_width`` and ``sample_rate``.
    :param filename: Destination of the WAV file.
    """
    if not audio_data:
        return

    raw_audio_data = b"".join(
        chunk.get_raw_data(convert_rate=None, convert_width=None)
        for chunk in audio_data
    )
    audio = AudioSegment(
        raw_audio_data,
        sample_width=audio_data[0].sample_width,
        frame_rate=audio_data[0].sample_rate,
        channels=1,
    )

    # The export cannot raise speech-recognition errors, so the previous
    # handlers for them were dead code; real export failures are left to
    # the caller exactly as before.
    audio.export(filename, format="wav")
    print(f"Audio data saved to {filename}")


class ConfigManager:
    """Typed read/write access to the application's ``config.ini`` file."""

    _FILE_PATH = ROOT_PATH / "config.ini"
    KeyType = Union[ConfigWhisperX.Key, ConfigGoogleApi.Key, ConfigSubtitles.Key]

    @staticmethod
    def read_config(file_path: Path = _FILE_PATH) -> ConfigParser:
        """
        Parse the configuration file.

        :param file_path: Path of the INI file to read.
        :return: The parser loaded with the content of ``file_path``.
        """
        config = ConfigParser()
        config.read(file_path)
        return config

    @staticmethod
    def get_config_whisperx() -> ConfigWhisperX:
        """Build a ``ConfigWhisperX`` from the ``[whisperx]`` section."""
        keys = ConfigWhisperX.Key
        section = keys.SECTION

        return ConfigWhisperX(
            model_size=ConfigManager.get_value(section, keys.MODEL_SIZE),
            batch_size=ConfigManager.get_value(section, keys.BATCH_SIZE),
            compute_type=ConfigManager.get_value(section, keys.COMPUTE_TYPE),
            use_cpu=ConfigManager.get_value(section, keys.USE_CPU),
            can_use_gpu=ConfigManager.get_value(section, keys.CAN_USE_GPU),
        )

    @staticmethod
    def get_config_google_api() -> ConfigGoogleApi:
        """Build a ``ConfigGoogleApi`` from the ``[google_api]`` section."""
        keys = ConfigGoogleApi.Key

        return ConfigGoogleApi(
            api_key=ConfigManager.get_value(keys.SECTION, keys.API_KEY),
        )

    @staticmethod
    def get_config_subtitles() -> ConfigSubtitles:
        """Build a ``ConfigSubtitles`` from the ``[subtitles]`` section."""
        keys = ConfigSubtitles.Key
        section = keys.SECTION

        return ConfigSubtitles(
            highlight_words=ConfigManager.get_value(section, keys.HIGHLIGHT_WORDS),
            max_line_count=ConfigManager.get_value(section, keys.MAX_LINE_COUNT),
            max_line_width=ConfigManager.get_value(section, keys.MAX_LINE_WIDTH),
        )

    @staticmethod
    def get_value(
        section: KeyType,
        key: KeyType,
        file_path: Path = _FILE_PATH,
    ) -> Optional[Union[str, bool, int, float]]:
        """
        Read one option, converted according to ``key.value_type()``.

        :param section: Enum member whose value is the section name.
        :param key: Enum member whose value is the option name.
        :param file_path: Path of the INI file to read.
        :return: The converted value, or None when the section or option
            does not exist or the key has no known value type.
        """
        config = ConfigManager.read_config(file_path)

        section_name = section.value
        key_name = key.value

        # Check if the section and key exist before getting the value
        if section_name not in config or key_name not in config[section_name]:
            print(
                f"Section [{section_name}] or Key [{key_name}] not found in the config"
            )
            return None

        readers = {
            "str": config.get,
            "bool": config.getboolean,
            "int": config.getint,
            "float": config.getfloat,
        }
        reader = readers.get(key.value_type())

        return reader(section_name, key_name) if reader is not None else None

    @staticmethod
    def modify_value(
        section: KeyType,
        key: KeyType,
        new_value: str,
        file_path: Path = _FILE_PATH,
    ):
        """
        Overwrite an existing option and save the file.

        Nothing is written when the section or option does not exist.

        :param section: Enum member whose value is the section name.
        :param key: Enum member whose value is the option name.
        :param new_value: New value; stored as its string representation.
        :param file_path: Path of the INI file to modify.
        """
        config = ConfigManager.read_config(file_path)

        section_name = section.value
        key_name = key.value

        # Check if the section and option exist before modifying
        if section_name in config and key_name in config[section_name]:
            # ConfigParser only accepts string values
            config.set(section_name, key_name, str(new_value))

            with open(file_path, "w") as config_file:
                config.write(config_file)

            print(f"Value for [{section_name}][{key_name}] modified to {new_value}")
        else:
            print(
                f"Section [{section_name}] or Key [{key_name}] not found in the config"
            )
def find_key_by_value(dictionary, target_value):
    """
    Return the first key of ``dictionary`` (in iteration order) whose value
    equals ``target_value``, or None when no value matches.
    """
    matching_keys = (
        key for key, value in dictionary.items() if value == target_value
    )
    return next(matching_keys, None)


class AudioSource(Enum):
    """Where the audio to transcribe comes from."""

    FILE = "File"
    MIC = "Microphone"
    YOUTUBE = "YouTube"


class TranscriptionMethod(Enum):
    """Available transcription back ends."""

    WHISPERX = 0
    GOOGLE_API = 1


class Color(Enum):
    """Hex color codes used by the widgets."""

    LIGHT_RED = "#D30000"
    DARK_RED = "#8b0000"
    HOVER_LIGHT_RED = "#BF0000"
    HOVER_DARK_RED = "#610000"

    LIGHT_BLUE = "#3B8ED0"
    DARK_BLUE = "#1F6AA5"
    HOVER_LIGHT_BLUE = "#36719F"
    HOVER_DARK_BLUE = "#144870"


class ComputeType(Enum):
    """Values accepted for the ``compute_type`` option."""

    INT8 = "int8"
    FLOAT16 = "float16"
    FLOAT32 = "float32"


class ModelSize(Enum):
    """Values accepted for the ``model_size`` option."""

    TINY = "tiny"
    BASE = "base"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE_V1 = "large-v1"
    LARGE_V2 = "large-v2"
    LARGE_V3 = "large-v3"
_ = None


def load_translation(language_code: str):
    """
    Loads the translation for the provided language code.

    The territory part of the code is dropped ("es_ES" -> "es"). Languages
    that are not in ``c.APP_LANGUAGES``, as well as values that are not
    strings, fall back to "en". The "app" catalog is loaded from
    ``res/locales`` with ``fallback=True``, so a missing catalog does not
    raise. The translation is installed and the module-level ``_`` is set
    to its ``gettext`` function.

    :param language_code: The code for the language to be used for translation.
    :type language_code: str
    """
    global _

    try:
        lang_code_without_territory = language_code.split("_")[0]
    except AttributeError:
        # Not a string (e.g. None when the system locale is unknown)
        lang_code_without_territory = "en"
        try:
            locale.setlocale(locale.LC_ALL, "en_US")
        except locale.Error:
            # "en_US" is not installed on every system; keep the current
            # locale rather than failing to load any translation.
            pass
    else:
        if lang_code_without_territory not in c.APP_LANGUAGES:
            lang_code_without_territory = "en"

    translation = gettext.translation(
        "app",
        localedir=ph.ROOT_PATH / "res/locales",
        languages=[lang_code_without_territory],
        fallback=True,
    )
    translation.install()

    _ = translation.gettext


load_translation("en")
# load_translation(locale.getdefaultlocale()[0][:2])
class CTkInputDialog(ctk.CTkToplevel):
    """
    Dialog with extra window, message, entry widget, cancel and ok button.

    Every color argument falls back to the value of the active theme when
    it is None. ``get_input`` blocks until the dialog is closed and returns
    the entered text, or None when the dialog was cancelled or closed.
    """

    def __init__(
        self,
        fg_color: Optional[Union[str, Tuple[str, str]]] = None,
        text_color: Optional[Union[str, Tuple[str, str]]] = None,
        button_fg_color: Optional[Union[str, Tuple[str, str]]] = None,
        button_hover_color: Optional[Union[str, Tuple[str, str]]] = None,
        button_text_color: Optional[Union[str, Tuple[str, str]]] = None,
        entry_fg_color: Optional[Union[str, Tuple[str, str]]] = None,
        entry_border_color: Optional[Union[str, Tuple[str, str]]] = None,
        entry_text_color: Optional[Union[str, Tuple[str, str]]] = None,
        title: str = "CTkDialog",
        font: Optional[Union[tuple, ctk.CTkFont]] = None,
        label_text: str = "CTkDialog",
        entry_text: Optional[str] = None,
    ):
        super().__init__(fg_color=fg_color)

        theme = ctk.ThemeManager.theme

        self._fg_color = (
            theme["CTkToplevel"]["fg_color"]
            if fg_color is None
            else self._check_color_type(fg_color)
        )
        # Validate the label color itself; the hover color of the buttons
        # was used here before, so a custom `text_color` was ignored.
        self._text_color = (
            theme["CTkLabel"]["text_color"]
            if text_color is None
            else self._check_color_type(text_color)
        )
        self._button_fg_color = (
            theme["CTkButton"]["fg_color"]
            if button_fg_color is None
            else self._check_color_type(button_fg_color)
        )
        self._button_hover_color = (
            theme["CTkButton"]["hover_color"]
            if button_hover_color is None
            else self._check_color_type(button_hover_color)
        )
        self._button_text_color = (
            theme["CTkButton"]["text_color"]
            if button_text_color is None
            else self._check_color_type(button_text_color)
        )
        self._entry_fg_color = (
            theme["CTkEntry"]["fg_color"]
            if entry_fg_color is None
            else self._check_color_type(entry_fg_color)
        )
        self._entry_border_color = (
            theme["CTkEntry"]["border_color"]
            if entry_border_color is None
            else self._check_color_type(entry_border_color)
        )
        self._entry_text_color = (
            theme["CTkEntry"]["text_color"]
            if entry_text_color is None
            else self._check_color_type(entry_text_color)
        )

        self._user_input: Union[str, None] = None
        self._running: bool = False
        self._title = title
        self._label_text = label_text
        self._entry_text = entry_text
        self._font = font

        self.title(self._title)
        self.lift()  # lift window on top
        self.attributes("-topmost", True)  # stay on top
        self.protocol("WM_DELETE_WINDOW", self._on_closing)
        self.after(
            10, self._create_widgets
        )  # create widgets with slight delay, to avoid white flickering of background
        self.resizable(False, False)
        self.grab_set()  # make other windows not clickable

    def _create_widgets(self):
        """Create and lay out the label, the entry and the two buttons."""
        self.grid_columnconfigure((0, 1), weight=1)
        self.rowconfigure(0, weight=1)

        self._label = ctk.CTkLabel(
            master=self,
            width=300,
            wraplength=300,
            fg_color="transparent",
            text_color=self._text_color,
            text=self._label_text,
            font=self._font,
        )
        self._label.grid(row=0, column=0, columnspan=2, padx=20, pady=20, sticky="ew")

        self._entry = ctk.CTkEntry(
            master=self,
            width=230,
            fg_color=self._entry_fg_color,
            border_color=self._entry_border_color,
            text_color=self._entry_text_color,
            font=self._font,
            textvariable=ctk.StringVar(self, self._entry_text),
        )
        self._entry.grid(
            row=1, column=0, columnspan=2, padx=20, pady=(0, 20), sticky="ew"
        )

        self._ok_button = ctk.CTkButton(
            master=self,
            width=100,
            border_width=0,
            fg_color=self._button_fg_color,
            hover_color=self._button_hover_color,
            text_color=self._button_text_color,
            text="Ok",
            font=self._font,
            command=self._ok_event,
        )
        self._ok_button.grid(
            row=2, column=0, columnspan=1, padx=(20, 10), pady=(0, 20), sticky="ew"
        )

        self._cancel_button = ctk.CTkButton(
            master=self,
            width=100,
            border_width=0,
            fg_color=(Color.LIGHT_RED.value, Color.DARK_RED.value),
            hover_color=(
                Color.HOVER_LIGHT_RED.value,
                Color.HOVER_DARK_RED.value,
            ),
            text_color=self._button_text_color,
            text="Cancel",
            font=self._font,
            command=self._cancel_event,
        )
        self._cancel_button.grid(
            row=2, column=1, columnspan=1, padx=(10, 20), pady=(0, 20), sticky="ew"
        )

        # set focus to entry with slight delay, otherwise it won't work
        self.after(150, lambda: self._entry.focus())
        # Pressing Enter in the entry confirms the dialog
        self._entry.bind("<Return>", self._ok_event)

    def _ok_event(self, event=None):
        """
        Store the entry text and close the dialog.

        ``event`` is supplied by Tk when triggered through the key binding
        and is absent when triggered by the Ok button.
        """
        self._user_input = self._entry.get()
        self.grab_release()
        self.destroy()

    def _on_closing(self):
        """Close the dialog without storing any input."""
        self.grab_release()
        self.destroy()

    def _cancel_event(self):
        """Close the dialog without storing any input."""
        self.grab_release()
        self.destroy()

    def get_input(self):
        """Wait until the dialog is closed and return the entered text or None."""
        self.master.wait_window(self)
        return self._user_input
+Homepage: https://github.com/Akascape/CTkScrollableDropdown +""" + +__version__ = "1.0" + +from .ctk_scrollable_dropdown import CTkScrollableDropdown +from .ctk_scrollable_dropdown_frame import CTkScrollableDropdownFrame diff --git a/src/view/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown.py b/src/view/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown.py new file mode 100644 index 0000000..c5acf97 --- /dev/null +++ b/src/view/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown.py @@ -0,0 +1,453 @@ +""" +Advanced Scrollable Dropdown class for customtkinter widgets +Author: Akash Bora +""" + +import difflib +import sys +import time + +import customtkinter + + +class CTkScrollableDropdown(customtkinter.CTkToplevel): + def __init__( + self, + attach, + x=None, + y=None, + button_color=None, + height: int = 200, + width: int = None, + fg_color=None, + button_height: int = 20, + justify="center", + scrollbar_button_color=None, + scrollbar=True, + scrollbar_button_hover_color=None, + frame_border_width=2, + values=[], + command=None, + image_values=[], + alpha: float = 0.97, + frame_corner_radius=20, + double_click=False, + resize=True, + frame_border_color=None, + text_color=None, + autocomplete=False, + hover_color=None, + **button_kwargs + ): + super().__init__(takefocus=1) + + self.focus() + self.lift() + self.alpha = alpha + self.attach = attach + self.corner = frame_corner_radius + self.padding = 0 + self.focus_something = False + self.disable = True + self.update() + + if sys.platform.startswith("win"): + self.after(100, lambda: self.overrideredirect(True)) + self.transparent_color = self._apply_appearance_mode(self._fg_color) + self.attributes("-transparentcolor", self.transparent_color) + elif sys.platform.startswith("darwin"): + self.overrideredirect(True) + self.transparent_color = "systemTransparent" + self.attributes("-transparent", True) + self.focus_something = True + else: + self.overrideredirect(True) + 
self.transparent_color = "#000001" + self.corner = 0 + self.padding = 18 + self.withdraw() + + self.hide = True + self.attach.bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) + self.attach.winfo_toplevel().bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) + self.attach.winfo_toplevel().bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) + + self.attributes("-alpha", 0) + self.disable = False + self.fg_color = ( + customtkinter.ThemeManager.theme["CTkFrame"]["fg_color"] + if fg_color is None + else fg_color + ) + self.scroll_button_color = ( + customtkinter.ThemeManager.theme["CTkScrollbar"]["button_color"] + if scrollbar_button_color is None + else scrollbar_button_color + ) + self.scroll_hover_color = ( + customtkinter.ThemeManager.theme["CTkScrollbar"]["button_hover_color"] + if scrollbar_button_hover_color is None + else scrollbar_button_hover_color + ) + self.frame_border_color = ( + customtkinter.ThemeManager.theme["CTkFrame"]["border_color"] + if frame_border_color is None + else frame_border_color + ) + self.button_color = ( + customtkinter.ThemeManager.theme["CTkFrame"]["top_fg_color"] + if button_color is None + else button_color + ) + self.text_color = ( + customtkinter.ThemeManager.theme["CTkLabel"]["text_color"] + if text_color is None + else text_color + ) + self.hover_color = ( + customtkinter.ThemeManager.theme["CTkButton"]["hover_color"] + if hover_color is None + else hover_color + ) + + if scrollbar is False: + self.scroll_button_color = self.fg_color + self.scroll_hover_color = self.fg_color + + self.frame = customtkinter.CTkScrollableFrame( + self, + bg_color=self.transparent_color, + fg_color=self.fg_color, + scrollbar_button_hover_color=self.scroll_hover_color, + corner_radius=self.corner, + border_width=frame_border_width, + scrollbar_button_color=self.scroll_button_color, + border_color=self.frame_border_color, + ) + 
self.frame._scrollbar.grid_configure(padx=3) + self.frame.pack(expand=True, fill="both") + self.dummy_entry = customtkinter.CTkEntry( + self.frame, fg_color="transparent", border_width=0, height=1, width=1 + ) + self.no_match = customtkinter.CTkLabel(self.frame, text="No Match") + self.height = height + self.height_new = height + self.width = width + self.command = command + self.fade = False + self.resize = resize + self.autocomplete = autocomplete + self.var_update = customtkinter.StringVar() + self.appear = False + + if justify.lower() == "left": + self.justify = "w" + elif justify.lower() == "right": + self.justify = "e" + else: + self.justify = "c" + + self.button_height = button_height + self.values = values + self.button_num = len(self.values) + self.image_values = ( + None if len(image_values) != len(self.values) else image_values + ) + + self.resizable(width=False, height=False) + self.transient(self.master) + self._init_buttons(**button_kwargs) + + # Add binding for different ctk widgets + if ( + double_click + or self.attach.winfo_name().startswith("!ctkentry") + or self.attach.winfo_name().startswith("!ctkcombobox") + ): + self.attach.bind("", lambda e: self._iconify(), add="+") + else: + self.attach.bind("", lambda e: self._iconify(), add="+") + + if self.attach.winfo_name().startswith("!ctkcombobox"): + self.attach._canvas.tag_bind( + "right_parts", "", lambda e: self._iconify() + ) + self.attach._canvas.tag_bind( + "dropdown_arrow", "", lambda e: self._iconify() + ) + if self.command is None: + self.command = self.attach.set + + if self.attach.winfo_name().startswith("!ctkoptionmenu"): + self.attach._canvas.bind("", lambda e: self._iconify()) + self.attach._text_label.bind("", lambda e: self._iconify()) + if self.command is None: + self.command = self.attach.set + + self.attach.bind("", lambda _: self._destroy(), add="+") + + self.update_idletasks() + self.x = x + self.y = y + + if self.autocomplete: + self.bind_autocomplete() + + self.deiconify() + 
self.withdraw() + + self.attributes("-alpha", self.alpha) + + def _destroy(self): + self.after(500, self.destroy_popup) + + def _withdraw(self): + if self.winfo_viewable() and self.hide: + self.withdraw() + + self.event_generate("<>") + self.hide = True + + def _update(self, a, b, c): + self.live_update(self.attach._entry.get()) + + def bind_autocomplete( + self, + ): + def appear(x): + self.appear = True + + if self.attach.winfo_name().startswith("!ctkcombobox"): + self.attach._entry.configure(textvariable=self.var_update) + self.attach._entry.bind("", appear) + self.attach.set(self.values[0]) + self.var_update.trace_add("write", self._update) + + if self.attach.winfo_name().startswith("!ctkentry"): + self.attach.configure(textvariable=self.var_update) + self.attach.bind("", appear) + self.var_update.trace_add("write", self._update) + + def fade_out(self): + for i in range(100, 0, -10): + if not self.winfo_exists(): + break + self.attributes("-alpha", i / 100) + self.update() + time.sleep(1 / 100) + + def fade_in(self): + for i in range(0, 100, 10): + if not self.winfo_exists(): + break + self.attributes("-alpha", i / 100) + self.update() + time.sleep(1 / 100) + + def _init_buttons(self, **button_kwargs): + self.i = 0 + self.widgets = {} + for row in self.values: + self.widgets[self.i] = customtkinter.CTkButton( + self.frame, + text=row, + height=self.button_height, + fg_color=self.button_color, + text_color=self.text_color, + image=self.image_values[self.i] + if self.image_values is not None + else None, + anchor=self.justify, + command=lambda k=row: self._attach_key_press(k), + **button_kwargs + ) + self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0)) + self.i += 1 + + self.hide = False + + def destroy_popup(self): + self.destroy() + self.disable = True + + def place_dropdown(self): + self.x_pos = ( + self.attach.winfo_rootx() + if self.x is None + else self.x + self.attach.winfo_rootx() + ) + self.y_pos = ( + self.attach.winfo_rooty() + 
self.attach.winfo_reqheight() + 5 + if self.y is None + else self.y + self.attach.winfo_rooty() + ) + self.width_new = self.attach.winfo_width() if self.width is None else self.width + + if self.resize: + if self.button_num <= 5: + self.height_new = self.button_height * self.button_num + 55 + else: + self.height_new = self.button_height * self.button_num + 35 + if self.height_new > self.height: + self.height_new = self.height + + self.geometry( + "{}x{}+{}+{}".format( + self.width_new, self.height_new, self.x_pos, self.y_pos + ) + ) + self.fade_in() + self.attributes("-alpha", self.alpha) + self.attach.focus() + + def _iconify(self): + if self.attach.cget("state") == "disabled": + return + if self.disable: + return + if self.hide: + self.event_generate("<>") + self._deiconify() + self.focus() + self.hide = False + self.place_dropdown() + if self.focus_something: + self.dummy_entry.pack() + self.dummy_entry.focus_set() + self.after(100, self.dummy_entry.pack_forget) + else: + self.withdraw() + self.hide = True + + def _attach_key_press(self, k): + self.event_generate("<>") + self.fade = True + if self.command: + self.command(k) + self.fade = False + self.fade_out() + self.withdraw() + self.hide = True + + def live_update(self, string=None): + if not self.appear: + return + if self.disable: + return + if self.fade: + return + if string: + string = string.lower() + self._deiconify() + i = 1 + for key in self.widgets.keys(): + s = self.widgets[key].cget("text").lower() + text_similarity = difflib.SequenceMatcher( + None, s[0 : len(string)], string + ).ratio() + similar = s.startswith(string) or text_similarity > 0.75 + if not similar: + self.widgets[key].pack_forget() + else: + self.widgets[key].pack(fill="x", pady=2, padx=(self.padding, 0)) + i += 1 + + if i == 1: + self.no_match.pack(fill="x", pady=2, padx=(self.padding, 0)) + else: + self.no_match.pack_forget() + self.button_num = i + self.place_dropdown() + + else: + self.no_match.pack_forget() + self.button_num = 
def configure(self, **kwargs):
    """Update dropdown options and forward the remaining ones to the buttons.

    The keys ``height``, ``alpha``, ``width``, ``fg_color``, ``values``,
    ``image_values`` and ``button_color`` are consumed here.  Every other
    keyword is passed unchanged to ``configure`` of each button stored in
    ``self.widgets``; ``hover_color`` defaults to ``self.hover_color`` when
    the caller does not provide it.

    :param kwargs: Options to apply.
    """
    if "height" in kwargs:
        self.height = kwargs.pop("height")
        # Keep the effective height in step with the new limit.
        self.height_new = self.height

    if "alpha" in kwargs:
        self.alpha = kwargs.pop("alpha")

    if "width" in kwargs:
        self.width = kwargs.pop("width")

    if "fg_color" in kwargs:
        self.frame.configure(fg_color=kwargs.pop("fg_color"))

    if "values" in kwargs:
        self.values = kwargs.pop("values")
        # Images belong to the previous value list, so drop them.
        self.image_values = None
        self.button_num = len(self.values)
        for widget in self.widgets.values():
            widget.destroy()
        self._init_buttons()

    if "image_values" in kwargs:
        images = kwargs.pop("image_values")
        # Images are only usable when there is exactly one per value.
        self.image_values = images if len(images) == len(self.values) else None
        if self.image_values is not None:
            for widget, image in zip(self.widgets.values(), self.image_values):
                widget.configure(image=image)

    if "button_color" in kwargs:
        # Pop once, before the loop: popping per widget raised KeyError as
        # soon as the dropdown held a second button.
        button_color = kwargs.pop("button_color")
        # Remember the colour so buttons created afterwards match.
        self.button_color = button_color
        for widget in self.widgets.values():
            widget.configure(fg_color=button_color)

    if "hover_color" not in kwargs:
        kwargs["hover_color"] = self.hover_color

    for widget in self.widgets.values():
        widget.configure(**kwargs)
a/src/view/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown_frame.py b/src/view/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown_frame.py new file mode 100644 index 0000000..0dcaff5 --- /dev/null +++ b/src/view/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown_frame.py @@ -0,0 +1,399 @@ +""" +Advanced Scrollable Dropdown Frame class for customtkinter widgets +Author: Akash Bora +""" + +import difflib +import sys + +import customtkinter + + +class CTkScrollableDropdownFrame(customtkinter.CTkFrame): + def __init__( + self, + attach, + x=None, + y=None, + button_color=None, + height: int = 200, + width: int = None, + fg_color=None, + button_height: int = 20, + justify="center", + scrollbar_button_color=None, + scrollbar=True, + scrollbar_button_hover_color=None, + frame_border_width=2, + values=[], + command=None, + image_values=[], + double_click=False, + frame_corner_radius=True, + resize=True, + frame_border_color=None, + text_color=None, + autocomplete=False, + **button_kwargs + ): + super().__init__( + master=attach.winfo_toplevel(), bg_color=attach.cget("bg_color") + ) + + self.attach = attach + self.corner = 11 if frame_corner_radius else 0 + self.padding = 0 + self.disable = True + + self.hide = True + self.attach.bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) + self.attach.winfo_toplevel().bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) + + self.disable = False + self.fg_color = ( + customtkinter.ThemeManager.theme["CTkFrame"]["fg_color"] + if fg_color is None + else fg_color + ) + self.scroll_button_color = ( + customtkinter.ThemeManager.theme["CTkScrollbar"]["button_color"] + if scrollbar_button_color is None + else scrollbar_button_color + ) + self.scroll_hover_color = ( + customtkinter.ThemeManager.theme["CTkScrollbar"]["button_hover_color"] + if scrollbar_button_hover_color is None + else scrollbar_button_hover_color + ) + 
self.frame_border_color = ( + customtkinter.ThemeManager.theme["CTkFrame"]["border_color"] + if frame_border_color is None + else frame_border_color + ) + self.button_color = ( + customtkinter.ThemeManager.theme["CTkFrame"]["top_fg_color"] + if button_color is None + else button_color + ) + self.text_color = ( + customtkinter.ThemeManager.theme["CTkLabel"]["text_color"] + if text_color is None + else text_color + ) + + if scrollbar is False: + self.scroll_button_color = self.fg_color + self.scroll_hover_color = self.fg_color + + self.frame = customtkinter.CTkScrollableFrame( + self, + fg_color=self.fg_color, + bg_color=attach.cget("bg_color"), + scrollbar_button_hover_color=self.scroll_hover_color, + corner_radius=self.corner, + border_width=frame_border_width, + scrollbar_button_color=self.scroll_button_color, + border_color=self.frame_border_color, + ) + self.frame._scrollbar.grid_configure(padx=3) + self.frame.pack(expand=True, fill="both") + + if self.corner == 0: + self.corner = 21 + + self.dummy_entry = customtkinter.CTkEntry( + self.frame, fg_color="transparent", border_width=0, height=1, width=1 + ) + self.no_match = customtkinter.CTkLabel(self.frame, text="No Match") + self.height = height + self.height_new = height + self.width = width + self.command = command + self.fade = False + self.resize = resize + self.autocomplete = autocomplete + self.var_update = customtkinter.StringVar() + self.appear = False + + if justify.lower() == "left": + self.justify = "w" + elif justify.lower() == "right": + self.justify = "e" + else: + self.justify = "c" + + self.button_height = button_height + self.values = values + self.button_num = len(self.values) + self.image_values = ( + None if len(image_values) != len(self.values) else image_values + ) + + self._init_buttons(**button_kwargs) + + # Add binding for different ctk widgets + if ( + double_click + or self.attach.winfo_name().startswith("!ctkentry") + or self.attach.winfo_name().startswith("!ctkcombobox") + ): + 
self.attach.bind("", lambda e: self._iconify(), add="+") + self.attach._entry.bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) + else: + self.attach.bind("", lambda e: self._iconify(), add="+") + + if self.attach.winfo_name().startswith("!ctkcombobox"): + self.attach._canvas.tag_bind( + "right_parts", "", lambda e: self._iconify() + ) + self.attach._canvas.tag_bind( + "dropdown_arrow", "", lambda e: self._iconify() + ) + + if self.command is None: + self.command = self.attach.set + + if self.attach.winfo_name().startswith("!ctkoptionmenu"): + self.attach._canvas.bind("", lambda e: self._iconify()) + self.attach._text_label.bind("", lambda e: self._iconify()) + if self.command is None: + self.command = self.attach.set + + self.x = x + self.y = y + + self.attach.bind("", lambda _: self._destroy(), add="+") + + if self.autocomplete: + self.bind_autocomplete() + + def _destroy(self): + self.after(500, self.destroy_popup) + + def _withdraw(self): + if self.winfo_viewable() and self.hide: + self.place_forget() + + self.event_generate("<>") + self.hide = True + + def _update(self, a, b, c): + self.live_update(self.attach._entry.get()) + + def bind_autocomplete(self): + def appear(x): + self.appear = True + + if self.attach.winfo_name().startswith("!ctkcombobox"): + self.attach._entry.configure(textvariable=self.var_update) + self.attach.set(self.values[0]) + self.attach._entry.bind("", appear) + self.var_update.trace_add("write", self._update) + + if self.attach.winfo_name().startswith("!ctkentry"): + self.attach.configure(textvariable=self.var_update) + self.attach.bind("", appear) + self.var_update.trace_add("write", self._update) + + def _init_buttons(self, **button_kwargs): + self.i = 0 + self.widgets = {} + for row in self.values: + self.widgets[self.i] = customtkinter.CTkButton( + self.frame, + text=row, + height=self.button_height, + fg_color=self.button_color, + text_color=self.text_color, + image=self.image_values[self.i] + 
def place_dropdown(self):
    """Compute the dropdown geometry and place it next to ``self.attach``.

    Position and width are derived from the attached widget unless explicit
    ``self.x`` / ``self.y`` / ``self.width`` values were given.  When
    ``self.resize`` is set, the height follows the number of buttons and is
    capped at ``self.height``.
    """
    attach = self.attach

    if self.x is None:
        self.x_pos = attach.winfo_x()
    else:
        self.x_pos = self.x + attach.winfo_rootx()

    if self.y is None:
        # Default position: just below the attached widget, with a 5 px gap.
        self.y_pos = attach.winfo_y() + attach.winfo_reqheight() + 5
    else:
        self.y_pos = self.y + attach.winfo_rooty()

    if self.width is None:
        self.width_new = attach.winfo_width() - 45 + self.corner
    else:
        self.width_new = self.width

    if self.resize:
        extra = 55 if self.button_num <= 5 else 35
        wanted = self.button_height * self.button_num + extra
        self.height_new = min(wanted, self.height)

    self.frame.configure(width=self.width_new, height=self.height_new)
    self.place(x=self.x_pos, y=self.y_pos)

    if sys.platform.startswith("darwin"):
        self.dummy_entry.pack()
        # Pass the bound method itself.  Calling it here removed the entry
        # immediately and handed ``None`` to ``after``, which makes tkinter
        # block for the whole delay instead of scheduling a callback.
        self.after(100, self.dummy_entry.pack_forget)

    self.lift()
    attach.focus()
def configure(self, **kwargs):
    """Update dropdown options and forward the remaining ones to the buttons.

    The keys ``height``, ``alpha``, ``width``, ``fg_color``, ``values``,
    ``image_values`` and ``button_color`` are consumed here.  Every other
    keyword is passed unchanged to ``configure`` of each button stored in
    ``self.widgets``.

    :param kwargs: Options to apply.
    """
    if "height" in kwargs:
        self.height = kwargs.pop("height")
        # Keep the effective height in step with the new limit.
        self.height_new = self.height

    if "alpha" in kwargs:
        self.alpha = kwargs.pop("alpha")

    if "width" in kwargs:
        self.width = kwargs.pop("width")

    if "fg_color" in kwargs:
        self.frame.configure(fg_color=kwargs.pop("fg_color"))

    if "values" in kwargs:
        self.values = kwargs.pop("values")
        # Images belong to the previous value list, so drop them.
        self.image_values = None
        self.button_num = len(self.values)
        for widget in self.widgets.values():
            widget.destroy()
        self._init_buttons()

    if "image_values" in kwargs:
        images = kwargs.pop("image_values")
        # Images are only usable when there is exactly one per value.
        self.image_values = images if len(images) == len(self.values) else None
        if self.image_values is not None:
            for widget, image in zip(self.widgets.values(), self.image_values):
                widget.configure(image=image)

    if "button_color" in kwargs:
        # Pop once, before the loop: popping per widget raised KeyError as
        # soon as the dropdown held a second button.
        button_color = kwargs.pop("button_color")
        # Remember the colour so buttons created afterwards match.
        self.button_color = button_color
        for widget in self.widgets.values():
            widget.configure(fg_color=button_color)

    for widget in self.widgets.values():
        widget.configure(**kwargs)
# To store the `after()` method ID + + # GETTERS AND SETTERS + + def set_controller(self, controller: MainController): + """ + Set the controller of the window. + + :param controller: View controller + :type controller: MainController + """ + self._controller = controller + + def _get_language_code(self): + return du.find_key_by_value( + dictionary=c.AUDIO_LANGUAGES, target_value=self.omn_audio_language.get() + ) + + def _get_whisperx_args(self): + whisperx_args = {} + if self.radio_var.get() == TranscriptionMethod.WHISPERX.value: + whisperx_args["should_translate"] = ( + self.chk_whisper_options_translate.get() == 1 + ) + whisperx_args["should_subtitle"] = ( + self.chk_whisper_options_subtitles.get() == 1 + ) + + return whisperx_args + + # WIDGETS INITIALIZATION + + def _init_sidebar(self): + # Sidebar frame + self.frm_sidebar = ctk.CTkScrollableFrame( + master=self, width=230, corner_radius=0 + ) + self.frm_sidebar.grid(row=0, column=0, rowspan=4, sticky=ctk.NSEW) + self.frm_sidebar.grid_rowconfigure(10, weight=1) + + # Logo label + self.logo_image = ctk.CTkImage( + light_image=Image.open( + ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "icon-light.png" + ), + dark_image=Image.open( + ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "icon-dark.png" + ), + size=(32, 32), + ) + + self.lbl_logo = ctk.CTkLabel( + master=self.frm_sidebar, + text=f" {c.APP_NAME}", + image=self.logo_image, + compound=ctk.LEFT, + font=ctk.CTkFont(size=22, weight="bold"), + ) + self.lbl_logo.grid(row=0, column=0, padx=20, pady=(19, 0)) + + # ------------------ + + # Shared options frame + self.frm_shared_options = ctk.CTkFrame(master=self.frm_sidebar, border_width=2) + self.frm_shared_options.grid(row=1, column=0, padx=20, pady=(20, 0)) + + ## 'Audio language' option menu + self.lbl_audio_language = ctk.CTkLabel( + master=self.frm_shared_options, + text=_("Audio language"), + font=ctk.CTkFont(size=14, weight="bold"), + ) + self.lbl_audio_language.grid(row=0, column=0, padx=0, pady=(10, 0)) + + 
self.omn_audio_language = ctk.CTkOptionMenu(master=self.frm_shared_options) + CTkScrollableDropdown( + attach=self.omn_audio_language, + values=list(c.AUDIO_LANGUAGES.values()), + alpha=1, + ) + self.omn_audio_language.grid(row=1, column=0, padx=20, pady=0, sticky=ctk.EW) + try: + self.omn_audio_language.set( + c.AUDIO_LANGUAGES[locale.getdefaultlocale()[0][:2]] + ) + except Exception: + self.omn_audio_language.set("English") + + ## 'Transcribe from' option menu + self.lbl_transcribe_from = ctk.CTkLabel( + master=self.frm_shared_options, + text="Transcribe from", + font=ctk.CTkFont(size=14, weight="bold"), + ) + self.lbl_transcribe_from.grid(row=2, column=0, padx=0, pady=(15, 0)) + + self.omn_transcribe_from = ctk.CTkOptionMenu( + master=self.frm_shared_options, + values=[e.value for e in AudioSource], + command=self._on_change_transcribe_from_event, + ) + self.omn_transcribe_from.grid(row=3, column=0, padx=20, pady=0, sticky=ctk.EW) + self.omn_transcribe_from.set(AudioSource.FILE.value) + + ## 'Generate transcription' button + self.btn_generate_transcription = ctk.CTkButton( + master=self.frm_shared_options, + fg_color="green", + hover_color="darkgreen", + text=_("Generate transcription"), + command=lambda: self._on_generate_transcription(), + ) + self.btn_generate_transcription.grid( + row=4, column=0, padx=20, pady=(25, 20), sticky=ctk.EW + ) + + # ------------------ + + # 'Transcribe using' frame + self.frm_transcribe_using = ctk.CTkFrame( + master=self.frm_sidebar, border_width=2 + ) + self.frm_transcribe_using.grid(row=2, column=0, padx=0, pady=(20, 0)) + + # 'Transcribe using' label + self.lbl_transcribe_using = ctk.CTkLabel( + master=self.frm_transcribe_using, + text=_("Transcribe using"), + font=ctk.CTkFont(size=14, weight="bold"), # 14 is the default size + ) + self.lbl_transcribe_using.grid(row=0, column=0, padx=0, pady=(10, 12.5)) + + self.radio_var = tkinter.IntVar(value=TranscriptionMethod.WHISPERX.value) + + self.rbt_transcribe_using_whisper = 
ctk.CTkRadioButton( + master=self.frm_transcribe_using, + variable=self.radio_var, + value=TranscriptionMethod.WHISPERX.value, + text="WhisperX (local)", + command=self._on_transcribe_using_change, + ) + self.rbt_transcribe_using_whisper.grid( + row=1, column=0, padx=20, pady=0, sticky=ctk.W + ) + + self.rbt_transcribe_using_google = ctk.CTkRadioButton( + master=self.frm_transcribe_using, + variable=self.radio_var, + value=TranscriptionMethod.GOOGLE_API.value, + text="Google API (remote)", + command=self._on_transcribe_using_change, + ) + self.rbt_transcribe_using_google.grid( + row=2, column=0, padx=20, pady=(7.5, 16), sticky=ctk.W + ) + + # ------------------ + + # Whisper options frame + self.frm_whisper_options = ctk.CTkFrame(master=self.frm_sidebar, border_width=2) + self.frm_whisper_options.grid(row=3, column=0, padx=20, pady=(20, 0)) + + ## Title label + self.lbl_whisper_options = ctk.CTkLabel( + master=self.frm_whisper_options, + text="WhisperX options", + font=ctk.CTkFont(size=14, weight="bold"), # 14 is the default size + ) + self.lbl_whisper_options.grid(row=0, column=0, padx=10, pady=(10, 12.5)) + + ## 'Translate to English' checkbox + self.chk_whisper_options_translate = ctk.CTkCheckBox( + master=self.frm_whisper_options, + text="Translate to English", + command=self._on_chk_whisper_options_translate_change, + ) + self.chk_whisper_options_translate.grid( + row=1, column=0, padx=20, pady=0, sticky=ctk.W + ) + + ## 'Subtitles' checkbox + self.chk_whisper_options_subtitles = ctk.CTkCheckBox( + master=self.frm_whisper_options, + text="Generate subtitles", + command=self._on_whisper_options_subtitles_change, + ) + self.chk_whisper_options_subtitles.grid( + row=2, column=0, padx=20, pady=(10, 0), sticky=ctk.W + ) + + ## 'Show advanced options' button + self.btn_whisperx_show_advanced_options = ctk.CTkButton( + master=self.frm_whisper_options, + text=_("Show advanced options"), + command=self._on_show_advanced_options, + ) + 
self.btn_whisperx_show_advanced_options.grid( + row=3, column=0, padx=20, pady=16, sticky=ctk.EW + ) + + # ------------------ + + # 'Google API options' frame + self.frm_google_api_options = ctk.CTkFrame( + master=self.frm_sidebar, border_width=2 + ) + self.frm_google_api_options.grid( + row=3, column=0, padx=20, pady=(20, 0), sticky=ctk.EW + ) + # Hidden at first because WhisperX is the default transcription method + self.frm_google_api_options.grid_remove() + + ## Title label + self.lbl_google_api_options = ctk.CTkLabel( + master=self.frm_google_api_options, + text="Google API options", + font=ctk.CTkFont(size=14, weight="bold"), # 14 is the default size + ) + self.lbl_google_api_options.grid(row=0, column=0, padx=10, pady=(10, 12.5)) + + ## 'Set API key' button + self.btn_set_google_api_key = ctk.CTkButton( + master=self.frm_google_api_options, + text=_("Set API key"), + command=self._on_set_google_api_key, + ) + self.btn_set_google_api_key.grid( + row=1, column=0, padx=20, pady=(0, 20), sticky=ctk.EW + ) + + # ------------------ + + # Subtitle options frame + self.frm_subtitle_options = ctk.CTkFrame( + master=self.frm_sidebar, border_width=2 + ) + self.frm_subtitle_options.grid( + row=4, column=0, padx=20, pady=(20, 0), sticky=ctk.EW + ) + self.frm_subtitle_options.grid_remove() # Hidden by default + + ## Title label + self.lbl_subtitle_options = ctk.CTkLabel( + master=self.frm_subtitle_options, + text="Subtitle options", + font=ctk.CTkFont(size=14, weight="bold"), # 14 is the default size + ) + self.lbl_subtitle_options.grid( + row=0, column=0, padx=40, pady=(10, 0), sticky=ctk.EW + ) + + ## 'Highlight words' check box + self.chk_highlight_words = ctk.CTkCheckBox( + master=self.frm_subtitle_options, + text="Highlight words", + command=lambda: self._on_config_change( + section=ConfigSubtitles.Key.SECTION, + key=ConfigSubtitles.Key.HIGHLIGHT_WORDS, + new_value="True" if self.chk_highlight_words.get() else "False", + ), + ) + self.chk_highlight_words.grid(row=1, 
column=0, padx=20, pady=10, sticky=ctk.W) + + ## 'Max. line count' entry + self.lbl_max_line_count = ctk.CTkLabel( + master=self.frm_subtitle_options, + text=_("Max. line count"), + ) + self.lbl_max_line_count.grid( + row=2, column=0, padx=(52, 0), pady=0, sticky=ctk.W + ) + + self.max_line_count = ctk.StringVar( + self, str(self._config_subtitles.max_line_count) + ) + self._setup_debounced_change( + section=ConfigSubtitles.Key.SECTION, + key=ConfigSubtitles.Key.MAX_LINE_COUNT, + variable=self.max_line_count, + callback=self._on_config_change, + ) + + self.ent_max_line_count = ctk.CTkEntry( + master=self.frm_subtitle_options, + width=28, + textvariable=self.max_line_count, + ) + self.ent_max_line_count.grid( + row=2, column=0, padx=(18, 20), pady=0, sticky=ctk.W + ) + + ## 'Max. line width' entry + self.lbl_max_line_width = ctk.CTkLabel( + master=self.frm_subtitle_options, + text=_("Max. line width"), + ) + self.lbl_max_line_width.grid( + row=3, column=0, padx=(52, 0), pady=(10, 14), sticky=ctk.W + ) + + self.max_line_width = ctk.StringVar( + self, str(self._config_subtitles.max_line_width) + ) + self._setup_debounced_change( + section=ConfigSubtitles.Key.SECTION, + key=ConfigSubtitles.Key.MAX_LINE_WIDTH, + variable=self.max_line_width, + callback=self._on_config_change, + ) + + self.ent_max_line_width = ctk.CTkEntry( + master=self.frm_subtitle_options, + width=28, + textvariable=self.max_line_width, + ) + self.ent_max_line_width.grid( + row=3, column=0, padx=(18, 20), pady=(10, 14), sticky=ctk.W + ) + + # ------------------ + + # WhisperX advanced options frame + self.frm_whisperx_advanced_options = ctk.CTkFrame( + master=self.frm_sidebar, border_width=2 + ) + self.frm_whisperx_advanced_options.grid( + row=5, column=0, padx=20, pady=(20, 0), sticky=ctk.EW + ) + self.frm_whisperx_advanced_options.grid_remove() # Hidden by default + + ## Title label + self.lbl_advanced_options = ctk.CTkLabel( + master=self.frm_whisperx_advanced_options, + text="Advanced options", + 
font=ctk.CTkFont(size=14, weight="bold"), # 14 is the default size + ) + self.lbl_advanced_options.grid( + row=0, column=0, padx=10, pady=(10, 5), sticky=ctk.EW + ) + + ## 'Model size' option menu + self.lbl_model_size = ctk.CTkLabel( + master=self.frm_whisperx_advanced_options, + text="Model size", + ) + self.lbl_model_size.grid(row=1, column=0, padx=20, pady=0, sticky=ctk.W) + + self.omn_model_size = ctk.CTkOptionMenu( + master=self.frm_whisperx_advanced_options, + values=[model_size.value for model_size in ModelSize.__members__.values()], + command=lambda *args: self._on_config_change( + section=ConfigWhisperX.Key.SECTION, + key=ConfigWhisperX.Key.MODEL_SIZE, + new_value=self.omn_model_size.get(), + ), + ) + self.omn_model_size.grid(row=2, column=0, padx=20, pady=(3, 10), sticky=ctk.EW) + self.omn_model_size.set(self._config_whisperx.model_size) + + ## 'Compute type' option menu + self.lbl_compute_type = ctk.CTkLabel( + master=self.frm_whisperx_advanced_options, + text="Compute type", + ) + self.lbl_compute_type.grid(row=3, column=0, padx=20, pady=0, sticky=ctk.W) + + self.omn_compute_type = ctk.CTkOptionMenu( + master=self.frm_whisperx_advanced_options, + values=[ + compute_type.value for compute_type in ComputeType.__members__.values() + ], + command=lambda *args: self._on_config_change( + section=ConfigWhisperX.Key.SECTION, + key=ConfigWhisperX.Key.COMPUTE_TYPE, + new_value=self.omn_compute_type.get(), + ), + ) + self.omn_compute_type.grid( + row=4, column=0, padx=20, pady=(3, 17), sticky=ctk.EW + ) + self.omn_compute_type.set(self._config_whisperx.compute_type) + + ## 'Batch size' entry + self.lbl_batch_size = ctk.CTkLabel( + master=self.frm_whisperx_advanced_options, + text="Batch size", + ) + self.lbl_batch_size.grid(row=5, column=0, padx=(50, 0), pady=0, sticky=ctk.W) + + self.batch_size = ctk.StringVar(self, str(self._config_whisperx.batch_size)) + self._setup_debounced_change( + section=ConfigWhisperX.Key.SECTION, + key=ConfigWhisperX.Key.BATCH_SIZE, + 
variable=self.batch_size, + callback=self._on_config_change, + ) + + self.ent_batch_size = ctk.CTkEntry( + master=self.frm_whisperx_advanced_options, + width=28, + textvariable=self.batch_size, + ) + self.ent_batch_size.grid(row=5, column=0, padx=(18, 20), pady=0, sticky=ctk.W) + + ## 'Use CPU' checkbox + self.chk_use_cpu = ctk.CTkCheckBox( + master=self.frm_whisperx_advanced_options, + text="Use CPU", + command=lambda: self._on_config_change( + section=ConfigWhisperX.Key.SECTION, + key=ConfigWhisperX.Key.USE_CPU, + new_value="True" if self.chk_use_cpu.get() else "False", + ), + ) + self.chk_use_cpu.grid(row=6, column=0, padx=20, pady=(10, 16), sticky=ctk.W) + + if self._config_whisperx.use_cpu: + self.chk_use_cpu.select() + + if not self._config_whisperx.can_use_gpu: + self.chk_use_cpu.select() + self.chk_use_cpu.configure(state=ctk.DISABLED) + + # ------------------ + + ## 'Appearance mode' option menu + self.lbl_appearance_mode = ctk.CTkLabel( + master=self.frm_sidebar, + text=_("Appearance mode"), + anchor=ctk.W, + font=ctk.CTkFont(size=14, weight="bold"), + ) + self.lbl_appearance_mode.grid(row=12, column=0, padx=20, pady=(50, 0)) + + self.omn_appearance_mode = ctk.CTkOptionMenu( + master=self.frm_sidebar, + values=[_("System"), _("Light"), _("Dark")], + command=self._change_appearance_mode_event, + ) + self.omn_appearance_mode.grid(row=13, column=0, padx=20, pady=0, sticky=ctk.EW) + + ## Info label + self.lbl_info = ctk.CTkLabel( + master=self.frm_sidebar, + text="v2.2.0 | Made by HenestrosaDev", + font=ctk.CTkFont(size=12), + ) + self.lbl_info.grid(row=14, column=0, padx=20, pady=(5, 10)) + + def _init_main_content(self): + # Main entry frame + self.frm_main_entry = ctk.CTkFrame(master=self, fg_color="transparent") + self.frm_main_entry.grid(row=0, column=1, padx=20, pady=(20, 0), sticky=ctk.EW) + self.frm_main_entry.grid_columnconfigure(1, weight=1) + + ## 'Path' entry + self.lbl_path = ctk.CTkLabel( + master=self.frm_main_entry, + text="File path", + 
font=ctk.CTkFont(size=14, weight="bold"), + ) + self.lbl_path.grid(row=0, column=0, padx=(0, 15), sticky=ctk.W) + + self.ent_path = ctk.CTkEntry(master=self.frm_main_entry) + self.ent_path.grid(row=0, column=1, padx=0, sticky=ctk.EW) + + ## File explorer image button + self.img_file_explorer = ctk.CTkImage( + Image.open(ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "file-explorer.png"), + size=(24, 24), + ) + self.btn_file_explorer = ctk.CTkButton( + self.frm_main_entry, + image=self.img_file_explorer, + text="", + width=32, + command=self._on_select_file, + ) + self.btn_file_explorer.grid(row=0, column=2, padx=(15, 0), sticky=ctk.E) + + ## Textbox + self.tbx_transcription = ctk.CTkTextbox(master=self, wrap=ctk.WORD) + self.tbx_transcription.grid(row=2, column=1, padx=20, pady=20, sticky=ctk.NSEW) + + ## Progress bar + self.progress_bar = ctk.CTkProgressBar(master=self) + self.progress_bar.configure(mode="indeterminate") + + ## 'Save transcription' button + self.btn_save = ctk.CTkButton( + master=self, + fg_color="green", + hover_color="darkgreen", + text=_("Save transcription"), + command=self._on_save_transcription, + ) + self.btn_save.grid(row=3, column=1, padx=20, pady=(0, 20), sticky=ctk.EW) + + # PUBLIC METHODS + + def on_select_file_success(self, filepath): + self.ent_path.configure(textvariable=ctk.StringVar(self, filepath)) + + def on_processing_transcription(self): + # Disable action buttons to avoid multiple requests at the same time + self.ent_path.configure(state=ctk.DISABLED) + self.omn_transcribe_from.configure(state=ctk.DISABLED) + self.omn_audio_language.configure(state=ctk.DISABLED) + + if not self._is_transcribing_from_mic: + self.btn_generate_transcription.configure(state=ctk.DISABLED) + + # Show progress bar + self._toggle_progress_bar_visibility(should_show=True) + + # Remove previous text + self.display_text("") + + def on_processed_transcription(self, success: bool): + # Re-enable disabled widgets + self.ent_path.configure(state=ctk.NORMAL) + 
def _on_change_debounced(self, section, key, variable, callback, delay=600):
    """Call ``callback(section, key, variable.get())`` once edits settle.

    Each ``(section, key)`` pair has its own pending timer.  A single shared
    timer let an edit in one field cancel the not-yet-saved change of
    another field made within ``delay`` milliseconds.

    :param section: Config section handed to ``callback``.
    :param key: Config key handed to ``callback``.
    :param variable: Object whose ``get()`` is read when the timer fires.
    :param callback: Callable invoked as ``callback(section, key, value)``.
    :param delay: Quiet period in milliseconds before ``callback`` runs.
    :type delay: int
    """
    # Created lazily so this method does not depend on __init__ changes.
    pending = getattr(self, "_debounce_after_ids", None)
    if pending is None:
        pending = self._debounce_after_ids = {}

    timer_key = (section, key)

    # Restart only the timer that belongs to this setting.
    previous_id = pending.pop(timer_key, None)
    if previous_id is not None:
        self.after_cancel(previous_id)

    def fire():
        # The timer has run, so its id must not be cancelled later.
        pending.pop(timer_key, None)
        callback(section, key, variable.get())

    after_id = self.after(delay, fire)
    pending[timer_key] = after_id
    # Still expose the most recent id through the original attribute.
    self._after_id = after_id
_on_transcribe_from_mic(self): + if self._is_transcribing_from_mic: + self.stop_recording_from_mic() + else: + self._start_recording_from_mic() + + def _start_recording_from_mic(self): + self._is_transcribing_from_mic = True + + self.btn_generate_transcription.configure( + fg_color=(Color.LIGHT_RED.value, Color.DARK_RED.value), + hover_color=( + Color.HOVER_LIGHT_RED.value, + Color.HOVER_DARK_RED.value, + ), + text=_("Stop recording"), + ) + + transcription = Transcription( + source=AudioSource.MIC, + language_code=self._get_language_code(), + method=self.radio_var.get(), + **self._get_whisperx_args(), + ) + self._controller.prepare_for_transcription(transcription) + + def stop_recording_from_mic(self): + self._is_transcribing_from_mic = False + + self.btn_generate_transcription.configure( + fg_color="green", + hover_color="darkgreen", + text="Start recording", + state=ctk.DISABLED, + ) + + self._controller.stop_recording_from_mic() + + def _on_generate_transcription(self): + self.ent_path.configure(state=ctk.DISABLED) + self.omn_transcribe_from.configure(state=ctk.DISABLED) + self.omn_audio_language.configure(state=ctk.DISABLED) + + transcription = Transcription( + language_code=self._get_language_code(), + method=self.radio_var.get(), + **self._get_whisperx_args(), + ) + + if self._transcribe_from_source == AudioSource.FILE: + transcription.source = AudioSource.FILE + transcription.source_file_path = self.ent_path.get() + + self._controller.prepare_for_transcription(transcription) + + elif self._transcribe_from_source == AudioSource.MIC: + self._on_transcribe_from_mic() + + elif self._transcribe_from_source == AudioSource.YOUTUBE: + transcription.source = AudioSource.YOUTUBE + transcription.youtube_url = self.ent_path.get() + + self._controller.prepare_for_transcription(transcription) + + def _on_save_transcription(self): + self._controller.save_transcription() + + def _on_transcribe_using_change(self): + if self.radio_var.get() == 
TranscriptionMethod.WHISPERX.value: + self.frm_whisper_options.grid() + self.frm_google_api_options.grid_remove() + elif self.radio_var.get() == TranscriptionMethod.GOOGLE_API.value: + self.frm_whisper_options.grid_remove() + self.frm_whisperx_advanced_options.grid_remove() + self.btn_whisperx_show_advanced_options.configure( + text=_("Show advanced options") + ) + self.frm_google_api_options.grid() + + def _on_set_google_api_key(self): + old_api_key = self._config_google_api.api_key + + dialog = CTkInputDialog( + title="Google API key", + label_text="Type in the API key:", + entry_text=old_api_key, + ) + + new_api_key = dialog.get_input() + + if new_api_key is not None and old_api_key != new_api_key: + self._on_config_change( + section=ConfigGoogleApi.Key.SECTION, + key=ConfigGoogleApi.Key.API_KEY, + new_value=new_api_key.strip(), + ) + + def _on_chk_whisper_options_translate_change(self): + if self.chk_whisper_options_translate.get(): + self.chk_whisper_options_subtitles.deselect() + self.chk_whisper_options_subtitles.configure(state=ctk.DISABLED) + self.frm_subtitle_options.grid_remove() + else: + self.chk_whisper_options_subtitles.configure(state=ctk.NORMAL) + + def _on_whisper_options_subtitles_change(self): + if self.chk_whisper_options_subtitles.get(): + self.frm_subtitle_options.grid() + else: + self.frm_subtitle_options.grid_remove() + + def _on_show_advanced_options(self): + if self.frm_whisperx_advanced_options.winfo_ismapped(): + self.frm_whisperx_advanced_options.grid_remove() + self.btn_whisperx_show_advanced_options.configure( + text=_("Show advanced options") + ) + else: + self.frm_whisperx_advanced_options.grid() + self.btn_whisperx_show_advanced_options.configure( + text=_("Hide advanced options") + ) + + def _toggle_progress_bar_visibility(self, should_show): + if should_show: + self.progress_bar.grid(row=2, column=1, padx=40, pady=0, sticky=ctk.EW) + self.progress_bar.start() + else: + self.progress_bar.grid_forget() + + @staticmethod + def 
_on_config_change(section, key, new_value): + cm.ConfigManager.modify_value(section, key, new_value) + + @staticmethod + def _change_appearance_mode_event(new_appearance_mode: str): + appearance_mode_map = { + _("Dark"): "Dark", + _("Light"): "Light", + _("System"): "System", + } + appearance_mode = appearance_mode_map.get(new_appearance_mode, "System") + ctk.set_appearance_mode(appearance_mode)