initial import of transcriber

This commit is contained in:
Sean McElwain 2026-04-01 08:30:39 -05:00
commit 40ad24f27c
52 changed files with 3274 additions and 0 deletions

126
audiotext.spec Normal file
View File

@ -0,0 +1,126 @@
# -*- mode: python ; coding: utf-8 -*-
from os.path import join
from platform import system
from PyInstaller.utils.hooks import copy_metadata
from PyInstaller.utils.hooks import collect_data_files
from shutil import copyfile
# PyInstaller build specification for the Audiotext application.
# `Analysis`, `PYZ`, `EXE`, `BUNDLE`, `COLLECT` and `DISTPATH` are not imported
# here; they are names PyInstaller provides when it executes a spec file.

# (source, destination) pairs of package directories and resources that are
# copied into the bundle as data.
# NOTE(review): 'venv/Lib/site-packages' is the Windows virtualenv layout;
# confirm these paths resolve when building on macOS/Linux.
datas = [
(r'venv/Lib/site-packages/customtkinter', 'customtkinter'),
(r'venv/Lib/site-packages/transformers', 'transformers'),
(r'venv/Lib/site-packages/lightning', 'lightning'),
(r'venv/Lib/site-packages/lightning_fabric', 'lightning_fabric'),
(r'venv/Lib/site-packages/speechbrain', 'speechbrain'),
(r'venv/Lib/site-packages/pyannote', 'pyannote'),
(r'venv/Lib/site-packages/asteroid_filterbanks', 'asteroid_filterbanks'),
(r'venv/Lib/site-packages/whisperx', 'whisperx'),
('res', 'res')
]
# Add the distribution metadata of these packages to the bundled data
# (presumably because it is queried at runtime -- TODO confirm which are needed).
datas += copy_metadata('torch')
datas += copy_metadata('tqdm', recursive=True)
datas += copy_metadata('regex')
datas += copy_metadata('requests')
datas += copy_metadata('packaging')
datas += copy_metadata('filelock')
datas += copy_metadata('numpy')
datas += copy_metadata('tokenizers')
datas += copy_metadata('pillow')
datas += copy_metadata('huggingface_hub')
datas += copy_metadata('safetensors')
datas += copy_metadata('pyyaml')
# Add the data files shipped inside the librosa package.
datas += collect_data_files('librosa')
block_cipher = None
# Analyse the entry script and gather everything it needs.
a = Analysis(
['src/app.py'],
pathex=[],
binaries=[],
datas=datas,
# NOTE(review): 'pytorch' does not look like an importable module name (the
# PyTorch package is imported as 'torch'); likewise newer scikit-learn
# releases may name the sklearn.neighbors helpers differently -- verify.
hiddenimports=['huggingface_hub.repository', 'pytorch', 'sklearn.utils._cython_blas', 'sklearn.neighbors.typedefs', 'sklearn.neighbors.quad_tree', 'sklearn.tree', 'sklearn.tree._utils'],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False,
)
# Filter out unused and/or duplicate shared libs
# NOTE(review): the names below use the '.so' suffix, so this first filter
# presumably only matches on Linux -- confirm.
torch_lib_paths = {
join('torch', 'lib', 'libtorch_cuda.so'),
join('torch', 'lib', 'libtorch_cpu.so'),
}
a.datas = [entry for entry in a.datas if not entry[0] in torch_lib_paths]
# Drop data entries whose first element contains 'torch<sep>_C.cp' or
# 'torch<sep>_dl.cp', using the path separator of the build platform.
os_path_separator = '\\' if system() == 'Windows' else '/'
a.datas = [entry for entry in a.datas if not f'torch{os_path_separator}_C.cp' in entry[0]]
a.datas = [entry for entry in a.datas if not f'torch{os_path_separator}_dl.cp' in entry[0]]
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
if system() == 'Darwin': # macOS
# On macOS the scripts, binaries, zipfiles and datas are all passed to EXE.
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.zipfiles,
a.datas,
[],
name='Audiotext',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
upx_exclude=[],
runtime_tmpdir=None,
console=False,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch='x86_64',
codesign_identity=None,
entitlements_file=None,
icon=['res/img/icon.icns'],
)
# BUNDLE statement is used to create a macOS application bundle (.app) for the program
app = BUNDLE(
exe,
name='Audiotext.app',
icon=['res/img/icon.icns'],
bundle_identifier=None,
)
else:
# On other platforms EXE is built with exclude_binaries=True and the
# binaries, zipfiles and datas are gathered by COLLECT under the name 'audiotext'.
exe = EXE(
pyz,
a.scripts,
[],
exclude_binaries=True,
name='Audiotext',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
console=False,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch='x86_64',
codesign_identity=None,
entitlements_file=None,
icon=['res/img/icon.ico'],
)
coll = COLLECT(
exe,
a.binaries,
a.zipfiles,
a.datas,
strip=False,
upx=True,
upx_exclude=[],
name='audiotext',
)
# Copy the default configuration file into '<DISTPATH>/audiotext'.
# NOTE(review): only the non-macOS branch creates a COLLECT named 'audiotext';
# confirm whether this copy is meant to run for macOS builds as well.
copyfile('config.ini', '{0}/audiotext/config.ini'.format(DISTPATH))

15
config.ini Normal file
View File

@ -0,0 +1,15 @@
[whisperx]
model_size = large-v2
batch_size = 8
compute_type = int8
use_cpu = True
can_use_gpu = False
[google_api]
api_key =
[subtitles]
highlight_words = False
# Maximum number of characters in a subtitle line before it is wrapped.
max_line_width = 42
# Maximum number of lines shown in a single subtitle segment.
max_line_count = 2

BIN
requirements.txt Normal file

Binary file not shown.

BIN
res/img/file-explorer.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 230 B

BIN
res/img/icon-dark.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

BIN
res/img/icon-light.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

BIN
res/img/icon.icns Normal file

Binary file not shown.

BIN
res/img/icon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

View File

@ -0,0 +1,117 @@
# Copyright (C) 2023 HenestrosaConH
# This file is distributed under the same license as the PACKAGE package.
# José Carlos López Henestrosa <henestrosaconh@gmail.com>, 2023.
#
msgid ""
msgstr ""
"Project-Id-Version: 1.3.0\n"
"Report-Msgid-Bugs-To: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"POT-Creation-Date: 2023-02-08 18:22+0100\n"
"PO-Revision-Date: 2023-02-08 18:22+0100\n"
"Last-Translator: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"Language-Team: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: src\controller\main_controller.py:35
msgid "Select a file"
msgstr "Select a file"
#: src\controller\main_controller.py:37
msgid "Audio files"
msgstr "Audio files"
#: src\controller\main_controller.py:38
msgid "Video files"
msgstr "Video files"
#: src\controller\main_controller.py:93
msgid "Error: No audio file selected, please select one before generating text."
msgstr "Error: No audio file selected, please select one before generating text."
#: src\controller\main_controller.py:116
msgid "Error: The selected audio language is not valid."
msgstr "Error: The selected audio language is not valid."
#: src\controller\main_controller.py:120
#: src\controller\main_controller.py:224
msgid "Error generating the file transcription. Please try again."
msgstr "Error generating the file transcription. Please try again."
#: src\controller\main_controller.py:254
msgid "Error: No microphone detected."
msgstr "Error: No microphone detected."
#: src\controller\main_controller.py:257
msgid "Error: Listening timed out while waiting for phrase to start."
msgstr "Error: Listening timed out while waiting for phrase to start."
#: src\controller\main_controller.py:262
msgid "Sorry, I cannot clarify what you are saying. Please try again."
msgstr "Sorry, I cannot clarify what you are saying. Please try again."
#: src\controller\main_controller.py:267
msgid "Unexpected error. Please try again."
msgstr "Unexpected error. Please try again."
#: src\controller\main_controller.py:280
msgid "Save as"
msgstr "Save as"
#: src\controller\main_controller.py:282
msgid "Text file"
msgstr "Text file"
#: src\controller\main_controller.py:282
msgid "All Files"
msgstr "All Files"
#: src\view\main_window.py:62
#: src\view\main_window.py:204
msgid "Select file"
msgstr "Select file"
#: src\view\main_window.py:69
#: src\view\main_window.py:205
msgid "Transcribe from microphone"
msgstr "Transcribe from microphone"
#: src\view\main_window.py:78
msgid "Generate transcription"
msgstr "Generate transcription"
#: src\view\main_window.py:86
msgid "App language"
msgstr "App language"
#: src\view\main_window.py:102
msgid "Appearance mode"
msgstr "Appearance mode"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:210
#: src\view\main_window.py:219
msgid "System"
msgstr "System"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:218
msgid "Light"
msgstr "Light"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:217
msgid "Dark"
msgstr "Dark"
#: src\view\main_window.py:133
#: src\view\main_window.py:211
msgid "Save transcription"
msgstr "Save transcription"

View File

@ -0,0 +1,69 @@
# Copyright (C) 2023 HenestrosaConH
# This file is distributed under the same license as the PACKAGE package.
# José Carlos López Henestrosa <henestrosaconh@gmail.com>, 2023.
#
msgid ""
msgstr ""
"Project-Id-Version: 1.3.0\n"
"Report-Msgid-Bugs-To: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"POT-Creation-Date: 2023-02-08 18:22+0100\n"
"PO-Revision-Date: 2023-02-08 18:22+0100\n"
"Last-Translator: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"Language-Team: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:35
msgid "Select a file"
msgstr "Select a file"
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:37
msgid "Audio files"
msgstr "Audio files"
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:38
msgid "Video files"
msgstr "Video files"
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:93
msgid "Error: No audio file selected, please select one before generating text."
msgstr "Error: No audio file selected, please select one before generating text."
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:116
msgid "Error: The selected audio language is not valid."
msgstr "Error: The selected audio language is not valid."
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:120
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:224
msgid "Error generating the file transcription. Please try again."
msgstr "Error generating the file transcription. Please try again."
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:254
msgid "Error: No microphone detected."
msgstr "Error: No microphone detected."
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:257
msgid "Error: Listening timed out while waiting for phrase to start."
msgstr "Error: Listening timed out while waiting for phrase to start."
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:262
msgid "Sorry, I cannot clarify what you are saying. Please try again."
msgstr "Sorry, I cannot clarify what you are saying. Please try again."
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:267
msgid "Unexpected error. Please try again."
msgstr "Unexpected error. Please try again."
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:280
msgid "Save as"
msgstr "Save as"
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:282
msgid "Text file"
msgstr "Text file"
#: D:\Mio\Proyectos\Python\audiotext\src\controller\main_controller.py:282
msgid "All Files"
msgstr "All Files"

View File

@ -0,0 +1,62 @@
# Copyright (C) 2023 HenestrosaConH
# This file is distributed under the same license as the PACKAGE package.
# José Carlos López Henestrosa <henestrosaconh@gmail.com>, 2023.
#
msgid ""
msgstr ""
"Project-Id-Version: 1.3.0\n"
"Report-Msgid-Bugs-To: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"POT-Creation-Date: 2023-02-08 18:22+0100\n"
"PO-Revision-Date: 2023-02-08 18:22+0100\n"
"Last-Translator: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"Language-Team: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: src\view\main_window.py:62
#: src\view\main_window.py:204
msgid "Select file"
msgstr "Select file"
#: src\view\main_window.py:69
#: src\view\main_window.py:205
msgid "Transcribe from microphone"
msgstr "Transcribe from microphone"
#: src\view\main_window.py:78
msgid "Generate transcription"
msgstr "Generate transcription"
#: src\view\main_window.py:86
msgid "App language"
msgstr "App language"
#: src\view\main_window.py:102
msgid "Appearance mode"
msgstr "Appearance mode"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:210
#: src\view\main_window.py:219
msgid "System"
msgstr "System"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:218
msgid "Light"
msgstr "Light"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:217
msgid "Dark"
msgstr "Dark"
#: src\view\main_window.py:133
#: src\view\main_window.py:211
msgid "Save transcription"
msgstr "Save transcription"

Binary file not shown.

View File

@ -0,0 +1,116 @@
# Copyright (C) 2023 HenestrosaConH
# This file is distributed under the same license as the PACKAGE package.
# José Carlos López Henestrosa <henestrosaconh@gmail.com>, 2023.
#
msgid ""
msgstr ""
"Project-Id-Version: 1.3.0\n"
"Report-Msgid-Bugs-To: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"POT-Creation-Date: 2023-02-08 18:22+0100\n"
"PO-Revision-Date: 2023-02-08 18:22+0100\n"
"Last-Translator: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"Language-Team: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: src\view\main_window.py:62
#: src\view\main_window.py:204
msgid "Select file"
msgstr "Selecciona archivo"
#: src\view\main_window.py:69
#: src\view\main_window.py:205
msgid "Transcribe from microphone"
msgstr "Transcribir del micrófono"
#: src\view\main_window.py:78
msgid "Generate transcription"
msgstr "Generar transcripción"
#: src\view\main_window.py:86
msgid "App language"
msgstr "Lenguaje del programa"
#: src\view\main_window.py:102
msgid "Appearance mode"
msgstr "Modo de apariencia"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:210
#: src\view\main_window.py:219
msgid "System"
msgstr "Sistema"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:218
msgid "Light"
msgstr "Claro"
#: src\view\main_window.py:108
#: src\view\main_window.py:209
#: src\view\main_window.py:217
msgid "Dark"
msgstr "Oscuro"
#: src\view\main_window.py:133
#: src\view\main_window.py:211
msgid "Save transcription"
msgstr "Guardar transcripción"
#: src\controller\main_controller.py:35
msgid "Select a file"
msgstr "Seleccionar archivo"
#: src\controller\main_controller.py:37
msgid "Audio files"
msgstr "Archivos de audio"
#: src\controller\main_controller.py:38
msgid "Video files"
msgstr "Archivos de vídeo"
#: src\controller\main_controller.py:93
msgid "Error: No audio file selected, please select one before generating text."
msgstr "Error: No se ha seleccionado un archivo de audio. Por favor, seleccione uno antes de generar el texto."
#: src\controller\main_controller.py:116
msgid "Error: The selected audio language is not valid."
msgstr "Error: El idioma del audio seleccionado no es válido."
#: src\controller\main_controller.py:120
#: src\controller\main_controller.py:224
msgid "Error generating the file transcription. Please try again."
msgstr "Error generando la transcripción. Por favor, inténtelo de nuevo."
#: src\controller\main_controller.py:254
msgid "Error: No microphone detected."
msgstr "Error: Micrófono no disponible."
#: src\controller\main_controller.py:257
msgid "Error: Listening timed out while waiting for phrase to start."
msgstr "Error: No se ha detectado audio disponible para transcribir."
#: src\controller\main_controller.py:262
msgid "Sorry, I cannot clarify what you are saying. Please try again."
msgstr "Lo siento, no puedo entender lo que está diciendo. Por favor, inténtelo de nuevo."
#: src\controller\main_controller.py:267
msgid "Unexpected error. Please try again."
msgstr "Error inesperado. Por favor, inténtelo de nuevo."
#: src\controller\main_controller.py:280
msgid "Save as"
msgstr "Guardar como"
#: src\controller\main_controller.py:282
msgid "Text file"
msgstr "Archivo de texto"
#: src\controller\main_controller.py:282
msgid "All Files"
msgstr "Todos los archivos"

View File

@ -0,0 +1,69 @@
# Copyright (C) 2023 HenestrosaConH
# This file is distributed under the same license as the PACKAGE package.
# José Carlos López Henestrosa <henestrosaconh@gmail.com>, 2023.
#
msgid ""
msgstr ""
"Project-Id-Version: 1.3.0\n"
"Report-Msgid-Bugs-To: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"POT-Creation-Date: 2023-02-08 18:22+0100\n"
"PO-Revision-Date: 2023-02-08 18:22+0100\n"
"Last-Translator: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"Language-Team: José Carlos López Henestrosa <henestrosaconh@gmail.com>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"
#: src\controller\main_controller.py:35
msgid "Select a file"
msgstr "Seleccionar archivo"
#: src\controller\main_controller.py:37
msgid "Audio files"
msgstr "Archivos de audio"
#: src\controller\main_controller.py:38
msgid "Video files"
msgstr "Archivos de vídeo"
#: src\controller\main_controller.py:93
msgid "Error: No audio file selected, please select one before generating text."
msgstr "Error: No se ha seleccionado un archivo de audio. Por favor, seleccione uno antes de generar el texto."
#: src\controller\main_controller.py:116
msgid "Error: The selected audio language is not valid."
msgstr "Error: El idioma del audio seleccionado no es válido."
#: src\controller\main_controller.py:120
#: src\controller\main_controller.py:224
msgid "Error generating the file transcription. Please try again."
msgstr "Error generando la transcripción. Por favor, inténtelo de nuevo."
#: src\controller\main_controller.py:254
msgid "Error: No microphone detected."
msgstr "Error: Micrófono no disponible."
#: src\controller\main_controller.py:257
msgid "Error: Listening timed out while waiting for phrase to start."
msgstr "Error: No se ha detectado audio disponible para transcribir."
#: src\controller\main_controller.py:262
msgid "Sorry, I cannot clarify what you are saying. Please try again."
msgstr "Lo siento, no puedo entender lo que está diciendo. Por favor, inténtelo de nuevo."
#: src\controller\main_controller.py:267
msgid "Unexpected error. Please try again."
msgstr "Error inesperado. Por favor, inténtelo de nuevo."
#: src\controller\main_controller.py:280
msgid "Save as"
msgstr "Guardar como"
#: src\controller\main_controller.py:282
msgid "Text file"
msgstr "Archivo de texto"
#: src\controller\main_controller.py:282
msgid "All Files"
msgstr "Todos los archivos"

Binary file not shown.

71
src/app.py Normal file
View File

@ -0,0 +1,71 @@
import customtkinter as ctk
import torch
import utils.config_manager as cm
import utils.constants as c
import utils.path_helper as ph
from controller.main_controller import MainController
from model.config.config_whisperx import ConfigWhisperX
from model.transcription import Transcription
from utils.enums import ComputeType
from view.main_window import MainWindow
class App(ctk.CTk):
    """
    Root window of the application.

    Configures the window (theme, title, icon, size), records in the
    configuration whether a CUDA GPU is usable, and wires the view, the
    transcription model and the controller together.
    """

    def __init__(self):
        super().__init__()

        # Modes: "System" (standard), "Dark", "Light"
        ctk.set_appearance_mode("System")
        # Themes: "blue" (standard), "green", "dark-blue"
        ctk.set_default_color_theme("blue")

        self.title(c.APP_NAME)
        self.wm_iconbitmap(ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "icon.ico")

        # Initial size of the window
        width = 1000
        height = 760
        self.geometry(f"{width}x{height}")

        # Min size of the window
        min_width = 500
        min_height = 250
        self.minsize(min_width, min_height)

        # Check GPU. The probe is done once so that both configuration
        # values below are derived from the same result.
        is_gpu_available = torch.cuda.is_available()
        cm.ConfigManager.modify_value(
            section=ConfigWhisperX.Key.SECTION,
            key=ConfigWhisperX.Key.CAN_USE_GPU,
            new_value=str(is_gpu_available),
        )
        if not is_gpu_available:
            # Without a GPU the compute type is forced to INT8.
            cm.ConfigManager.modify_value(
                section=ConfigWhisperX.Key.SECTION,
                key=ConfigWhisperX.Key.COMPUTE_TYPE,
                new_value=ComputeType.INT8.value,
            )

        # Initialize configs (read after the values above were updated)
        config_whisperx = cm.ConfigManager.get_config_whisperx()
        config_google_api = cm.ConfigManager.get_config_google_api()
        config_subtitles = cm.ConfigManager.get_config_subtitles()

        # Create the view and place it on the root window
        view = MainWindow(self, config_whisperx, config_google_api, config_subtitles)
        view.pack(fill="both", expand=True)

        # Create the model for the controller
        transcription = Transcription()

        # Create the controller
        controller = MainController(transcription, view)

        # Set the controller to view
        view.set_controller(controller)
if __name__ == "__main__":
    # Build the root window, ask Tk to center it, then enter the event loop.
    root_window = App()
    root_window.eval("tk::PlaceWindow . center")
    root_window.mainloop()

View File

Binary file not shown.

View File

@ -0,0 +1,354 @@
import asyncio
import os
import shutil
import threading
import traceback
from pathlib import Path
from tkinter import filedialog
import speech_recognition as sr
import utils.audio_utils as au
import utils.config_manager as cm
import whisperx
from model.transcription import Transcription
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydub import AudioSegment
from pydub.silence import split_on_silence
from pytube import YouTube
from pytube.exceptions import RegexMatchError
from utils import constants as c
from utils.enums import AudioSource, TranscriptionMethod
from utils.i18n import _
from utils.path_helper import ROOT_PATH
class MainController:
    """
    Controller that connects the main window (view) with the transcription model.

    It prepares the audio source (file, microphone or YouTube), runs the
    selected transcription method on a background thread and reports
    results and errors back to the view.
    """

    def __init__(self, transcription: Transcription, view):
        self.view = view
        self.transcription = transcription
        # Flag polled by the microphone recording loop.
        self._is_mic_recording = False
        # Last result returned by WhisperX; used to generate subtitles.
        self._whisperx_result = None

    # PUBLIC METHODS

    def select_file(self):
        """
        Prompts a file explorer to choose the audio/video file to transcribe.

        If a file is chosen, its path is handed to the view through
        `on_select_file_success`. Nothing happens if the dialog is cancelled.
        """
        filepath = filedialog.askopenfilename(
            initialdir="/",
            title=_("Select a file"),
            filetypes=[
                (
                    _("All supported files"),
                    c.AUDIO_FILE_EXTENSIONS + c.VIDEO_FILE_EXTENSIONS,
                ),
                (_("Audio files"), c.AUDIO_FILE_EXTENSIONS),
                (_("Video files"), c.VIDEO_FILE_EXTENSIONS),
            ],
        )

        if filepath:
            self.view.on_select_file_success(filepath)

    def prepare_for_transcription(self, transcription: Transcription):
        """
        Prepares the transcription process based on the provided parameters.

        Notifies the view that processing has started and dispatches to the
        preparation routine that matches `transcription.source`. Exceptions
        raised while preparing are not propagated; they are reported to the
        view through `_handle_exception`.

        :param transcription: Transcription request (source, method, options).
        """
        self.transcription = transcription

        try:
            self.view.on_processing_transcription()

            if transcription.source == AudioSource.FILE:
                self._prepare_for_file_transcription(transcription.source_file_path)
            elif transcription.source == AudioSource.MIC:
                self._prepare_for_mic_transcription()
            elif transcription.source == AudioSource.YOUTUBE:
                self._prepare_for_yt_transcription()
        except Exception as e:
            self._handle_exception(e)

    async def handle_transcription_process(self):
        """
        Runs the selected transcription method and notifies the view when done.

        For microphone and YouTube sources the temporary audio file is removed
        after transcribing. The view is always notified at the end; the
        transcription counts as successful only if some text was produced.
        """
        try:
            # Get transcription
            if self.transcription.method == TranscriptionMethod.WHISPERX.value:
                await self._transcribe_using_whisperx()
            elif self.transcription.method == TranscriptionMethod.GOOGLE_API.value:
                await self._transcribe_using_google_api()

            if self.transcription.source in (AudioSource.MIC, AudioSource.YOUTUBE):
                self.transcription.source_file_path.unlink()  # Remove tmp file
        except Exception as e:
            self._handle_exception(e)
        finally:
            # Success means there IS text to show/save (the flag used to be
            # inverted, reporting success for an empty transcription).
            self.view.on_processed_transcription(
                success=bool(self.transcription.text)
            )

    def stop_recording_from_mic(self):
        """Signals the microphone recording loop to stop."""
        self._is_mic_recording = False

    def save_transcription(self):
        """
        Prompts a file explorer to determine the file to save the
        generated transcription.

        If subtitles were requested, they are generated next to the saved file.
        """
        file_path = Path(self.transcription.source_file_path)

        file = filedialog.asksaveasfile(
            mode="w",
            initialdir=file_path.parent,
            initialfile=f"{file_path.stem}.txt",
            title=_("Save as"),
            defaultextension=".txt",
            filetypes=[(_("Text file"), "*.txt"), (_("All Files"), "*.*")],
        )

        if file:
            # The context manager closes the file even if the write fails.
            with file:
                file.write(self.transcription.text or "")

            if self.transcription.should_subtitle:
                self._generate_subtitles(Path(file.name))

    # PRIVATE METHODS

    def _start_transcription_thread(self):
        """
        Runs `handle_transcription_process` to completion on a new thread.

        `asyncio.run` creates a fresh event loop for the thread and closes it
        once the coroutine has finished.
        """
        threading.Thread(
            target=lambda: asyncio.run(self.handle_transcription_process()),
        ).start()

    def _prepare_for_file_transcription(self, source_file_path: str):
        """
        Validates the selected file and starts transcribing it.

        :raises ValueError: if the path is not an existing audio/video file.
        """
        if self._is_file_valid(source_file_path):
            self.transcription.source_file_path = Path(source_file_path)
            self._start_transcription_thread()
        else:
            raise ValueError("Error: No valid file selected.")

    def _prepare_for_mic_transcription(self):
        """Starts recording from the microphone on a background thread."""
        threading.Thread(target=self._record_from_mic).start()

    def _prepare_for_yt_transcription(self):
        """Starts downloading the YouTube audio on a background thread."""
        threading.Thread(target=self._download_audio_from_yt_video).start()

    def _handle_exception(self, e: Exception):
        """
        Prints the traceback of `e` and shows the error in the view.

        The traceback is formatted from the exception object itself, so this
        also works for exceptions that were created but never raised.
        """
        print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
        self.view.on_processed_transcription(success=False)
        self.view.display_text(repr(e))

    @staticmethod
    def _is_file_valid(source_file_path: str):
        """
        Returns whether the path is an existing file whose suffix is one of
        the supported audio or video extensions.
        """
        filepath = Path(source_file_path)
        is_audio = filepath.suffix in c.AUDIO_FILE_EXTENSIONS
        is_video = filepath.suffix in c.VIDEO_FILE_EXTENSIONS

        return filepath.is_file() and (is_audio or is_video)

    async def _transcribe_using_whisperx(self):
        """
        Transcribes (or translates) the source file with WhisperX and shows
        the resulting text in the view.

        Errors are reported through `_handle_exception` instead of being raised.
        """
        config_whisperx = cm.ConfigManager.get_config_whisperx()

        device = "cpu" if config_whisperx.use_cpu else "cuda"
        task = "translate" if self.transcription.should_translate else "transcribe"

        try:
            model = whisperx.load_model(
                config_whisperx.model_size,
                device,
                compute_type=config_whisperx.compute_type,
                task=task,
                language=self.transcription.language_code,
            )

            audio_path = str(self.transcription.source_file_path)
            audio = whisperx.load_audio(audio_path)
            self._whisperx_result = model.transcribe(
                audio, batch_size=config_whisperx.batch_size
            )

            # The plain text is built before alignment replaces the result.
            text_combined = " ".join(
                segment["text"].strip() for segment in self._whisperx_result["segments"]
            )

            # Align output if should subtitle
            if self.transcription.should_subtitle:
                model_aligned, metadata = whisperx.load_align_model(
                    language_code=self.transcription.language_code, device=device
                )
                self._whisperx_result = whisperx.align(
                    self._whisperx_result["segments"],
                    model_aligned,
                    metadata,
                    audio,
                    device,
                    return_char_alignments=False,
                )

            self.transcription.text = text_combined
            self.view.display_text(self.transcription.text)

        except Exception as e:
            self._handle_exception(e)

    async def _transcribe_using_google_api(self):
        """
        Splits a large audio file into chunks
        and applies speech recognition on each one.

        Video files are first converted to a temporary WAV file. The temporary
        chunk directory is always removed at the end.
        """
        file_path = self.transcription.source_file_path

        # Create a directory to store the audio chunks
        chunks_directory = ROOT_PATH / "audio-chunks"
        chunks_directory.mkdir(exist_ok=True)

        try:
            # Get file extension
            content_type = Path(file_path).suffix

            sound = None
            # Open the audio file using pydub
            if content_type in c.AUDIO_FILE_EXTENSIONS:
                sound = AudioSegment.from_file(file_path)
            elif content_type in c.VIDEO_FILE_EXTENSIONS:
                clip = VideoFileClip(str(file_path))
                try:
                    video_audio_path = chunks_directory / f"{Path(file_path).stem}.wav"
                    clip.audio.write_audiofile(video_audio_path)
                finally:
                    # Release the resources held by the clip.
                    clip.close()
                sound = AudioSegment.from_wav(video_audio_path)

            audio_chunks = split_on_silence(
                sound,
                # Minimum duration of silence required to consider a segment as a split point
                min_silence_len=500,
                # Audio with a level -X decibels below the original audio level will be considered as silence
                silence_thresh=sound.dBFS - 40,
                # Adds a buffer of silence before and after each split point
                keep_silence=100,
            )

            # Create a speech recognition object
            r = sr.Recognizer()

            # Get Google API key (if any)
            config_google_api = cm.ConfigManager.get_config_google_api()
            api_key = config_google_api.api_key or None

            # Recognized text of each chunk, joined once at the end
            chunk_texts = []

            # Process each chunk
            for idx, audio_chunk in enumerate(audio_chunks):
                # Export audio chunk and save it in the `chunks_directory` directory.
                chunk_filename = os.path.join(chunks_directory, f"chunk{idx}.wav")
                audio_chunk.export(chunk_filename, bitrate="192k", format="wav")

                # Recognize the chunk
                with sr.AudioFile(chunk_filename) as source:
                    r.adjust_for_ambient_noise(source)
                    audio_listened = r.record(source)

                    try:
                        # Try converting it to text
                        chunk_text = r.recognize_google(
                            audio_listened,
                            language=self.transcription.language_code,
                            key=api_key,
                        )

                        chunk_text = f"{chunk_text.capitalize()}. "
                        chunk_texts.append(chunk_text)
                        print(f"chunk text: {chunk_text}")

                    except Exception:
                        # Best effort: a chunk that cannot be recognized is skipped.
                        continue

            self.transcription.text = "".join(chunk_texts)

        except Exception:
            self.view.display_text(traceback.format_exc())

        finally:
            # Delete temporal directory and files
            shutil.rmtree(chunks_directory)

            if self.transcription.text:
                self.view.display_text(self.transcription.text)

    def _record_from_mic(self):
        """
        Records from the microphone until `stop_recording_from_mic` is called,
        saves the recording to a temporary WAV file and starts transcribing it.

        Any error stops the recording in the view and is reported through
        `_handle_exception`.
        """
        self._is_mic_recording = True
        audio_data = []

        try:
            r = sr.Recognizer()

            with sr.Microphone() as mic:
                while self._is_mic_recording:
                    audio_chunk = r.listen(mic, timeout=5)
                    audio_data.append(audio_chunk)

            if audio_data:
                filename = "mic-output.wav"
                au.save_audio_data(audio_data, filename=filename)
                self.transcription.source_file_path = Path(filename)

                self._start_transcription_thread()
            else:
                e = ValueError("No audio detected")
                self._handle_exception(e)

        except Exception as e:
            self.view.stop_recording_from_mic()
            self._handle_exception(e)

    def _generate_subtitles(self, file_path):
        """
        Writes SRT and VTT subtitle files next to `file_path` from the last
        WhisperX result.

        Does nothing if there is no WhisperX result to write.

        :param file_path: Path of the saved transcription file.
        """
        if self._whisperx_result is None:
            # Nothing was produced by WhisperX, so there is nothing to write.
            return

        config_subtitles = cm.ConfigManager.get_config_subtitles()

        output_formats = ["srt", "vtt"]
        output_dir = file_path.parent

        writer_args = {
            "highlight_words": config_subtitles.highlight_words,
            "max_line_count": config_subtitles.max_line_count,
            "max_line_width": config_subtitles.max_line_width,
        }

        # https://github.com/m-bain/whisperX/issues/455#issuecomment-1707547704
        self._whisperx_result["language"] = "en"

        for output_format in output_formats:
            writer = whisperx.transcribe.get_writer(output_format, output_dir)
            writer(self._whisperx_result, file_path, writer_args)

    def _download_audio_from_yt_video(self):
        """
        Downloads the audio of the requested YouTube video to a temporary
        file and starts transcribing it.

        Errors are reported through `_handle_exception` instead of being raised.
        """
        try:
            yt = YouTube(self.transcription.youtube_url)
            stream = yt.streams.filter(only_audio=True).first()
            output_file = stream.download(output_path=".", filename="yt-audio.mp3")

            if output_file:
                self.transcription.source_file_path = Path(output_file)

                self._start_transcription_thread()

        except RegexMatchError:
            e = ValueError("The URL is not correct.")
            self._handle_exception(e)

        except Exception as e:
            self._handle_exception(e)

0
src/model/__init__.py Normal file
View File

Binary file not shown.

View File

Binary file not shown.

View File

@ -0,0 +1,26 @@
from dataclasses import dataclass
from enum import Enum
from typing import Optional
@dataclass
class ConfigGoogleApi:
    """Google API settings."""

    api_key: str

    class Key(Enum):
        """
        Section name and option names of the Google API configuration.
        """

        SECTION = "google_api"
        API_KEY = "api_key"

        def value_type(self) -> Optional[str]:
            """
            Return the name of the value type stored under this key, or
            ``None`` for keys without an associated type (``SECTION``).
            """
            if self is type(self).API_KEY:
                return "str"
            return None

View File

@ -0,0 +1,30 @@
from dataclasses import dataclass
from enum import Enum
from typing import Optional
@dataclass
class ConfigSubtitles:
    """Subtitle generation settings."""

    highlight_words: bool
    max_line_count: int
    max_line_width: int

    class Key(Enum):
        """
        Section name and option names of the subtitles configuration.
        """

        SECTION = "subtitles"
        HIGHLIGHT_WORDS = "highlight_words"
        MAX_LINE_COUNT = "max_line_count"
        MAX_LINE_WIDTH = "max_line_width"

        def value_type(self) -> Optional[str]:
            """
            Return the name of the value type stored under this key, or
            ``None`` for keys without an associated type (``SECTION``).
            """
            keys = type(self)
            if self is keys.HIGHLIGHT_WORDS:
                return "bool"
            if self is keys.MAX_LINE_COUNT or self is keys.MAX_LINE_WIDTH:
                return "int"
            return None

View File

@ -0,0 +1,41 @@
from dataclasses import dataclass
from enum import Enum
from typing import Optional
@dataclass
class ConfigWhisperX:
    """WhisperX settings."""

    model_size: str
    batch_size: int
    compute_type: str
    use_cpu: bool
    can_use_gpu: bool

    class Key(Enum):
        """
        Section name and option names of the WhisperX configuration.
        """

        SECTION = "whisperx"
        MODEL_SIZE = "model_size"
        BATCH_SIZE = "batch_size"
        COMPUTE_TYPE = "compute_type"
        USE_CPU = "use_cpu"
        CAN_USE_GPU = "can_use_gpu"

        def value_type(self) -> Optional[str]:
            """
            Return the name of the value type stored under this key, or
            ``None`` for keys without an associated type (``SECTION``).
            """
            keys = type(self)
            if self is keys.MODEL_SIZE or self is keys.COMPUTE_TYPE:
                return "str"
            if self is keys.BATCH_SIZE:
                return "int"
            if self is keys.USE_CPU or self is keys.CAN_USE_GPU:
                return "bool"
            return None

View File

@ -0,0 +1,17 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
from utils.enums import AudioSource
@dataclass
class Transcription:
    """State of a transcription request and its result."""

    # Transcribed text; None until a transcription has been produced.
    text: Optional[str] = None
    # Language code passed to the transcription backends.
    language_code: Optional[str] = None
    # Where the audio comes from (see AudioSource).
    source: Optional[AudioSource] = None
    # Path of the audio/video file to transcribe.
    source_file_path: Optional[Path | str] = Path("/")
    # Selected transcription method.
    method: Optional[int] = None
    # Whether the audio should be translated instead of transcribed.
    should_translate: bool = False
    # Whether subtitle files should be generated.
    should_subtitle: bool = False
    # URL of the video to transcribe; it defaults to None, so it is Optional.
    youtube_url: Optional[str] = None

0
src/utils/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

26
src/utils/audio_utils.py Normal file
View File

@ -0,0 +1,26 @@
import speech_recognition as sr
from pydub import AudioSegment
def save_audio_data(audio_data, filename):
    """
    Join recorded audio chunks and export them as one mono WAV file.

    Returns without doing anything when ``audio_data`` is empty or ``None``.

    :param audio_data: Indexable sequence of chunks. Every chunk must provide
        ``get_raw_data()``; the first chunk's ``sample_width`` and
        ``sample_rate`` are applied to the whole recording.
        NOTE(review): presumably ``speech_recognition.AudioData`` objects —
        confirm against the caller.
    :param filename: Destination handed to ``AudioSegment.export``.
    """
    if not audio_data:
        return

    pcm_bytes = b"".join(
        chunk.get_raw_data(convert_rate=None, convert_width=None)
        for chunk in audio_data
    )

    first_chunk = audio_data[0]
    segment = AudioSegment(
        pcm_bytes,
        sample_width=first_chunk.sample_width,
        frame_rate=first_chunk.sample_rate,
        channels=1,
    )

    # NOTE(review): only speech_recognition errors are handled here; an I/O
    # or encoding failure raised by the export propagates to the caller.
    try:
        segment.export(filename, format="wav")
        print(f"Audio data saved to {filename}")
    except sr.UnknownValueError:
        print("Could not save audio data. Unknown value error.")
    except sr.RequestError as e:
        print(f"Could not request results; {e}")

110
src/utils/config_manager.py Normal file
View File

@ -0,0 +1,110 @@
from configparser import ConfigParser
from pathlib import Path
from typing import Optional, Union
from model.config.config_google_api import ConfigGoogleApi
from model.config.config_subtitles import ConfigSubtitles
from model.config.config_whisperx import ConfigWhisperX
from utils.path_helper import ROOT_PATH
class ConfigManager:
    """
    Static helpers for reading and updating the application's ``config.ini``.

    The file is parsed again on every call, so each lookup reflects what is
    on disk at that moment.
    """

    _FILE_PATH = ROOT_PATH / "config.ini"

    KeyType = Union[ConfigWhisperX.Key, ConfigGoogleApi.Key, ConfigSubtitles.Key]

    @staticmethod
    def read_config(file_path: Path = _FILE_PATH) -> Optional[ConfigParser]:
        """
        Parse the INI file at ``file_path``.

        :param file_path: File to read; defaults to ``config.ini`` under
            ``ROOT_PATH``.
        :return: The populated parser. ``ConfigParser.read`` ignores files it
            cannot open, so a missing file yields an empty parser.
        """
        config = ConfigParser()
        config.read(file_path)
        return config

    @staticmethod
    def get_config_whisperx() -> ConfigWhisperX:
        """Build a ``ConfigWhisperX`` from the ``whisperx`` section."""
        section = ConfigWhisperX.Key.SECTION

        return ConfigWhisperX(
            model_size=ConfigManager.get_value(section, ConfigWhisperX.Key.MODEL_SIZE),
            batch_size=ConfigManager.get_value(section, ConfigWhisperX.Key.BATCH_SIZE),
            compute_type=ConfigManager.get_value(
                section, ConfigWhisperX.Key.COMPUTE_TYPE
            ),
            use_cpu=ConfigManager.get_value(section, ConfigWhisperX.Key.USE_CPU),
            can_use_gpu=ConfigManager.get_value(
                section, ConfigWhisperX.Key.CAN_USE_GPU
            ),
        )

    @staticmethod
    def get_config_google_api() -> ConfigGoogleApi:
        """Build a ``ConfigGoogleApi`` from the ``google_api`` section."""
        section = ConfigGoogleApi.Key.SECTION

        return ConfigGoogleApi(
            api_key=ConfigManager.get_value(section, ConfigGoogleApi.Key.API_KEY),
        )

    @staticmethod
    def get_config_subtitles() -> ConfigSubtitles:
        """Build a ``ConfigSubtitles`` from the ``subtitles`` section."""
        section = ConfigSubtitles.Key.SECTION

        return ConfigSubtitles(
            highlight_words=ConfigManager.get_value(
                section, ConfigSubtitles.Key.HIGHLIGHT_WORDS
            ),
            max_line_count=ConfigManager.get_value(
                section, ConfigSubtitles.Key.MAX_LINE_COUNT
            ),
            max_line_width=ConfigManager.get_value(
                section, ConfigSubtitles.Key.MAX_LINE_WIDTH
            ),
        )

    @staticmethod
    def get_value(
        section: KeyType,
        key: KeyType,
        file_path: Path = _FILE_PATH,
    ) -> Optional[Union[str, bool, int, float]]:
        """
        Read one option and convert it to the type declared by the key.

        :param section: Enum member whose value is the section name.
        :param key: Enum member whose value is the option name and whose
            ``value_type()`` selects the conversion.
        :param file_path: Config file to read.
        :return: The converted value. ``None`` when the section or option is
            missing (a message is printed) or when ``value_type()`` is not
            one of ``"str"``, ``"bool"``, ``"int"``, ``"float"``.
        :raises ValueError: If the stored text cannot be converted to the
            declared ``bool``/``int``/``float`` type.
        """
        config = ConfigManager.read_config(file_path)

        section_name = section.value
        key_name = key.value
        key_value_type = key.value_type()

        # Check that the section and key exist before getting the value
        if section_name not in config or key_name not in config[section_name]:
            print(
                f"Section [{section_name}] or Key [{key_name}] not found in the config"
            )
            return None

        getters = {
            "str": config.get,
            "bool": config.getboolean,
            "int": config.getint,
            "float": config.getfloat,
        }
        getter = getters.get(key_value_type)
        if getter is None:
            return None
        return getter(section_name, key_name)

    @staticmethod
    def modify_value(
        section: KeyType,
        key: KeyType,
        new_value: str,
        file_path: Path = _FILE_PATH,
    ):
        """
        Overwrite an existing option and write the file back to disk.

        Nothing is written when the section or option does not already
        exist; a message is printed instead.

        :param section: Enum member whose value is the section name.
        :param key: Enum member whose value is the option name.
        :param new_value: New text for the option. ``ConfigParser.set`` only
            accepts strings.
        :param file_path: Config file to update.
        """
        config = ConfigManager.read_config(file_path)

        section_name = section.value
        key_name = key.value

        # Check that the section and option exist before modifying
        if section_name in config and key_name in config[section_name]:
            config.set(section_name, key_name, new_value)

            with open(file_path, "w") as config_file:
                config.write(config_file)

            # Report the section by name, as get_value does, rather than
            # printing the enum member.
            print(f"Value for [{section_name}][{key_name}] modified to {new_value}")
        else:
            print(
                f"Section [{section_name}] or Key [{key_name}] not found in the config"
            )

104
src/utils/constants.py Normal file
View File

@ -0,0 +1,104 @@
# Name of the application.
APP_NAME = "Audiotext"
# Language code -> display name of the languages the application itself is
# available in. Each display name is written in its own language.
APP_LANGUAGES = {"en": "English", "es": "Español"}
# Language code -> display name.
# Code languages convention: ISO 639-1. Exceptions visible below: some codes
# carry a territory suffix (e.g. "zh_HK", "de_CH") and "fil" has three letters.
AUDIO_LANGUAGES = {
    "af": "Afrikaans",
    "am": "Amharic",
    "ar": "Arabic",
    "hy": "Armenian",
    "az": "Azerbaijan",  # NOTE(review): the language is "Azerbaijani"
    "eu": "Basque",
    "be": "Belarusian",
    "bn": "Bengali",
    "bg": "Bulgarian",
    "ca": "Catalan",
    "zh": "Chinese (China)",
    "zh_HK": "Chinese (Hong Kong)",
    "zh_TW": "Chinese (Taiwan)",
    "hr": "Croatian",
    "cs": "Czech",
    "da": "Danish",
    "nl": "Dutch",
    "en": "English",
    "et": "Estonian",
    "fa": "Farsi",
    "fil": "Filipino",
    "fi": "Finnish",
    "fr": "French",
    "gl": "Galician",
    "ka": "Georgian",
    "de": "German",
    "de_CH": "German (Swiss Standard)",
    "el": "Greek",
    "gu": "Gujarati",
    "iw": "Hebrew",  # NOTE(review): "iw" is the withdrawn code; ISO 639-1 now uses "he"
    "hi": "Hindi",
    "hu": "Hungarian",
    "is": "Icelandic",
    "id": "Indonesian",
    "it": "Italian",
    "it_CH": "Swiss Italian",
    "ja": "Japanese",
    "jv": "Javanese",
    "kn": "Kannada",
    "kk": "Kazakh",
    "km": "Khmer",
    "ko": "Korean",
    "lo": "Lao",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "ms": "Malay",
    "ml": "Malayalam",
    "mt": "Maltese",
    "mr": "Marathi",
    "mn": "Mongolian",
    "ne": "Nepali",
    "no": "Norwegian",
    "nn": "Norwegian Nynorsk",
    "pl": "Polish",
    "pt": "Português",  # NOTE(review): only non-English name in this table — confirm intended
    "pa": "Punjabi",
    "ro": "Romanian",
    "ru": "Russian",
    "sr": "Serbian",
    "si": "Sinhala",
    "sk": "Slovak",
    "sl": "Slovenian",
    "es": "Spanish",
    "su": "Sundanese",
    "sw": "Swahili",
    "sv": "Swedish",
    "ta": "Tamil",
    "te": "Telugu",
    "th": "Thai",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zu": "Zulu",
}
# Audio file extensions; lowercase, each including the leading dot.
AUDIO_FILE_EXTENSIONS = [
    ".mp3",
    ".mpeg",
    ".wav",
    ".wma",
    ".aac",
    ".flac",
    ".ogg",
    ".oga",
    ".opus",
]
# Video file extensions; lowercase, each including the leading dot. Entries
# are grouped one container family per line, named in the trailing comment.
# NOTE(review): the MP4 line also holds audio-only extensions (".m4a",
# ".m4b", ".m4r", ".f4a", ".f4b") — confirm they belong in this list.
# fmt: off
VIDEO_FILE_EXTENSIONS = [
    ".mp4", ".m4a", ".m4v", ".f4v", ".f4a", ".m4b", ".m4r", ".f4b", ".mov",  # MP4
    ".avi",  # AVI
    ".webm",  # WebM
    ".flv",  # FLV
    ".mkv",  # MKV
    ".3gp", ".3gp2", ".3g2", ".3gpp", ".3gpp2",  # 3GP
    ".ogv", ".ogx",  # OGG
    ".wmv", ".asf"  # WMV / ASF
]

5
src/utils/dict_utils.py Normal file
View File

@ -0,0 +1,5 @@
def find_key_by_value(dictionary, target_value):
    """
    Return the first key of ``dictionary`` whose value equals ``target_value``.

    Keys are examined in the dictionary's iteration order. Returns ``None``
    when no value matches.
    """
    matching_keys = (
        key for key, value in dictionary.items() if value == target_value
    )
    return next(matching_keys, None)

40
src/utils/enums.py Normal file
View File

@ -0,0 +1,40 @@
from enum import Enum
class AudioSource(Enum):
    """Kinds of audio source; each value is a human-readable label."""

    FILE = "File"
    MIC = "Microphone"
    YOUTUBE = "YouTube"
class TranscriptionMethod(Enum):
    """Transcription methods, identified by integer values."""

    WHISPERX = 0
    GOOGLE_API = 1
class Color(Enum):
    """
    Hex color strings for red and blue widgets and their hover states.

    NOTE(review): the LIGHT_/DARK_ pairs presumably correspond to the light
    and dark appearance modes — confirm against the widgets that use them.
    """

    LIGHT_RED = "#D30000"
    DARK_RED = "#8b0000"
    HOVER_LIGHT_RED = "#BF0000"
    HOVER_DARK_RED = "#610000"
    LIGHT_BLUE = "#3B8ED0"
    DARK_BLUE = "#1F6AA5"
    HOVER_LIGHT_BLUE = "#36719F"
    HOVER_DARK_BLUE = "#144870"
class ComputeType(Enum):
    """
    Numeric precision names.

    NOTE(review): presumably the allowed values of the WhisperX
    ``compute_type`` setting — confirm.
    """

    INT8 = "int8"
    FLOAT16 = "float16"
    FLOAT32 = "float32"
class ModelSize(Enum):
    """
    Model size identifiers.

    NOTE(review): presumably the allowed values of the WhisperX
    ``model_size`` setting — confirm.
    """

    TINY = "tiny"
    BASE = "base"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE_V1 = "large-v1"
    LARGE_V2 = "large-v2"
    LARGE_V3 = "large-v3"

44
src/utils/i18n.py Normal file
View File

@ -0,0 +1,44 @@
import gettext
import locale
import utils.constants as c
import utils.path_helper as ph
# Translation function. load_translation() rebinds this to the ``gettext``
# method of the catalog it loads; it is ``None`` until then.
_ = None
def load_translation(language_code: str):
    """
    Load and install the translation catalog for the given language code.

    Only the language part of the code is used (``"es_ES"`` -> ``"es"``).
    A language that is not in ``c.APP_LANGUAGES``, or a code that is not a
    string (e.g. ``None``), falls back to ``"en"``. The catalog is looked up
    under ``res/locales`` with ``fallback=True``, so a missing catalog file
    yields untranslated strings instead of an error. The loaded translation
    is installed and the module-level ``_`` is rebound to its ``gettext``.

    :param language_code: The code for the language to be used for
        translation, optionally with a territory suffix.
    :type language_code: str
    """
    try:
        lang_code_without_territory = language_code.split("_")[0]
    except (AttributeError, TypeError):
        # Not a usable string: fall back to English and reset the process
        # locale to match.
        lang_code_without_territory = "en"
        try:
            locale.setlocale(locale.LC_ALL, "en_US")
        except locale.Error:
            # "en_US" is not installed on every system. The locale is only
            # a nicety here, so keep the current one instead of crashing.
            pass
    else:
        if lang_code_without_territory not in c.APP_LANGUAGES:
            lang_code_without_territory = "en"

    translation = gettext.translation(
        "app",
        localedir=ph.ROOT_PATH / "res/locales",
        languages=[lang_code_without_territory],
        fallback=True,
    )
    translation.install()

    global _
    _ = translation.gettext
# Runs at import time, so ``_`` is bound as soon as this module is imported.
load_translation("en")
# Disabled alternative: take the language from the system locale instead.
# load_translation(locale.getdefaultlocale()[0][:2])

26
src/utils/path_helper.py Normal file
View File

@ -0,0 +1,26 @@
import sys
from pathlib import Path
def get_path(relative_path: str = "") -> Path:
    """
    Resolve a path relative to the application's base directory.

    Inside a PyInstaller bundle the base directory is the temporary folder
    PyInstaller exposes as ``sys._MEIPASS``. Otherwise it is the directory
    two levels above the one containing this module.

    :param relative_path: The relative path to the application's base path.
        Default is an empty string, which returns the base directory itself.
    :type relative_path: str
    :return: The base directory joined with ``relative_path``.
    :rtype: Path
    """
    # The attribute only exists when running from a PyInstaller bundle.
    # Probing for it explicitly avoids a catch-all ``except`` that would
    # also hide unrelated errors.
    bundle_dir = getattr(sys, "_MEIPASS", None)

    if bundle_dir is not None:
        base_path = Path(bundle_dir)
    else:
        base_path = Path(__file__).parent.parent.parent

    return base_path / relative_path
# Location of the image resources, relative to the base directory.
IMG_RELATIVE_PATH = "res/img"
# Base directory of the application, resolved once at import time.
ROOT_PATH = get_path("")

0
src/view/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,168 @@
from typing import Optional, Tuple, Union
import customtkinter as ctk
from utils.enums import Color
class CTkInputDialog(ctk.CTkToplevel):
    """
    Dialog with extra window, message, entry widget, cancel and ok button.

    The window grabs input, stays on top and cannot be resized. Call
    ``get_input()`` to wait for the user and obtain the entered text.
    """

    def __init__(
        self,
        fg_color: Optional[Union[str, Tuple[str, str]]] = None,
        text_color: Optional[Union[str, Tuple[str, str]]] = None,
        button_fg_color: Optional[Union[str, Tuple[str, str]]] = None,
        button_hover_color: Optional[Union[str, Tuple[str, str]]] = None,
        button_text_color: Optional[Union[str, Tuple[str, str]]] = None,
        entry_fg_color: Optional[Union[str, Tuple[str, str]]] = None,
        entry_border_color: Optional[Union[str, Tuple[str, str]]] = None,
        entry_text_color: Optional[Union[str, Tuple[str, str]]] = None,
        title: str = "CTkDialog",
        font: Optional[Union[tuple, ctk.CTkFont]] = None,
        label_text: str = "CTkDialog",
        entry_text: Optional[str] = None,
    ):
        """
        Create the dialog window; its widgets are built shortly afterwards.

        Every color argument left as ``None`` is taken from the active
        theme; any other value is validated with ``_check_color_type``.

        :param title: Window title.
        :param font: Font shared by the label, the entry and both buttons.
        :param label_text: Message shown above the entry.
        :param entry_text: Initial content of the entry.
        """
        super().__init__(fg_color=fg_color)

        def themed(color, widget_name, option):
            # The theme default when no color was given, else the validated color.
            if color is None:
                return ctk.ThemeManager.theme[widget_name][option]
            return self._check_color_type(color)

        self._fg_color = themed(fg_color, "CTkToplevel", "fg_color")
        # Validate ``text_color`` itself. ``button_hover_color`` used to be
        # checked here, so a custom text color was replaced by the hover
        # color (or rejected when no hover color was given).
        self._text_color = themed(text_color, "CTkLabel", "text_color")
        self._button_fg_color = themed(button_fg_color, "CTkButton", "fg_color")
        self._button_hover_color = themed(
            button_hover_color, "CTkButton", "hover_color"
        )
        self._button_text_color = themed(
            button_text_color, "CTkButton", "text_color"
        )
        self._entry_fg_color = themed(entry_fg_color, "CTkEntry", "fg_color")
        self._entry_border_color = themed(
            entry_border_color, "CTkEntry", "border_color"
        )
        self._entry_text_color = themed(entry_text_color, "CTkEntry", "text_color")

        # Stays ``None`` unless the user confirms with Ok / Return.
        self._user_input: Union[str, None] = None
        self._running: bool = False
        self._title = title
        self._label_text = label_text
        self._entry_text = entry_text
        self._font = font

        self.title(self._title)
        self.lift()  # lift window on top
        self.attributes("-topmost", True)  # stay on top
        self.protocol("WM_DELETE_WINDOW", self._on_closing)
        self.after(
            10, self._create_widgets
        )  # create widgets with slight delay, to avoid white flickering of background
        self.resizable(False, False)
        self.grab_set()  # make other windows not clickable

    def _create_widgets(self):
        """Build and lay out the label, the entry and the Ok / Cancel buttons."""
        self.grid_columnconfigure((0, 1), weight=1)
        self.rowconfigure(0, weight=1)

        self._label = ctk.CTkLabel(
            master=self,
            width=300,
            wraplength=300,
            fg_color="transparent",
            text_color=self._text_color,
            text=self._label_text,
            font=self._font,
        )
        self._label.grid(row=0, column=0, columnspan=2, padx=20, pady=20, sticky="ew")

        self._entry = ctk.CTkEntry(
            master=self,
            width=230,
            fg_color=self._entry_fg_color,
            border_color=self._entry_border_color,
            text_color=self._entry_text_color,
            font=self._font,
            textvariable=ctk.StringVar(self, self._entry_text),
        )
        self._entry.grid(
            row=1, column=0, columnspan=2, padx=20, pady=(0, 20), sticky="ew"
        )

        self._ok_button = ctk.CTkButton(
            master=self,
            width=100,
            border_width=0,
            fg_color=self._button_fg_color,
            hover_color=self._button_hover_color,
            text_color=self._button_text_color,
            text="Ok",
            font=self._font,
            command=self._ok_event,
        )
        self._ok_button.grid(
            row=2, column=0, columnspan=1, padx=(20, 10), pady=(0, 20), sticky="ew"
        )

        # The cancel button always uses the red palette, regardless of the
        # button colors passed to the constructor.
        self._cancel_button = ctk.CTkButton(
            master=self,
            width=100,
            border_width=0,
            fg_color=(Color.LIGHT_RED.value, Color.DARK_RED.value),
            hover_color=(
                Color.HOVER_LIGHT_RED.value,
                Color.HOVER_DARK_RED.value,
            ),
            text_color=self._button_text_color,
            text="Cancel",
            font=self._font,
            command=self._cancel_event,
        )
        self._cancel_button.grid(
            row=2, column=1, columnspan=1, padx=(10, 20), pady=(0, 20), sticky="ew"
        )

        # set focus to entry with slight delay, otherwise it won't work
        self.after(150, lambda: self._entry.focus())
        self._entry.bind("<Return>", self._ok_event)

    def _ok_event(self, event=None):
        """
        Store the entry's text and close the dialog.

        :param event: Event object supplied when triggered by the
            ``<Return>`` binding; absent when triggered by the Ok button.
            Without this parameter, pressing Return raised ``TypeError``.
        """
        self._user_input = self._entry.get()
        self.grab_release()
        self.destroy()

    def _on_closing(self):
        """Close the dialog without storing any input (window close button)."""
        self.grab_release()
        self.destroy()

    def _cancel_event(self):
        """Close the dialog without storing any input (Cancel button)."""
        self.grab_release()
        self.destroy()

    def get_input(self):
        """
        Wait until the dialog is destroyed and return what the user entered.

        :return: The entry's text when confirmed with Ok or Return; ``None``
            when the dialog was cancelled or closed.
        """
        self.master.wait_window(self)
        return self._user_input

View File

@ -0,0 +1,12 @@
"""
CustomTkinter Scrollable Dropdown Menu
Author: Akash Bora
License: MIT
This is a custom dropdown menu for customtkinter.
Homepage: https://github.com/Akascape/CTkScrollableDropdown
"""
__version__ = "1.0"
from .ctk_scrollable_dropdown import CTkScrollableDropdown
from .ctk_scrollable_dropdown_frame import CTkScrollableDropdownFrame

View File

@ -0,0 +1,453 @@
"""
Advanced Scrollable Dropdown class for customtkinter widgets
Author: Akash Bora
"""
import difflib
import sys
import time
import customtkinter
class CTkScrollableDropdown(customtkinter.CTkToplevel):
    """
    Scrollable dropdown shown in its own borderless top-level window.

    The dropdown is attached to another widget (``attach``) and toggled by
    clicking it. One button is created per entry of ``values``; pressing a
    button calls ``command`` with that entry. The virtual events
    ``<<Opened>>``, ``<<Closed>>`` and ``<<Selected>>`` are generated on the
    dropdown window.
    """

    def __init__(
        self,
        attach,
        x=None,
        y=None,
        button_color=None,
        height: int = 200,
        width: int = None,
        fg_color=None,
        button_height: int = 20,
        justify="center",
        scrollbar_button_color=None,
        scrollbar=True,
        scrollbar_button_hover_color=None,
        frame_border_width=2,
        values=None,
        command=None,
        image_values=None,
        alpha: float = 0.97,
        frame_corner_radius=20,
        double_click=False,
        resize=True,
        frame_border_color=None,
        text_color=None,
        autocomplete=False,
        hover_color=None,
        **button_kwargs
    ):
        """
        Create the dropdown window and bind it to ``attach``.

        :param attach: Widget the dropdown belongs to and is placed under.
        :param x: Optional horizontal offset from ``attach``.
        :param y: Optional vertical offset from ``attach``.
        :param height: Maximum height of the dropdown.
        :param width: Fixed width; ``None`` uses the width of ``attach``.
        :param justify: ``"left"``, ``"right"`` or anything else for centered.
        :param values: Entries to show; ``None`` means no entries.
        :param command: Called with the selected entry. For combo boxes and
            option menus it defaults to ``attach.set``.
        :param image_values: One image per entry; ignored unless its length
            equals the number of entries.
        :param alpha: Window opacity while shown.
        :param double_click: Open on double click instead of single click.
        :param resize: Shrink the dropdown to fit when there are few entries.
        :param autocomplete: Filter the entries while typing in ``attach``.
        :param button_kwargs: Extra options for the entry buttons.

        Color arguments left as ``None`` are taken from the active theme.
        """
        super().__init__(takefocus=1)

        self.focus()
        self.lift()
        self.alpha = alpha
        self.attach = attach
        self.corner = frame_corner_radius
        self.padding = 0
        self.focus_something = False
        # Blocks the event callbacks bound below until setup has progressed.
        self.disable = True
        self.update()

        if sys.platform.startswith("win"):
            self.after(100, lambda: self.overrideredirect(True))
            self.transparent_color = self._apply_appearance_mode(self._fg_color)
            self.attributes("-transparentcolor", self.transparent_color)
        elif sys.platform.startswith("darwin"):
            self.overrideredirect(True)
            self.transparent_color = "systemTransparent"
            self.attributes("-transparent", True)
            self.focus_something = True
        else:
            self.overrideredirect(True)
            self.transparent_color = "#000001"
            self.corner = 0
            self.padding = 18
            self.withdraw()

        self.hide = True
        self.attach.bind(
            "<Configure>",
            lambda e: self._withdraw() if not self.disable else None,
            add="+",
        )
        self.attach.winfo_toplevel().bind(
            "<Configure>",
            lambda e: self._withdraw() if not self.disable else None,
            add="+",
        )
        self.attach.winfo_toplevel().bind(
            "<ButtonPress>",
            lambda e: self._withdraw() if not self.disable else None,
            add="+",
        )

        self.attributes("-alpha", 0)

        self.disable = False
        self.fg_color = (
            customtkinter.ThemeManager.theme["CTkFrame"]["fg_color"]
            if fg_color is None
            else fg_color
        )
        self.scroll_button_color = (
            customtkinter.ThemeManager.theme["CTkScrollbar"]["button_color"]
            if scrollbar_button_color is None
            else scrollbar_button_color
        )
        self.scroll_hover_color = (
            customtkinter.ThemeManager.theme["CTkScrollbar"]["button_hover_color"]
            if scrollbar_button_hover_color is None
            else scrollbar_button_hover_color
        )
        self.frame_border_color = (
            customtkinter.ThemeManager.theme["CTkFrame"]["border_color"]
            if frame_border_color is None
            else frame_border_color
        )
        self.button_color = (
            customtkinter.ThemeManager.theme["CTkFrame"]["top_fg_color"]
            if button_color is None
            else button_color
        )
        self.text_color = (
            customtkinter.ThemeManager.theme["CTkLabel"]["text_color"]
            if text_color is None
            else text_color
        )
        self.hover_color = (
            customtkinter.ThemeManager.theme["CTkButton"]["hover_color"]
            if hover_color is None
            else hover_color
        )

        if scrollbar is False:
            # Paint the scrollbar in the frame color so it is not visible.
            self.scroll_button_color = self.fg_color
            self.scroll_hover_color = self.fg_color

        self.frame = customtkinter.CTkScrollableFrame(
            self,
            bg_color=self.transparent_color,
            fg_color=self.fg_color,
            scrollbar_button_hover_color=self.scroll_hover_color,
            corner_radius=self.corner,
            border_width=frame_border_width,
            scrollbar_button_color=self.scroll_button_color,
            border_color=self.frame_border_color,
        )
        self.frame._scrollbar.grid_configure(padx=3)
        self.frame.pack(expand=True, fill="both")
        self.dummy_entry = customtkinter.CTkEntry(
            self.frame, fg_color="transparent", border_width=0, height=1, width=1
        )
        self.no_match = customtkinter.CTkLabel(self.frame, text="No Match")
        self.height = height
        self.height_new = height
        self.width = width
        self.command = command
        self.fade = False
        self.resize = resize
        self.autocomplete = autocomplete
        self.var_update = customtkinter.StringVar()
        self.appear = False

        if justify.lower() == "left":
            self.justify = "w"
        elif justify.lower() == "right":
            self.justify = "e"
        else:
            self.justify = "c"

        self.button_height = button_height
        # ``insert`` appends to ``self.values``; a fresh list per instance
        # keeps entries from leaking between dropdowns created without values.
        self.values = [] if values is None else values
        image_values = [] if image_values is None else image_values
        self.button_num = len(self.values)
        self.image_values = (
            None if len(image_values) != len(self.values) else image_values
        )

        self.resizable(width=False, height=False)
        self.transient(self.master)
        self._init_buttons(**button_kwargs)

        # Add binding for different ctk widgets
        if (
            double_click
            or self.attach.winfo_name().startswith("!ctkentry")
            or self.attach.winfo_name().startswith("!ctkcombobox")
        ):
            self.attach.bind("<Double-Button-1>", lambda e: self._iconify(), add="+")
        else:
            self.attach.bind("<Button-1>", lambda e: self._iconify(), add="+")

        if self.attach.winfo_name().startswith("!ctkcombobox"):
            self.attach._canvas.tag_bind(
                "right_parts", "<Button-1>", lambda e: self._iconify()
            )
            self.attach._canvas.tag_bind(
                "dropdown_arrow", "<Button-1>", lambda e: self._iconify()
            )
            if self.command is None:
                self.command = self.attach.set

        if self.attach.winfo_name().startswith("!ctkoptionmenu"):
            self.attach._canvas.bind("<Button-1>", lambda e: self._iconify())
            self.attach._text_label.bind("<Button-1>", lambda e: self._iconify())
            if self.command is None:
                self.command = self.attach.set

        self.attach.bind("<Destroy>", lambda _: self._destroy(), add="+")

        self.update_idletasks()
        self.x = x
        self.y = y

        if self.autocomplete:
            self.bind_autocomplete()

        self.deiconify()
        self.withdraw()

        self.attributes("-alpha", self.alpha)

    def _destroy(self):
        """Schedule destruction of the dropdown after ``attach`` is destroyed."""
        self.after(500, self.destroy_popup)

    def _withdraw(self):
        """Hide the window if it is viewable and flagged hidden; emit ``<<Closed>>``."""
        if self.winfo_viewable() and self.hide:
            self.withdraw()

        self.event_generate("<<Closed>>")
        self.hide = True

    def _update(self, a, b, c):
        """Variable-trace callback: re-filter using the entry's current text."""
        self.live_update(self.attach._entry.get())

    def bind_autocomplete(
        self,
    ):
        """Connect ``attach`` (combo box or entry) to the live filtering."""

        def appear(x):
            # Mark that the user typed, so live_update reacts to the change.
            self.appear = True

        if self.attach.winfo_name().startswith("!ctkcombobox"):
            self.attach._entry.configure(textvariable=self.var_update)
            self.attach._entry.bind("<Key>", appear)
            # Nothing to preselect when the dropdown has no entries.
            if self.values:
                self.attach.set(self.values[0])
            self.var_update.trace_add("write", self._update)

        if self.attach.winfo_name().startswith("!ctkentry"):
            self.attach.configure(textvariable=self.var_update)
            self.attach.bind("<Key>", appear)
            self.var_update.trace_add("write", self._update)

    def fade_out(self):
        """Step the window opacity down from 1.0; blocks for about 0.1 s."""
        for i in range(100, 0, -10):
            if not self.winfo_exists():
                break
            self.attributes("-alpha", i / 100)
            self.update()
            time.sleep(1 / 100)

    def fade_in(self):
        """Step the window opacity up from 0.0; blocks for about 0.1 s."""
        for i in range(0, 100, 10):
            if not self.winfo_exists():
                break
            self.attributes("-alpha", i / 100)
            self.update()
            time.sleep(1 / 100)

    def _init_buttons(self, **button_kwargs):
        """Create one button per entry of ``self.values`` and pack them."""
        self.i = 0
        self.widgets = {}
        for row in self.values:
            self.widgets[self.i] = customtkinter.CTkButton(
                self.frame,
                text=row,
                height=self.button_height,
                fg_color=self.button_color,
                text_color=self.text_color,
                image=self.image_values[self.i]
                if self.image_values is not None
                else None,
                anchor=self.justify,
                # Bind ``row`` as a default so each button keeps its own entry.
                command=lambda k=row: self._attach_key_press(k),
                **button_kwargs
            )
            self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0))
            self.i += 1

        self.hide = False

    def destroy_popup(self):
        """Destroy the dropdown window and disable its callbacks."""
        self.destroy()
        self.disable = True

    def place_dropdown(self):
        """Size the window, move it below ``attach`` and fade it in."""
        self.x_pos = (
            self.attach.winfo_rootx()
            if self.x is None
            else self.x + self.attach.winfo_rootx()
        )
        self.y_pos = (
            self.attach.winfo_rooty() + self.attach.winfo_reqheight() + 5
            if self.y is None
            else self.y + self.attach.winfo_rooty()
        )
        self.width_new = self.attach.winfo_width() if self.width is None else self.width

        if self.resize:
            if self.button_num <= 5:
                self.height_new = self.button_height * self.button_num + 55
            else:
                self.height_new = self.button_height * self.button_num + 35
            # Never exceed the configured maximum height.
            if self.height_new > self.height:
                self.height_new = self.height

        self.geometry(
            "{}x{}+{}+{}".format(
                self.width_new, self.height_new, self.x_pos, self.y_pos
            )
        )
        self.fade_in()
        self.attributes("-alpha", self.alpha)
        self.attach.focus()

    def _iconify(self):
        """Toggle the dropdown; does nothing while ``attach`` is disabled."""
        if self.attach.cget("state") == "disabled":
            return
        if self.disable:
            return
        if self.hide:
            self.event_generate("<<Opened>>")
            self._deiconify()
            self.focus()
            self.hide = False
            self.place_dropdown()
            if self.focus_something:
                # Set on macOS only: briefly focus a dummy entry.
                self.dummy_entry.pack()
                self.dummy_entry.focus_set()
                self.after(100, self.dummy_entry.pack_forget)
        else:
            self.withdraw()
            self.hide = True

    def _attach_key_press(self, k):
        """Handle a click on entry ``k``: run ``command`` and hide the dropdown."""
        self.event_generate("<<Selected>>")
        # Suppresses live_update while ``command`` changes the attached widget.
        self.fade = True
        if self.command:
            self.command(k)
        self.fade = False
        self.fade_out()
        self.withdraw()
        self.hide = True

    def live_update(self, string=None):
        """
        Filter the visible entries by ``string`` (autocomplete).

        An entry stays visible when it starts with ``string`` or when its
        prefix of the same length is more than 75 % similar to it. An empty
        ``string`` rebuilds the full list. Only acts after a key press.
        """
        if not self.appear:
            return
        if self.disable:
            return
        if self.fade:
            return
        if string:
            string = string.lower()
            self._deiconify()
            # Starts at 1, so ``i`` ends as the number of matches plus one.
            i = 1
            for key in self.widgets.keys():
                s = self.widgets[key].cget("text").lower()
                text_similarity = difflib.SequenceMatcher(
                    None, s[0 : len(string)], string
                ).ratio()
                similar = s.startswith(string) or text_similarity > 0.75
                if not similar:
                    self.widgets[key].pack_forget()
                else:
                    self.widgets[key].pack(fill="x", pady=2, padx=(self.padding, 0))
                    i += 1

            if i == 1:
                self.no_match.pack(fill="x", pady=2, padx=(self.padding, 0))
            else:
                self.no_match.pack_forget()
            self.button_num = i
            self.place_dropdown()

        else:
            self.no_match.pack_forget()
            self.button_num = len(self.values)
            for key in self.widgets.keys():
                self.widgets[key].destroy()
            self._init_buttons()
            self.place_dropdown()

        self.frame._parent_canvas.yview_moveto(0.0)
        self.appear = False

    def insert(self, value, **kwargs):
        """Append one entry and its button; ``kwargs`` are button options."""
        self.widgets[self.i] = customtkinter.CTkButton(
            self.frame,
            text=value,
            height=self.button_height,
            fg_color=self.button_color,
            text_color=self.text_color,
            anchor=self.justify,
            command=lambda k=value: self._attach_key_press(k),
            **kwargs
        )
        self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0))
        self.i += 1
        self.values.append(value)

    def _deiconify(self):
        """Show the window, but only when there is at least one entry."""
        if len(self.values) > 0:
            self.deiconify()

    def popup(self, x=None, y=None):
        """Open the dropdown, optionally at offsets ``x`` / ``y`` from ``attach``."""
        self.x = x
        self.y = y
        self.hide = True
        self._iconify()

    def configure(self, **kwargs):
        """
        Change dropdown options.

        ``height``, ``alpha``, ``width``, ``fg_color``, ``values``,
        ``image_values`` and ``button_color`` are handled here. All remaining
        options are forwarded to every entry button, with ``hover_color``
        defaulting to the dropdown's own hover color.
        """
        if "height" in kwargs:
            self.height = kwargs.pop("height")
            self.height_new = self.height

        if "alpha" in kwargs:
            self.alpha = kwargs.pop("alpha")

        if "width" in kwargs:
            self.width = kwargs.pop("width")

        if "fg_color" in kwargs:
            self.frame.configure(fg_color=kwargs.pop("fg_color"))

        if "values" in kwargs:
            self.values = kwargs.pop("values")
            self.image_values = None
            self.button_num = len(self.values)
            for key in self.widgets.keys():
                self.widgets[key].destroy()
            self._init_buttons()

        if "image_values" in kwargs:
            self.image_values = kwargs.pop("image_values")
            self.image_values = (
                None
                if len(self.image_values) != len(self.values)
                else self.image_values
            )
            if self.image_values is not None:
                i = 0
                for key in self.widgets.keys():
                    self.widgets[key].configure(image=self.image_values[i])
                    i += 1

        if "button_color" in kwargs:
            # Pop once, before the loop: popping per widget raised KeyError
            # on the second button. Keep the color so buttons created later
            # use it as well.
            self.button_color = kwargs.pop("button_color")
            for key in self.widgets.keys():
                self.widgets[key].configure(fg_color=self.button_color)

        if "hover_color" not in kwargs:
            kwargs["hover_color"] = self.hover_color

        for key in self.widgets.keys():
            self.widgets[key].configure(**kwargs)

View File

@ -0,0 +1,399 @@
"""
Advanced Scrollable Dropdown Frame class for customtkinter widgets
Author: Akash Bora
"""
import difflib
import sys
import customtkinter
class CTkScrollableDropdownFrame(customtkinter.CTkFrame):
def __init__(
self,
attach,
x=None,
y=None,
button_color=None,
height: int = 200,
width: int = None,
fg_color=None,
button_height: int = 20,
justify="center",
scrollbar_button_color=None,
scrollbar=True,
scrollbar_button_hover_color=None,
frame_border_width=2,
values=[],
command=None,
image_values=[],
double_click=False,
frame_corner_radius=True,
resize=True,
frame_border_color=None,
text_color=None,
autocomplete=False,
**button_kwargs
):
super().__init__(
master=attach.winfo_toplevel(), bg_color=attach.cget("bg_color")
)
self.attach = attach
self.corner = 11 if frame_corner_radius else 0
self.padding = 0
self.disable = True
self.hide = True
self.attach.bind(
"<Configure>",
lambda e: self._withdraw() if not self.disable else None,
add="+",
)
self.attach.winfo_toplevel().bind(
"<ButtonPress>",
lambda e: self._withdraw() if not self.disable else None,
add="+",
)
self.disable = False
self.fg_color = (
customtkinter.ThemeManager.theme["CTkFrame"]["fg_color"]
if fg_color is None
else fg_color
)
self.scroll_button_color = (
customtkinter.ThemeManager.theme["CTkScrollbar"]["button_color"]
if scrollbar_button_color is None
else scrollbar_button_color
)
self.scroll_hover_color = (
customtkinter.ThemeManager.theme["CTkScrollbar"]["button_hover_color"]
if scrollbar_button_hover_color is None
else scrollbar_button_hover_color
)
self.frame_border_color = (
customtkinter.ThemeManager.theme["CTkFrame"]["border_color"]
if frame_border_color is None
else frame_border_color
)
self.button_color = (
customtkinter.ThemeManager.theme["CTkFrame"]["top_fg_color"]
if button_color is None
else button_color
)
self.text_color = (
customtkinter.ThemeManager.theme["CTkLabel"]["text_color"]
if text_color is None
else text_color
)
if scrollbar is False:
self.scroll_button_color = self.fg_color
self.scroll_hover_color = self.fg_color
self.frame = customtkinter.CTkScrollableFrame(
self,
fg_color=self.fg_color,
bg_color=attach.cget("bg_color"),
scrollbar_button_hover_color=self.scroll_hover_color,
corner_radius=self.corner,
border_width=frame_border_width,
scrollbar_button_color=self.scroll_button_color,
border_color=self.frame_border_color,
)
self.frame._scrollbar.grid_configure(padx=3)
self.frame.pack(expand=True, fill="both")
if self.corner == 0:
self.corner = 21
self.dummy_entry = customtkinter.CTkEntry(
self.frame, fg_color="transparent", border_width=0, height=1, width=1
)
self.no_match = customtkinter.CTkLabel(self.frame, text="No Match")
self.height = height
self.height_new = height
self.width = width
self.command = command
self.fade = False
self.resize = resize
self.autocomplete = autocomplete
self.var_update = customtkinter.StringVar()
self.appear = False
if justify.lower() == "left":
self.justify = "w"
elif justify.lower() == "right":
self.justify = "e"
else:
self.justify = "c"
self.button_height = button_height
self.values = values
self.button_num = len(self.values)
self.image_values = (
None if len(image_values) != len(self.values) else image_values
)
self._init_buttons(**button_kwargs)
# Add binding for different ctk widgets
if (
double_click
or self.attach.winfo_name().startswith("!ctkentry")
or self.attach.winfo_name().startswith("!ctkcombobox")
):
self.attach.bind("<Double-Button-1>", lambda e: self._iconify(), add="+")
self.attach._entry.bind(
"<FocusOut>",
lambda e: self._withdraw() if not self.disable else None,
add="+",
)
else:
self.attach.bind("<Button-1>", lambda e: self._iconify(), add="+")
if self.attach.winfo_name().startswith("!ctkcombobox"):
self.attach._canvas.tag_bind(
"right_parts", "<Button-1>", lambda e: self._iconify()
)
self.attach._canvas.tag_bind(
"dropdown_arrow", "<Button-1>", lambda e: self._iconify()
)
if self.command is None:
self.command = self.attach.set
if self.attach.winfo_name().startswith("!ctkoptionmenu"):
self.attach._canvas.bind("<Button-1>", lambda e: self._iconify())
self.attach._text_label.bind("<Button-1>", lambda e: self._iconify())
if self.command is None:
self.command = self.attach.set
self.x = x
self.y = y
self.attach.bind("<Destroy>", lambda _: self._destroy(), add="+")
if self.autocomplete:
self.bind_autocomplete()
def _destroy(self):
self.after(500, self.destroy_popup)
def _withdraw(self):
if self.winfo_viewable() and self.hide:
self.place_forget()
self.event_generate("<<Closed>>")
self.hide = True
def _update(self, a, b, c):
self.live_update(self.attach._entry.get())
def bind_autocomplete(self):
def appear(x):
self.appear = True
if self.attach.winfo_name().startswith("!ctkcombobox"):
self.attach._entry.configure(textvariable=self.var_update)
self.attach.set(self.values[0])
self.attach._entry.bind("<Key>", appear)
self.var_update.trace_add("write", self._update)
if self.attach.winfo_name().startswith("!ctkentry"):
self.attach.configure(textvariable=self.var_update)
self.attach.bind("<Key>", appear)
self.var_update.trace_add("write", self._update)
def _init_buttons(self, **button_kwargs):
self.i = 0
self.widgets = {}
for row in self.values:
self.widgets[self.i] = customtkinter.CTkButton(
self.frame,
text=row,
height=self.button_height,
fg_color=self.button_color,
text_color=self.text_color,
image=self.image_values[self.i]
if self.image_values is not None
else None,
anchor=self.justify,
command=lambda k=row: self._attach_key_press(k),
**button_kwargs
)
self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0))
self.i += 1
self.hide = False
def destroy_popup(self):
self.destroy()
self.disable = True
def place_dropdown(self):
self.x_pos = (
self.attach.winfo_x()
if self.x is None
else self.x + self.attach.winfo_rootx()
)
self.y_pos = (
self.attach.winfo_y() + self.attach.winfo_reqheight() + 5
if self.y is None
else self.y + self.attach.winfo_rooty()
)
self.width_new = (
self.attach.winfo_width() - 45 + self.corner
if self.width is None
else self.width
)
if self.resize:
if self.button_num <= 5:
self.height_new = self.button_height * self.button_num + 55
else:
self.height_new = self.button_height * self.button_num + 35
if self.height_new > self.height:
self.height_new = self.height
self.frame.configure(width=self.width_new, height=self.height_new)
self.place(x=self.x_pos, y=self.y_pos)
if sys.platform.startswith("darwin"):
self.dummy_entry.pack()
self.after(100, self.dummy_entry.pack_forget())
self.lift()
self.attach.focus()
def _iconify(self):
if self.attach.cget("state") == "disabled":
return
if self.disable:
return
if self.hide:
self.event_generate("<<Opened>>")
self.hide = False
self.place_dropdown()
else:
self.place_forget()
self.hide = True
def _attach_key_press(self, k):
self.event_generate("<<Selected>>")
self.fade = True
if self.command:
self.command(k)
self.fade = False
self.place_forget()
self.hide = True
def live_update(self, string=None):
if not self.appear:
return
if self.disable:
return
if self.fade:
return
if string:
string = string.lower()
self._deiconify()
i = 1
for key in self.widgets.keys():
s = self.widgets[key].cget("text").lower()
text_similarity = difflib.SequenceMatcher(
None, s[0 : len(string)], string
).ratio()
similar = s.startswith(string) or text_similarity > 0.75
if not similar:
self.widgets[key].pack_forget()
else:
self.widgets[key].pack(fill="x", pady=2, padx=(self.padding, 0))
i += 1
if i == 1:
self.no_match.pack(fill="x", pady=2, padx=(self.padding, 0))
else:
self.no_match.pack_forget()
self.button_num = i
self.place_dropdown()
else:
self.no_match.pack_forget()
self.button_num = len(self.values)
for key in self.widgets.keys():
self.widgets[key].destroy()
self._init_buttons()
self.place_dropdown()
self.frame._parent_canvas.yview_moveto(0.0)
self.appear = False
def insert(self, value, **kwargs):
    """
    Append a new entry to the dropdown.

    A button showing ``value`` is created inside ``self.frame``, stored in
    ``self.widgets`` under the running index ``self.i`` and packed below the
    existing entries. Clicking it calls ``_attach_key_press`` with ``value``.

    :param value: Text of the new entry
    :param kwargs: Extra keyword arguments passed on to ``CTkButton``
    """
    index = self.i
    button = customtkinter.CTkButton(
        self.frame,
        text=value,
        height=self.button_height,
        fg_color=self.button_color,
        text_color=self.text_color,
        anchor=self.justify,
        # ``k=value`` binds the value now instead of at click time
        command=lambda k=value: self._attach_key_press(k),
        **kwargs
    )
    self.widgets[index] = button
    button.pack(fill="x", pady=2, padx=(self.padding, 0))

    self.i = index + 1
    self.values.append(value)
def _deiconify(self):
    """Call ``pack_forget`` on the dropdown, but only when it has values."""
    if not self.values:
        return
    self.pack_forget()
def popup(self, x=None, y=None):
    """
    Show the dropdown, optionally at a custom offset.

    :param x: Value stored in ``self.x`` (``None`` keeps the default placement)
    :param y: Value stored in ``self.y`` (``None`` keeps the default placement)
    """
    self.x, self.y = x, y
    # Mark as hidden first so that ``_iconify`` takes its "show" branch
    self.hide = True
    self._iconify()
def configure(self, **kwargs):
    """
    Update options of the dropdown and of its entry buttons.

    The options ``height``, ``alpha``, ``width``, ``fg_color``, ``values``,
    ``image_values`` and ``button_color`` are consumed here. Whatever is left
    in ``kwargs`` is forwarded to ``configure`` of every entry button.

    :param kwargs: Options to change
    """
    if "height" in kwargs:
        self.height = kwargs.pop("height")
        self.height_new = self.height

    if "alpha" in kwargs:
        self.alpha = kwargs.pop("alpha")

    if "width" in kwargs:
        self.width = kwargs.pop("width")

    if "fg_color" in kwargs:
        self.frame.configure(fg_color=kwargs.pop("fg_color"))

    if "values" in kwargs:
        # New values invalidate the images and require rebuilding the buttons
        self.values = kwargs.pop("values")
        self.image_values = None
        self.button_num = len(self.values)
        for button in self.widgets.values():
            button.destroy()
        self._init_buttons()

    if "image_values" in kwargs:
        image_values = kwargs.pop("image_values")
        # Images are only used when there is exactly one per value
        if len(image_values) != len(self.values):
            image_values = None
        self.image_values = image_values
        if image_values is not None:
            for button, image in zip(self.widgets.values(), image_values):
                button.configure(image=image)

    if "button_color" in kwargs:
        # Pop once, before the loop: popping inside the loop raised KeyError
        # on the second button because the key was already gone.
        button_color = kwargs.pop("button_color")
        for button in self.widgets.values():
            button.configure(fg_color=button_color)

    # Remaining options are applied to every entry button as they are
    for button in self.widgets.values():
        button.configure(**kwargs)

774
src/view/main_window.py Normal file
View File

@ -0,0 +1,774 @@
import locale
import tkinter
import customtkinter as ctk
import utils.config_manager as cm
import utils.constants as c
import utils.dict_utils as du
import utils.path_helper as ph
from controller.main_controller import MainController
from model.config.config_google_api import ConfigGoogleApi
from model.config.config_subtitles import ConfigSubtitles
from model.config.config_whisperx import ConfigWhisperX
from model.transcription import Transcription
from PIL import Image
from utils.enums import AudioSource, Color, ComputeType, ModelSize, TranscriptionMethod
from utils.i18n import _
from .custom_widgets.ctk_input_dialog import CTkInputDialog
from .custom_widgets.ctk_scrollable_dropdown import CTkScrollableDropdown
class MainWindow(ctk.CTkFrame):
def __init__(
    self,
    parent,
    config_whisperx: ConfigWhisperX,
    config_google_api: ConfigGoogleApi,
    config_subtitles: ConfigSubtitles,
):
    """
    Create the main window frame and all of its widgets.

    :param parent: Parent widget, passed to the base class constructor
    :param config_whisperx: WhisperX settings used to pre-fill the advanced options
    :type config_whisperx: ConfigWhisperX
    :param config_google_api: Google API settings (source of the current API key)
    :type config_google_api: ConfigGoogleApi
    :param config_subtitles: Subtitle settings used to pre-fill the subtitle options
    :type config_subtitles: ConfigSubtitles
    """
    super().__init__(parent)

    # Grid layout: column 1 and row 2 absorb the extra space
    self.grid_columnconfigure(1, weight=1)
    self.grid_rowconfigure(2, weight=1)

    # Configs read by the widget builders below
    self._config_whisperx = config_whisperx
    self._config_google_api = config_google_api
    self._config_subtitles = config_subtitles

    # The controller is injected later through `set_controller()`
    self._controller = None

    # Build the sidebar first, then the main content
    for build_section in (self._init_sidebar, self._init_main_content):
        build_section()

    # View state
    self._transcribe_from_source = AudioSource.FILE
    self._is_transcribing_from_mic = False

    # ID returned by `after()`, kept so a pending debounced call can be cancelled
    self._after_id = None
# GETTERS AND SETTERS
def set_controller(self, controller: MainController):
    """
    Attach the controller that the window forwards its user actions to.

    :param controller: Controller to store in the view
    :type controller: MainController
    """
    self._controller = controller
def _get_language_code(self):
    """
    Look up the key of `c.AUDIO_LANGUAGES` for the selected audio language.

    :return: Whatever `du.find_key_by_value` returns for the language name
        currently shown in the 'Audio language' option menu
    """
    selected_language = self.omn_audio_language.get()
    return du.find_key_by_value(
        dictionary=c.AUDIO_LANGUAGES, target_value=selected_language
    )
def _get_whisperx_args(self):
    """
    Collect the WhisperX-only keyword arguments for a transcription.

    :return: Empty dict unless WhisperX is the selected method; otherwise a
        dict with the boolean keys "should_translate" and "should_subtitle"
        taken from the two WhisperX checkboxes
    :rtype: dict
    """
    if self.radio_var.get() != TranscriptionMethod.WHISPERX.value:
        return {}

    return {
        "should_translate": self.chk_whisper_options_translate.get() == 1,
        "should_subtitle": self.chk_whisper_options_subtitles.get() == 1,
    }
# WIDGETS INITIALIZATION
def _init_sidebar(self):
    """
    Build the scrollable sidebar and every option frame inside it.

    Each sidebar section is created by its own private helper. The helpers
    are called top to bottom, which keeps the widget creation order of the
    former single-method implementation.
    """
    # Sidebar frame
    self.frm_sidebar = ctk.CTkScrollableFrame(
        master=self, width=230, corner_radius=0
    )
    self.frm_sidebar.grid(row=0, column=0, rowspan=4, sticky=ctk.NSEW)
    self.frm_sidebar.grid_rowconfigure(10, weight=1)

    self._init_sidebar_logo()
    self._init_shared_options_frame()
    self._init_transcribe_using_frame()
    self._init_whisper_options_frame()
    self._init_google_api_options_frame()
    self._init_subtitle_options_frame()
    self._init_whisperx_advanced_options_frame()
    self._init_sidebar_footer()


def _init_sidebar_logo(self):
    """Create the logo label (icon + app name) in row 0 of the sidebar."""
    self.logo_image = ctk.CTkImage(
        light_image=Image.open(
            ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "icon-light.png"
        ),
        dark_image=Image.open(
            ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "icon-dark.png"
        ),
        size=(32, 32),
    )
    self.lbl_logo = ctk.CTkLabel(
        master=self.frm_sidebar,
        text=f" {c.APP_NAME}",
        image=self.logo_image,
        compound=ctk.LEFT,
        font=ctk.CTkFont(size=22, weight="bold"),
    )
    self.lbl_logo.grid(row=0, column=0, padx=20, pady=(19, 0))


def _init_shared_options_frame(self):
    """Create the frame with the audio language, source and generate button."""
    self.frm_shared_options = ctk.CTkFrame(master=self.frm_sidebar, border_width=2)
    self.frm_shared_options.grid(row=1, column=0, padx=20, pady=(20, 0))

    ## 'Audio language' option menu
    self.lbl_audio_language = ctk.CTkLabel(
        master=self.frm_shared_options,
        text=_("Audio language"),
        font=ctk.CTkFont(size=14, weight="bold"),
    )
    self.lbl_audio_language.grid(row=0, column=0, padx=0, pady=(10, 0))

    self.omn_audio_language = ctk.CTkOptionMenu(master=self.frm_shared_options)
    CTkScrollableDropdown(
        attach=self.omn_audio_language,
        values=list(c.AUDIO_LANGUAGES.values()),
        alpha=1,
    )
    self.omn_audio_language.grid(row=1, column=0, padx=20, pady=0, sticky=ctk.EW)

    # Preselect the language of the system locale. This is best effort on
    # purpose: any failure (no locale, unknown language code, ...) falls
    # back to English.
    # NOTE(review): `locale.getdefaultlocale()` is deprecated in recent
    # Python versions; check the replacement on the supported platforms.
    try:
        self.omn_audio_language.set(
            c.AUDIO_LANGUAGES[locale.getdefaultlocale()[0][:2]]
        )
    except Exception:
        self.omn_audio_language.set("English")

    ## 'Transcribe from' option menu
    self.lbl_transcribe_from = ctk.CTkLabel(
        master=self.frm_shared_options,
        text="Transcribe from",
        font=ctk.CTkFont(size=14, weight="bold"),
    )
    self.lbl_transcribe_from.grid(row=2, column=0, padx=0, pady=(15, 0))

    self.omn_transcribe_from = ctk.CTkOptionMenu(
        master=self.frm_shared_options,
        values=[e.value for e in AudioSource],
        command=self._on_change_transcribe_from_event,
    )
    self.omn_transcribe_from.grid(row=3, column=0, padx=20, pady=0, sticky=ctk.EW)
    self.omn_transcribe_from.set(AudioSource.FILE.value)

    ## 'Generate transcription' button
    self.btn_generate_transcription = ctk.CTkButton(
        master=self.frm_shared_options,
        fg_color="green",
        hover_color="darkgreen",
        text=_("Generate transcription"),
        command=self._on_generate_transcription,
    )
    self.btn_generate_transcription.grid(
        row=4, column=0, padx=20, pady=(25, 20), sticky=ctk.EW
    )


def _init_transcribe_using_frame(self):
    """Create the frame with the radio buttons selecting the transcription method."""
    self.frm_transcribe_using = ctk.CTkFrame(
        master=self.frm_sidebar, border_width=2
    )
    self.frm_transcribe_using.grid(row=2, column=0, padx=0, pady=(20, 0))

    ## 'Transcribe using' label
    self.lbl_transcribe_using = ctk.CTkLabel(
        master=self.frm_transcribe_using,
        text=_("Transcribe using"),
        font=ctk.CTkFont(size=14, weight="bold"),  # 14 is the default size
    )
    self.lbl_transcribe_using.grid(row=0, column=0, padx=0, pady=(10, 12.5))

    # Both radio buttons share this variable; WhisperX is preselected
    self.radio_var = tkinter.IntVar(value=TranscriptionMethod.WHISPERX.value)

    self.rbt_transcribe_using_whisper = ctk.CTkRadioButton(
        master=self.frm_transcribe_using,
        variable=self.radio_var,
        value=TranscriptionMethod.WHISPERX.value,
        text="WhisperX (local)",
        command=self._on_transcribe_using_change,
    )
    self.rbt_transcribe_using_whisper.grid(
        row=1, column=0, padx=20, pady=0, sticky=ctk.W
    )

    self.rbt_transcribe_using_google = ctk.CTkRadioButton(
        master=self.frm_transcribe_using,
        variable=self.radio_var,
        value=TranscriptionMethod.GOOGLE_API.value,
        text="Google API (remote)",
        command=self._on_transcribe_using_change,
    )
    self.rbt_transcribe_using_google.grid(
        row=2, column=0, padx=20, pady=(7.5, 16), sticky=ctk.W
    )


def _init_whisper_options_frame(self):
    """Create the frame with the basic WhisperX options."""
    self.frm_whisper_options = ctk.CTkFrame(master=self.frm_sidebar, border_width=2)
    self.frm_whisper_options.grid(row=3, column=0, padx=20, pady=(20, 0))

    ## Title label
    self.lbl_whisper_options = ctk.CTkLabel(
        master=self.frm_whisper_options,
        text="WhisperX options",
        font=ctk.CTkFont(size=14, weight="bold"),  # 14 is the default size
    )
    self.lbl_whisper_options.grid(row=0, column=0, padx=10, pady=(10, 12.5))

    ## 'Translate to English' checkbox
    self.chk_whisper_options_translate = ctk.CTkCheckBox(
        master=self.frm_whisper_options,
        text="Translate to English",
        command=self._on_chk_whisper_options_translate_change,
    )
    self.chk_whisper_options_translate.grid(
        row=1, column=0, padx=20, pady=0, sticky=ctk.W
    )

    ## 'Subtitles' checkbox
    self.chk_whisper_options_subtitles = ctk.CTkCheckBox(
        master=self.frm_whisper_options,
        text="Generate subtitles",
        command=self._on_whisper_options_subtitles_change,
    )
    self.chk_whisper_options_subtitles.grid(
        row=2, column=0, padx=20, pady=(10, 0), sticky=ctk.W
    )

    ## 'Show advanced options' button
    self.btn_whisperx_show_advanced_options = ctk.CTkButton(
        master=self.frm_whisper_options,
        text=_("Show advanced options"),
        command=self._on_show_advanced_options,
    )
    self.btn_whisperx_show_advanced_options.grid(
        row=3, column=0, padx=20, pady=16, sticky=ctk.EW
    )


def _init_google_api_options_frame(self):
    """Create the (initially hidden) frame with the Google API options."""
    self.frm_google_api_options = ctk.CTkFrame(
        master=self.frm_sidebar, border_width=2
    )
    # Same grid cell as the WhisperX options frame; only one is shown at a time
    self.frm_google_api_options.grid(
        row=3, column=0, padx=20, pady=(20, 0), sticky=ctk.EW
    )
    # Hidden at first because WhisperX is the default transcription method
    self.frm_google_api_options.grid_remove()

    ## Title label
    self.lbl_google_api_options = ctk.CTkLabel(
        master=self.frm_google_api_options,
        text="Google API options",
        font=ctk.CTkFont(size=14, weight="bold"),  # 14 is the default size
    )
    self.lbl_google_api_options.grid(row=0, column=0, padx=10, pady=(10, 12.5))

    ## 'Set API key' button
    self.btn_set_google_api_key = ctk.CTkButton(
        master=self.frm_google_api_options,
        text=_("Set API key"),
        command=self._on_set_google_api_key,
    )
    self.btn_set_google_api_key.grid(
        row=1, column=0, padx=20, pady=(0, 20), sticky=ctk.EW
    )


def _init_subtitle_options_frame(self):
    """Create the (initially hidden) frame with the subtitle options."""
    self.frm_subtitle_options = ctk.CTkFrame(
        master=self.frm_sidebar, border_width=2
    )
    self.frm_subtitle_options.grid(
        row=4, column=0, padx=20, pady=(20, 0), sticky=ctk.EW
    )
    self.frm_subtitle_options.grid_remove()  # Hidden by default

    ## Title label
    self.lbl_subtitle_options = ctk.CTkLabel(
        master=self.frm_subtitle_options,
        text="Subtitle options",
        font=ctk.CTkFont(size=14, weight="bold"),  # 14 is the default size
    )
    self.lbl_subtitle_options.grid(
        row=0, column=0, padx=40, pady=(10, 0), sticky=ctk.EW
    )

    ## 'Highlight words' check box
    self.chk_highlight_words = ctk.CTkCheckBox(
        master=self.frm_subtitle_options,
        text="Highlight words",
        command=lambda: self._on_config_change(
            section=ConfigSubtitles.Key.SECTION,
            key=ConfigSubtitles.Key.HIGHLIGHT_WORDS,
            new_value="True" if self.chk_highlight_words.get() else "False",
        ),
    )
    self.chk_highlight_words.grid(row=1, column=0, padx=20, pady=10, sticky=ctk.W)

    ## 'Max. line count' entry
    # Label and entry share the same grid cell; `padx` places them side by side
    self.lbl_max_line_count = ctk.CTkLabel(
        master=self.frm_subtitle_options,
        text=_("Max. line count"),
    )
    self.lbl_max_line_count.grid(
        row=2, column=0, padx=(52, 0), pady=0, sticky=ctk.W
    )

    self.max_line_count = ctk.StringVar(
        self, str(self._config_subtitles.max_line_count)
    )
    self._setup_debounced_change(
        section=ConfigSubtitles.Key.SECTION,
        key=ConfigSubtitles.Key.MAX_LINE_COUNT,
        variable=self.max_line_count,
        callback=self._on_config_change,
    )

    self.ent_max_line_count = ctk.CTkEntry(
        master=self.frm_subtitle_options,
        width=28,
        textvariable=self.max_line_count,
    )
    self.ent_max_line_count.grid(
        row=2, column=0, padx=(18, 20), pady=0, sticky=ctk.W
    )

    ## 'Max. line width' entry
    self.lbl_max_line_width = ctk.CTkLabel(
        master=self.frm_subtitle_options,
        text=_("Max. line width"),
    )
    self.lbl_max_line_width.grid(
        row=3, column=0, padx=(52, 0), pady=(10, 14), sticky=ctk.W
    )

    self.max_line_width = ctk.StringVar(
        self, str(self._config_subtitles.max_line_width)
    )
    self._setup_debounced_change(
        section=ConfigSubtitles.Key.SECTION,
        key=ConfigSubtitles.Key.MAX_LINE_WIDTH,
        variable=self.max_line_width,
        callback=self._on_config_change,
    )

    self.ent_max_line_width = ctk.CTkEntry(
        master=self.frm_subtitle_options,
        width=28,
        textvariable=self.max_line_width,
    )
    self.ent_max_line_width.grid(
        row=3, column=0, padx=(18, 20), pady=(10, 14), sticky=ctk.W
    )


def _init_whisperx_advanced_options_frame(self):
    """Create the (initially hidden) frame with the advanced WhisperX options."""
    self.frm_whisperx_advanced_options = ctk.CTkFrame(
        master=self.frm_sidebar, border_width=2
    )
    self.frm_whisperx_advanced_options.grid(
        row=5, column=0, padx=20, pady=(20, 0), sticky=ctk.EW
    )
    self.frm_whisperx_advanced_options.grid_remove()  # Hidden by default

    ## Title label
    self.lbl_advanced_options = ctk.CTkLabel(
        master=self.frm_whisperx_advanced_options,
        text="Advanced options",
        font=ctk.CTkFont(size=14, weight="bold"),  # 14 is the default size
    )
    self.lbl_advanced_options.grid(
        row=0, column=0, padx=10, pady=(10, 5), sticky=ctk.EW
    )

    ## 'Model size' option menu
    self.lbl_model_size = ctk.CTkLabel(
        master=self.frm_whisperx_advanced_options,
        text="Model size",
    )
    self.lbl_model_size.grid(row=1, column=0, padx=20, pady=0, sticky=ctk.W)

    self.omn_model_size = ctk.CTkOptionMenu(
        master=self.frm_whisperx_advanced_options,
        values=[model_size.value for model_size in ModelSize.__members__.values()],
        command=lambda *args: self._on_config_change(
            section=ConfigWhisperX.Key.SECTION,
            key=ConfigWhisperX.Key.MODEL_SIZE,
            new_value=self.omn_model_size.get(),
        ),
    )
    self.omn_model_size.grid(row=2, column=0, padx=20, pady=(3, 10), sticky=ctk.EW)
    self.omn_model_size.set(self._config_whisperx.model_size)

    ## 'Compute type' option menu
    self.lbl_compute_type = ctk.CTkLabel(
        master=self.frm_whisperx_advanced_options,
        text="Compute type",
    )
    self.lbl_compute_type.grid(row=3, column=0, padx=20, pady=0, sticky=ctk.W)

    self.omn_compute_type = ctk.CTkOptionMenu(
        master=self.frm_whisperx_advanced_options,
        values=[
            compute_type.value for compute_type in ComputeType.__members__.values()
        ],
        command=lambda *args: self._on_config_change(
            section=ConfigWhisperX.Key.SECTION,
            key=ConfigWhisperX.Key.COMPUTE_TYPE,
            new_value=self.omn_compute_type.get(),
        ),
    )
    self.omn_compute_type.grid(
        row=4, column=0, padx=20, pady=(3, 17), sticky=ctk.EW
    )
    self.omn_compute_type.set(self._config_whisperx.compute_type)

    ## 'Batch size' entry
    # Label and entry share the same grid cell; `padx` places them side by side
    self.lbl_batch_size = ctk.CTkLabel(
        master=self.frm_whisperx_advanced_options,
        text="Batch size",
    )
    self.lbl_batch_size.grid(row=5, column=0, padx=(50, 0), pady=0, sticky=ctk.W)

    self.batch_size = ctk.StringVar(self, str(self._config_whisperx.batch_size))
    self._setup_debounced_change(
        section=ConfigWhisperX.Key.SECTION,
        key=ConfigWhisperX.Key.BATCH_SIZE,
        variable=self.batch_size,
        callback=self._on_config_change,
    )

    self.ent_batch_size = ctk.CTkEntry(
        master=self.frm_whisperx_advanced_options,
        width=28,
        textvariable=self.batch_size,
    )
    self.ent_batch_size.grid(row=5, column=0, padx=(18, 20), pady=0, sticky=ctk.W)

    ## 'Use CPU' checkbox
    self.chk_use_cpu = ctk.CTkCheckBox(
        master=self.frm_whisperx_advanced_options,
        text="Use CPU",
        command=lambda: self._on_config_change(
            section=ConfigWhisperX.Key.SECTION,
            key=ConfigWhisperX.Key.USE_CPU,
            new_value="True" if self.chk_use_cpu.get() else "False",
        ),
    )
    self.chk_use_cpu.grid(row=6, column=0, padx=20, pady=(10, 16), sticky=ctk.W)

    if self._config_whisperx.use_cpu:
        self.chk_use_cpu.select()

    # Without a usable GPU the checkbox is forced on and locked
    if not self._config_whisperx.can_use_gpu:
        self.chk_use_cpu.select()
        self.chk_use_cpu.configure(state=ctk.DISABLED)


def _init_sidebar_footer(self):
    """Create the appearance mode selector and the info label at the bottom."""
    ## 'Appearance mode' option menu
    self.lbl_appearance_mode = ctk.CTkLabel(
        master=self.frm_sidebar,
        text=_("Appearance mode"),
        anchor=ctk.W,
        font=ctk.CTkFont(size=14, weight="bold"),
    )
    self.lbl_appearance_mode.grid(row=12, column=0, padx=20, pady=(50, 0))

    self.omn_appearance_mode = ctk.CTkOptionMenu(
        master=self.frm_sidebar,
        values=[_("System"), _("Light"), _("Dark")],
        command=self._change_appearance_mode_event,
    )
    self.omn_appearance_mode.grid(row=13, column=0, padx=20, pady=0, sticky=ctk.EW)

    ## Info label
    self.lbl_info = ctk.CTkLabel(
        master=self.frm_sidebar,
        text="v2.2.0 | Made by HenestrosaDev",
        font=ctk.CTkFont(size=12),
    )
    self.lbl_info.grid(row=14, column=0, padx=20, pady=(5, 10))
def _init_main_content(self):
    """
    Build the widgets of the main area (column 1 of the window).

    From top to bottom: the entry row (label, path entry and file explorer
    button), the transcription textbox and the 'Save transcription' button.
    The progress bar is created here as well but not placed in the grid.
    """
    # Entry row: only the entry column (1) stretches horizontally
    entry_row = ctk.CTkFrame(master=self, fg_color="transparent")
    entry_row.grid(row=0, column=1, padx=20, pady=(20, 0), sticky=ctk.EW)
    entry_row.grid_columnconfigure(1, weight=1)
    self.frm_main_entry = entry_row

    ## Label + entry holding the file path (or the URL)
    self.lbl_path = ctk.CTkLabel(
        master=entry_row,
        text="File path",
        font=ctk.CTkFont(size=14, weight="bold"),
    )
    self.lbl_path.grid(row=0, column=0, padx=(0, 15), sticky=ctk.W)

    self.ent_path = ctk.CTkEntry(master=entry_row)
    self.ent_path.grid(row=0, column=1, padx=0, sticky=ctk.EW)

    ## Image button opening the file explorer
    explorer_icon_path = ph.ROOT_PATH / ph.IMG_RELATIVE_PATH / "file-explorer.png"
    self.img_file_explorer = ctk.CTkImage(
        Image.open(explorer_icon_path),
        size=(24, 24),
    )
    self.btn_file_explorer = ctk.CTkButton(
        entry_row,
        image=self.img_file_explorer,
        text="",
        width=32,
        command=self._on_select_file,
    )
    self.btn_file_explorer.grid(row=0, column=2, padx=(15, 0), sticky=ctk.E)

    ## Textbox showing the transcription
    self.tbx_transcription = ctk.CTkTextbox(master=self, wrap=ctk.WORD)
    self.tbx_transcription.grid(row=2, column=1, padx=20, pady=20, sticky=ctk.NSEW)

    ## Progress bar (placed later by `_toggle_progress_bar_visibility`)
    self.progress_bar = ctk.CTkProgressBar(master=self)
    self.progress_bar.configure(mode="indeterminate")

    ## 'Save transcription' button
    self.btn_save = ctk.CTkButton(
        master=self,
        fg_color="green",
        hover_color="darkgreen",
        text=_("Save transcription"),
        command=self._on_save_transcription,
    )
    self.btn_save.grid(row=3, column=1, padx=20, pady=(0, 20), sticky=ctk.EW)
# PUBLIC METHODS
def on_select_file_success(self, filepath):
    """
    Show the selected file path in the path entry.

    :param filepath: Path to display
    """
    # The entry is given a fresh text variable that already holds the path
    path_variable = ctk.StringVar(self, filepath)
    self.ent_path.configure(textvariable=path_variable)
def on_processing_transcription(self):
    """Switch the window to its busy state while a transcription is processed."""
    # Lock the inputs to avoid multiple requests at the same time
    locked_inputs = (
        self.ent_path,
        self.omn_transcribe_from,
        self.omn_audio_language,
    )
    for widget in locked_inputs:
        widget.configure(state=ctk.DISABLED)

    # The generate button is left enabled while recording from the mic
    if not self._is_transcribing_from_mic:
        self.btn_generate_transcription.configure(state=ctk.DISABLED)

    # Show the progress bar and clear the previous text
    self._toggle_progress_bar_visibility(should_show=True)
    self.display_text("")
def on_processed_transcription(self, success: bool):
    """
    Leave the busy state once a transcription has been processed.

    :param success: Outcome of the transcription (not used by this method)
    :type success: bool
    """
    # Re-enable everything `on_processing_transcription` may have disabled
    unlocked_widgets = (
        self.ent_path,
        self.omn_transcribe_from,
        self.omn_audio_language,
        self.btn_generate_transcription,
    )
    for widget in unlocked_widgets:
        widget.configure(state=ctk.NORMAL)

    self._toggle_progress_bar_visibility(should_show=False)
# HELPER METHODS
def display_text(self, text):
    """
    Replace the content of the transcription textbox.

    :param text: Text to show
    """
    textbox = self.tbx_transcription
    # Clear everything first, then insert the new text
    textbox.delete("1.0", ctk.END)
    textbox.insert("0.0", text)
# PRIVATE METHODS
def _setup_debounced_change(self, section, key, variable, callback, *unused):
    """
    Call `_on_change_debounced` every time `variable` is written.

    :param section: Passed through to `_on_change_debounced`
    :param key: Passed through to `_on_change_debounced`
    :param variable: Variable to watch (its `trace_add` method is used)
    :param callback: Passed through to `_on_change_debounced`
    :param unused: Ignored extra positional arguments
    """

    def on_write(*args):
        # The arguments supplied by the trace are not needed
        return self._on_change_debounced(section, key, variable, callback)

    variable.trace_add(mode="write", callback=on_write)
def _on_change_debounced(self, section, key, variable, callback, delay=600):
    """
    Schedule `callback(section, key, variable.get())` after `delay` ms.

    A call for the same `(section, key)` pair made before the delay elapses
    replaces the pending one. Pending calls are tracked per pair: with the
    single shared `after()` ID used before, changing one setting cancelled
    the pending save of a different setting edited within the delay.

    :param section: First argument for `callback`; part of the debounce key
    :param key: Second argument for `callback`; part of the debounce key
    :param variable: Object whose `get()` result is the third argument
    :param callback: Callable invoked once the delay has elapsed
    :param delay: Delay in milliseconds
    """
    # Created lazily so that the method does not depend on `__init__`
    pending = getattr(self, "_pending_after_ids", None)
    if pending is None:
        pending = self._pending_after_ids = {}

    # assumes `section` and `key` are hashable (strings or enum members)
    slot = (section, key)

    # Cancel the previously scheduled call for this setting only
    previous_id = pending.pop(slot, None)
    if previous_id is not None:
        self.after_cancel(previous_id)

    def fire():
        # The call is running now, so it is no longer pending
        pending.pop(slot, None)
        callback(section, key, variable.get())

    after_id = self.after(delay, fire)
    pending[slot] = after_id
    # Still updated for any code that reads the most recent ID
    self._after_id = after_id
def _on_change_app_language(self, language_name: str):
    """
    Forward an app language change to the controller.

    :param language_name: Name of the language, handed over unchanged
    :type language_name: str
    """
    controller = self._controller
    controller.change_app_language(language_name)
def _on_change_transcribe_from_event(self, option: str):
    """
    Adapt the window to the newly selected audio source.

    The path entry is emptied, and the caption of the main button, the label
    of the entry row and the visibility of the entry row / file explorer
    button are adjusted to the source.

    :param option: Value of the selected `AudioSource` member
    :type option: str
    """
    source = AudioSource(option)
    self._transcribe_from_source = source

    # Empty the entry: the previous path/URL belongs to another source
    self.ent_path.configure(textvariable=ctk.StringVar(self, ""))

    # The button captions go through `_()` like the caption used when the
    # button is created, so the text stays translated after a source change.
    if source == AudioSource.FILE:
        self.btn_generate_transcription.configure(text=_("Generate transcription"))
        self.lbl_path.configure(text="File path")
        self.btn_file_explorer.grid()
        self.frm_main_entry.grid()
    elif source == AudioSource.MIC:
        self.btn_generate_transcription.configure(text=_("Start recording"))
        # Nothing to type in when recording from the microphone
        self.frm_main_entry.grid_remove()
    elif source == AudioSource.YOUTUBE:
        self.btn_generate_transcription.configure(text=_("Generate transcription"))
        self.lbl_path.configure(text="YouTube video URL")
        self.btn_file_explorer.grid_remove()
        self.frm_main_entry.grid()
def _on_select_file(self):
    """Ask the controller to run the file selection."""
    controller = self._controller
    controller.select_file()
def _on_transcribe_from_mic(self):
    """Stop the microphone recording if one is running, otherwise start one."""
    if not self._is_transcribing_from_mic:
        self._start_recording_from_mic()
        return
    self.stop_recording_from_mic()
def _start_recording_from_mic(self):
    """
    Start a transcription from the microphone.

    Marks the view as recording, turns the main button into a red
    'Stop recording' button and hands a microphone `Transcription` built
    from the current options to the controller.
    """
    self._is_transcribing_from_mic = True

    # (light mode, dark mode) color pairs
    recording_colors = (Color.LIGHT_RED.value, Color.DARK_RED.value)
    recording_hover_colors = (
        Color.HOVER_LIGHT_RED.value,
        Color.HOVER_DARK_RED.value,
    )
    self.btn_generate_transcription.configure(
        fg_color=recording_colors,
        hover_color=recording_hover_colors,
        text=_("Stop recording"),
    )

    mic_transcription = Transcription(
        source=AudioSource.MIC,
        language_code=self._get_language_code(),
        method=self.radio_var.get(),
        **self._get_whisperx_args(),
    )
    self._controller.prepare_for_transcription(mic_transcription)
def stop_recording_from_mic(self):
    """
    Stop the microphone recording.

    Clears the recording flag, restores the green look of the main button
    (leaving it disabled) and tells the controller to stop recording.
    """
    self._is_transcribing_from_mic = False

    self.btn_generate_transcription.configure(
        fg_color="green",
        hover_color="darkgreen",
        # Passed through `_()` like its counterpart `_("Stop recording")`
        text=_("Start recording"),
        # Disabled here; `on_processed_transcription` sets it back to normal
        state=ctk.DISABLED,
    )

    self._controller.stop_recording_from_mic()
def _on_generate_transcription(self):
    """
    Handle a click on the main 'Generate transcription' button.

    The input widgets are disabled and a `Transcription` is built from the
    current options. For the file and YouTube sources it is completed with
    the content of the path entry and passed to the controller; for the
    microphone source the recording is toggled instead.
    """
    for widget in (
        self.ent_path,
        self.omn_transcribe_from,
        self.omn_audio_language,
    ):
        widget.configure(state=ctk.DISABLED)

    transcription = Transcription(
        language_code=self._get_language_code(),
        method=self.radio_var.get(),
        **self._get_whisperx_args(),
    )

    source = self._transcribe_from_source

    if source == AudioSource.MIC:
        # The microphone flow builds its own transcription object
        self._on_transcribe_from_mic()
        return

    if source == AudioSource.FILE:
        transcription.source = AudioSource.FILE
        transcription.source_file_path = self.ent_path.get()
    elif source == AudioSource.YOUTUBE:
        transcription.source = AudioSource.YOUTUBE
        transcription.youtube_url = self.ent_path.get()
    else:
        return

    self._controller.prepare_for_transcription(transcription)
def _on_save_transcription(self):
    """Ask the controller to save the transcription."""
    controller = self._controller
    controller.save_transcription()
def _on_transcribe_using_change(self):
    """
    Show the option frame that belongs to the selected transcription method.

    Selecting the Google API also hides the WhisperX advanced options and
    resets the caption of the button that toggles them.
    """
    # NOTE(review): the subtitle options frame is not touched here, so it
    # looks like it stays visible when switching to the Google API - confirm.
    method = self.radio_var.get()

    if method == TranscriptionMethod.WHISPERX.value:
        self.frm_whisper_options.grid()
        self.frm_google_api_options.grid_remove()
    elif method == TranscriptionMethod.GOOGLE_API.value:
        for whisperx_frame in (
            self.frm_whisper_options,
            self.frm_whisperx_advanced_options,
        ):
            whisperx_frame.grid_remove()
        self.btn_whisperx_show_advanced_options.configure(
            text=_("Show advanced options")
        )
        self.frm_google_api_options.grid()
def _on_set_google_api_key(self):
    """
    Ask for the Google API key in an input dialog and store it when changed.

    Nothing is stored when the dialog returns `None` or when the input
    equals the current key. The stored value is stripped of surrounding
    whitespace.
    """
    current_key = self._config_google_api.api_key

    entered_key = CTkInputDialog(
        title="Google API key",
        label_text="Type in the API key:",
        entry_text=current_key,
    ).get_input()

    if entered_key is None or entered_key == current_key:
        return

    self._on_config_change(
        section=ConfigGoogleApi.Key.SECTION,
        key=ConfigGoogleApi.Key.API_KEY,
        new_value=entered_key.strip(),
    )
def _on_chk_whisper_options_translate_change(self):
    """
    React to the 'Translate to English' checkbox being toggled.

    While it is checked, the 'Generate subtitles' checkbox is unchecked and
    disabled and the subtitle options frame is hidden. Unchecking it only
    re-enables the 'Generate subtitles' checkbox.
    """
    subtitles_checkbox = self.chk_whisper_options_subtitles

    if not self.chk_whisper_options_translate.get():
        subtitles_checkbox.configure(state=ctk.NORMAL)
        return

    subtitles_checkbox.deselect()
    subtitles_checkbox.configure(state=ctk.DISABLED)
    self.frm_subtitle_options.grid_remove()
def _on_whisper_options_subtitles_change(self):
    """Show the subtitle options frame only while 'Generate subtitles' is checked."""
    options_frame = self.frm_subtitle_options
    if self.chk_whisper_options_subtitles.get():
        toggle = options_frame.grid
    else:
        toggle = options_frame.grid_remove
    toggle()
def _on_show_advanced_options(self):
    """Toggle the WhisperX advanced options frame and update the button caption."""
    advanced_frame = self.frm_whisperx_advanced_options

    if advanced_frame.winfo_ismapped():
        # Currently shown: hide it and offer to show it again
        advanced_frame.grid_remove()
        caption = _("Show advanced options")
    else:
        advanced_frame.grid()
        caption = _("Hide advanced options")

    self.btn_whisperx_show_advanced_options.configure(text=caption)
def _toggle_progress_bar_visibility(self, should_show):
    """
    Show and animate the progress bar, or stop and hide it.

    :param should_show: `True` to place the bar in the grid and start its
        animation, `False` to stop the animation and remove the bar
    """
    if should_show:
        self.progress_bar.grid(row=2, column=1, padx=40, pady=0, sticky=ctk.EW)
        self.progress_bar.start()
    else:
        # Stop the animation started above; hiding the bar alone does not
        # stop it, so it used to keep running while the bar was invisible.
        self.progress_bar.stop()
        self.progress_bar.grid_forget()
@staticmethod
def _on_config_change(section, key, new_value):
    """
    Hand a changed setting over to `cm.ConfigManager.modify_value`.

    :param section: Config section of the setting
    :param key: Config key of the setting
    :param new_value: New value of the setting
    """
    config_manager = cm.ConfigManager
    config_manager.modify_value(section, key, new_value)
@staticmethod
def _change_appearance_mode_event(new_appearance_mode: str):
    """
    Apply the appearance mode chosen in the 'Appearance mode' option menu.

    :param new_appearance_mode: Entry picked in the menu, i.e. the result of
        `_()` for "System", "Light" or "Dark"; anything else selects "System"
    :type new_appearance_mode: str
    """
    # Map the menu entries (as returned by `_()`) back to the mode names
    mode_by_menu_entry = {
        _(mode): mode for mode in ("Dark", "Light", "System")
    }
    ctk.set_appearance_mode(
        mode_by_menu_entry.get(new_appearance_mode, "System")
    )