From 38cc5863914da030df55f6f2d03a94ac05b95117 Mon Sep 17 00:00:00 2001 From: cyber <19499442+cyberofficial@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:54:30 -0400 Subject: [PATCH 1/3] Fix: Display headers correctly based on URL parameters --- html_data/index.html | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/html_data/index.html b/html_data/index.html index a721ba5..3448d91 100644 --- a/html_data/index.html +++ b/html_data/index.html @@ -30,16 +30,20 @@ const showTranslation = params.has("showtranslation"); const showTranscription = params.has("showtranscription"); - if (showOriginal) { + if (!showOriginal && !showTranslation && !showTranscription) { showElementById("header-text"); - } - - if (showTranslation) { showElementById("translated-header"); - } - - if (showTranscription) { showElementById("transcribed-header"); + } else { + if (showOriginal) { + showElementById("header-text"); + } + if (showTranslation) { + showElementById("translated-header"); + } + if (showTranscription) { + showElementById("transcribed-header"); + } } }); From 76ecb698a6cb98f22285989d75eaf8f59b19d362 Mon Sep 17 00:00:00 2001 From: cyber <19499442+cyberofficial@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:50:58 -0400 Subject: [PATCH 2/3] [feature update] New Arguments and Improvements * Customize the Captions of the Web Player. Preview: https://streamable.com/7cd1kk ------ - Added new argument "--fp16". This allows for more accurate information being passed to the process. This will grant the AI the ability to process more information at the cost of speed. You will not see heavy impact on stronger hardware. - You can now choose between 2 12GB Models, V2 and V3, using the ram argument flag like normal, but instead of 12gb, it's "12gb-v2", "12gb-v3" now. 12GB v2 - More Stable, Much Faster, Good for High End GTX devices, Overkill for RTX Devices. 
12GB v3 - More Accurate, Tiny bit Slower, Good for High End RTX devices Combine 12gb-v3 + fp16 Flags (Precision Mode on the GUI) for the ultimate experience. - The Stream Transcription module had some fixes applied onto it. - Subtitle Creator will work with FP16 mode and missing subflag was added. - GUI Has new elements to handle the new arguments. Also some minor spelling mistakes were zapped. - Microphone Mode had some improvements made and fixes applied to it. --- README.md | 1 + Synthalingua_Wrapper/App.config | 5 +- Synthalingua_Wrapper/MainUI.Designer.vb | 28 ++- Synthalingua_Wrapper/MainUI.resx | 15 +- Synthalingua_Wrapper/MainUI.vb | 12 +- .../PublishProfiles/FolderProfile.pubxml.user | 2 +- .../My Project/Settings.Designer.vb | 20 +- .../My Project/Settings.settings | 7 +- Synthalingua_Wrapper/Synthalingua_Wrapper.sln | 6 - .../Synthalingua_Wrapper.vbproj | 8 +- html_data/player.html | 172 +++++++++++++++--- modules/parser_args.py | 27 ++- modules/stream_transcription_module.py | 12 +- modules/sub_gen.py | 2 +- transcribe_audio.py | 30 +-- 15 files changed, 275 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 8b04c05..df6ffb3 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,7 @@ This script uses argparse to accept command line arguments. The following option | ---- | ----------- | | `--ram` | Change the amount of RAM to use. Default is 4GB. Choices are "1GB", "2GB", "4GB", "6GB", "12GB". | | `--ramforce` | Use this flag to force the script to use desired VRAM. May cause the script to crash if there is not enough VRAM available. | +| `--fp16` | This allows for more accurate information being passed to the process. This will grant the AL the ability to process more information at the cost of speed. You will not see heavy impact on stronger hardware. Combine 12gb-v3 + fp16 Flags (Precision Mode on the GUI) for the ultimate experience. | | `--energy_threshold` | Set the energy level for microphone to detect. Default is 100. 
Choose from 1 to 1000; anything higher will be harder to trigger the audio detection. | | `--mic_calibration_time` | How long to calibrate the mic for in seconds. To skip user input type 0 and time will be set to 5 seconds. | | `--record_timeout` | Set the time in seconds for real-time recording. Default is 2 seconds. | diff --git a/Synthalingua_Wrapper/App.config b/Synthalingua_Wrapper/App.config index ad9a14f..16c2a34 100644 --- a/Synthalingua_Wrapper/App.config +++ b/Synthalingua_Wrapper/App.config @@ -25,7 +25,7 @@ True - 1 + 1gb False @@ -147,6 +147,9 @@ + + False + diff --git a/Synthalingua_Wrapper/MainUI.Designer.vb b/Synthalingua_Wrapper/MainUI.Designer.vb index d0e10e5..39e1ad6 100644 --- a/Synthalingua_Wrapper/MainUI.Designer.vb +++ b/Synthalingua_Wrapper/MainUI.Designer.vb @@ -93,6 +93,8 @@ Partial Class MainUI Label9 = New Label() CaptionsInputBtn = New Button() TabPage4 = New TabPage() + Label17 = New Label() + PrecisionCheckBox = New CheckBox() Label16 = New Label() modelDIr = New TextBox() modelDirPicker = New Button() @@ -261,7 +263,7 @@ Partial Class MainUI RamSize.AutoCompleteSource = AutoCompleteSource.ListItems RamSize.DropDownStyle = ComboBoxStyle.DropDownList RamSize.FormattingEnabled = True - RamSize.Items.AddRange(New Object() {"1gb", "2gb", "4gb", "6gb", "12gb"}) + RamSize.Items.AddRange(New Object() {"1gb", "2gb", "4gb", "6gb", "12gb-v2", "12gb-v3"}) RamSize.Location = New Point(71, 5) RamSize.Margin = New Padding(3, 2, 3, 2) RamSize.Name = "RamSize" @@ -904,6 +906,8 @@ Partial Class MainUI ' TabPage4 ' TabPage4.BackColor = Color.DarkSlateBlue + TabPage4.Controls.Add(Label17) + TabPage4.Controls.Add(PrecisionCheckBox) TabPage4.Controls.Add(Label16) TabPage4.Controls.Add(Label2) TabPage4.Controls.Add(RamSize) @@ -917,6 +921,26 @@ Partial Class MainUI TabPage4.TabIndex = 3 TabPage4.Text = "Model Settings" ' + ' Label17 + ' + Label17.AutoSize = True + Label17.Font = New Font("Segoe UI", 12F) + Label17.Location = New Point(6, 88) + 
Label17.Name = "Label17" + Label17.Size = New Size(443, 147) + Label17.TabIndex = 9 + Label17.Text = resources.GetString("Label17.Text") + ' + ' PrecisionCheckBox + ' + PrecisionCheckBox.AutoSize = True + PrecisionCheckBox.Location = New Point(241, 7) + PrecisionCheckBox.Name = "PrecisionCheckBox" + PrecisionCheckBox.Size = New Size(108, 19) + PrecisionCheckBox.TabIndex = 8 + PrecisionCheckBox.Text = "Precision Mode" + PrecisionCheckBox.UseVisualStyleBackColor = True + ' ' Label16 ' Label16.AutoSize = True @@ -1431,5 +1455,7 @@ Partial Class MainUI Friend WithEvents Label16 As Label Friend WithEvents modelDirPicker As Button Friend WithEvents FolderBrowserDialog2 As FolderBrowserDialog + Friend WithEvents PrecisionCheckBox As CheckBox + Friend WithEvents Label17 As Label End Class diff --git a/Synthalingua_Wrapper/MainUI.resx b/Synthalingua_Wrapper/MainUI.resx index 9169d4a..2c29c24 100644 --- a/Synthalingua_Wrapper/MainUI.resx +++ b/Synthalingua_Wrapper/MainUI.resx @@ -1,7 +1,7 @@  - True|2024-09-26T19:09:54.5357885Z||;True|2024-09-18T03:09:20.6489818-04:00||;True|2024-08-08T02:50:46.6107116-04:00||;True|2024-08-08T02:34:00.3980329-04:00||;True|2024-08-08T02:31:56.2576355-04:00||;True|2024-08-08T02:27:59.1611557-04:00||;True|2024-08-08T02:26:48.4386992-04:00||;True|2024-08-08T02:24:08.4320611-04:00||;True|2024-08-08T02:19:01.0250722-04:00||;True|2024-08-08T01:51:56.8183777-04:00||;True|2024-08-08T01:50:23.0868936-04:00||;True|2024-08-08T01:50:10.4543482-04:00||;True|2024-08-08T01:49:59.7945394-04:00||;True|2024-08-08T01:47:41.5885686-04:00||;True|2024-08-08T01:46:26.9076296-04:00||;True|2024-08-08T01:46:06.3593091-04:00||;True|2024-08-08T01:44:52.6974951-04:00||;True|2024-08-08T01:43:33.9183523-04:00||;True|2024-08-08T01:43:11.3344818-04:00||;True|2024-08-08T01:40:22.8767718-04:00||;True|2024-08-08T01:38:27.2167559-04:00||; + 
True|2024-09-26T22:52:16.8047523Z||;True|2024-09-26T18:45:33.6369257-04:00||;True|2024-09-26T17:43:15.5416964-04:00||;True|2024-09-26T17:13:09.1691137-04:00||;True|2024-09-26T17:08:51.5207150-04:00||;True|2024-09-26T15:09:54.5357885-04:00||;True|2024-09-18T03:09:20.6489818-04:00||;True|2024-08-08T02:50:46.6107116-04:00||;True|2024-08-08T02:34:00.3980329-04:00||;True|2024-08-08T02:31:56.2576355-04:00||;True|2024-08-08T02:27:59.1611557-04:00||;True|2024-08-08T02:26:48.4386992-04:00||;True|2024-08-08T02:24:08.4320611-04:00||;True|2024-08-08T02:19:01.0250722-04:00||;True|2024-08-08T01:51:56.8183777-04:00||;True|2024-08-08T01:50:23.0868936-04:00||;True|2024-08-08T01:50:10.4543482-04:00||;True|2024-08-08T01:49:59.7945394-04:00||;True|2024-08-08T01:47:41.5885686-04:00||;True|2024-08-08T01:46:26.9076296-04:00||;True|2024-08-08T01:46:06.3593091-04:00||;True|2024-08-08T01:44:52.6974951-04:00||;True|2024-08-08T01:43:33.9183523-04:00||;True|2024-08-08T01:43:11.3344818-04:00||;True|2024-08-08T01:40:22.8767718-04:00||;True|2024-08-08T01:38:27.2167559-04:00||; \ No newline at end of file diff --git a/Synthalingua_Wrapper/My Project/Settings.Designer.vb b/Synthalingua_Wrapper/My Project/Settings.Designer.vb index e3191f5..cd6a8f4 100644 --- a/Synthalingua_Wrapper/My Project/Settings.Designer.vb +++ b/Synthalingua_Wrapper/My Project/Settings.Designer.vb @@ -15,7 +15,7 @@ Option Explicit On Namespace My _ Partial Friend NotInheritable Class MySettings Inherits Global.System.Configuration.ApplicationSettingsBase @@ -116,10 +116,10 @@ Namespace My _ - Public Property RamSize() As Integer + Global.System.Configuration.DefaultSettingValueAttribute("1gb")> _ + Public Property RamSize() As String Get - Return CType(Me("RamSize"),Integer) + Return CType(Me("RamSize"),String) End Get Set Me("RamSize") = value @@ -605,6 +605,18 @@ Namespace My Me("modelDIr") = value End Set End Property + + _ + Public Property fp16() As Boolean + Get + Return CType(Me("fp16"),Boolean) + End Get + Set + 
Me("fp16") = value + End Set + End Property End Class End Namespace diff --git a/Synthalingua_Wrapper/My Project/Settings.settings b/Synthalingua_Wrapper/My Project/Settings.settings index 7dbf74f..e360e5a 100644 --- a/Synthalingua_Wrapper/My Project/Settings.settings +++ b/Synthalingua_Wrapper/My Project/Settings.settings @@ -17,8 +17,8 @@ True - - 1 + + 1gb False @@ -140,5 +140,8 @@ + + False + \ No newline at end of file diff --git a/Synthalingua_Wrapper/Synthalingua_Wrapper.sln b/Synthalingua_Wrapper/Synthalingua_Wrapper.sln index caf0e8c..8e7720b 100644 --- a/Synthalingua_Wrapper/Synthalingua_Wrapper.sln +++ b/Synthalingua_Wrapper/Synthalingua_Wrapper.sln @@ -7,18 +7,12 @@ Project("{778DAE3C-4631-46EA-AA77-85C1314464D9}") = "Synthalingua_Wrapper", "Syn EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU Debug|x64 = Debug|x64 - Release|Any CPU = Release|Any CPU Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|Any CPU.Build.0 = Debug|Any CPU {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|x64.ActiveCfg = Debug|x64 {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|x64.Build.0 = Debug|x64 - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|Any CPU.ActiveCfg = Release|Any CPU - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|Any CPU.Build.0 = Release|Any CPU {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|x64.ActiveCfg = Release|x64 {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|x64.Build.0 = Release|x64 EndGlobalSection diff --git a/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj b/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj index c4e857d..875ea8c 100644 --- a/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj +++ b/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj @@ -12,20 +12,20 @@ syntha.png assets\syntha.ico SynthalinguaGUI - Build 
Date: Sep 26 2024 3:07 PM EDT + Build Date: Sep 26 2024 6:50 PM EDT 4 - 1.1.4.44 + 1.1.4.45 1 https://github.com/cyberofficial/Synthalingua git - AnyCPU + x64 - AnyCPU + x64 diff --git a/html_data/player.html b/html_data/player.html index e9ae1c2..1d79bd6 100644 --- a/html_data/player.html +++ b/html_data/player.html @@ -8,7 +8,6 @@ - @@ -97,14 +128,107 @@
+ + +
Customize Captions
+
-

-

-

+

+

+

+
+
+ + + + + - \ No newline at end of file + diff --git a/modules/parser_args.py b/modules/parser_args.py index 5bb72e9..8a885f4 100644 --- a/modules/parser_args.py +++ b/modules/parser_args.py @@ -30,30 +30,45 @@ def set_model_by_ram(ram, language): elif ram == "6gb": if language == "en" or language == "English": model = "medium.en" - else: model = "medium" - elif ram == "12gb": - model = "large-v3" + elif ram == "12gb-v2" or ram == "12gb-v3": + # Determine the model based on the version + if ram == "12gb-v2": + model = "large-v2" + version = "Version 2" + else: # ram == "12gb-v3" + model = "large-v3" + version = "Version 3" + + # Warning for English language if language == "en" or language == "English": red_text = Fore.RED + Back.BLACK green_text = Fore.GREEN + Back.BLACK yellow_text = Fore.YELLOW + Back.BLACK reset_text = Style.RESET_ALL - print(f"{red_text}WARNING{reset_text}: {yellow_text}12gb{reset_text} is overkill for English. Do you want to swap to {green_text}6gb{reset_text} model? If you are transcribing a language other than English, you can ignore this warning and press {green_text}n{reset_text}.") + + print(f"{red_text}WARNING{reset_text}: {yellow_text}12gb{reset_text} is overkill for English. " + f"Do you want to swap to the {green_text}6gb{reset_text} model? 
" + f"If you are transcribing a language other than English, you can ignore this warning and press {green_text}n{reset_text}.") + if input("y/n: ").lower() == "y": model = "medium.en" else: - model = "large-v3" + print(f"Using 12GB {version}") + ram = "12gb" # Normalize ram to "12gb" for both v2 and v3 + else: raise ValueError("Invalid RAM setting provided") return model + def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--ram", default="4gb", help="Model to use", choices=["1gb", "2gb", "4gb", "6gb", "12gb"]) + parser.add_argument("--ram", default="4gb", help="Model to use", choices=["1gb", "2gb", "4gb", "6gb", "12gb-v2", "12gb-v3"]) parser.add_argument("--ramforce", action='store_true', help="Force the model to use the RAM setting provided. Warning: This may cause the model to crash.") + parser.add_argument("--fp16", action='store_true', default=False, help="Sets Models to FP16 Mode, Heavy on Usage, but more accurate") parser.add_argument("--energy_threshold", default=100, help="Energy level for mic to detect.", type=int) parser.add_argument("--mic_calibration_time", help="How long to calibrate the mic for in seconds. 
To skip user input type 0 and time will be set to 5 seconds.", type=int) parser.add_argument("--record_timeout", default=1, help="How real time the recording is in seconds.", type=float) diff --git a/modules/stream_transcription_module.py b/modules/stream_transcription_module.py index ca4e4ef..32bc571 100644 --- a/modules/stream_transcription_module.py +++ b/modules/stream_transcription_module.py @@ -139,7 +139,7 @@ def combine_audio_segments(segment_paths, output_path): def translate_audio(file_path, model): try: - result = model.transcribe(file_path, task="translate", language=stream_language) + result = model.transcribe(file_path, task="translate", fp16=args.fp16, language=stream_language, condition_on_previous_text=args.condition_on_previous_text) return result["text"] except RuntimeError as e: print(f"Error transcribing audio: {e}") @@ -147,7 +147,7 @@ def translate_audio(file_path, model): def transcribe_audio(file_path, model, language): try: - result = model.transcribe(file_path, language=language) + result = model.transcribe(file_path, language=language, fp16=args.fp16, condition_on_previous_text=args.condition_on_previous_text) return result["text"] except RuntimeError as e: print(f"Error transcribing audio: {e}") @@ -157,10 +157,15 @@ def detect_language(file_path, model, device=args.device): try: audio = whisper.load_audio(file_path) audio = whisper.pad_or_trim(audio) - if args.ram == "12gb": + + # Handle both "12gb-v2" and "12gb-v3" + if args.ram == "12gb-v2": + mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) + elif args.ram == "12gb-v3": mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(device) else: mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) + _, language_probs = model.detect_language(mel) detected_language = max(language_probs, key=language_probs.get) return detected_language @@ -169,6 +174,7 @@ def detect_language(file_path, model, device=args.device): detected_language = "n/a" return detected_language + def 
process_audio(file_path, model): if not os.path.exists(file_path): print(f"Warning: File {file_path} does not exist, skipping.") diff --git a/modules/sub_gen.py b/modules/sub_gen.py index 75ae35f..d46ac81 100644 --- a/modules/sub_gen.py +++ b/modules/sub_gen.py @@ -14,7 +14,7 @@ def run_sub_gen(input_path: str, output_name: str = "", output_directory: str = print("Setting Path") print("Doing the work now...") print("This may take a while, sit back and get a coffee or something.") - result = model.transcribe(input_path, language=args.language, task="translate") + result = model.transcribe(input_path, fp16=args.fp16, language=args.language, task="translate", condition_on_previous_text=args.condition_on_previous_text) print("Setting writer Up") writer = get_writer("srt", str(output_directory)) diff --git a/transcribe_audio.py b/transcribe_audio.py index 4493d1a..a7bd277 100644 --- a/transcribe_audio.py +++ b/transcribe_audio.py @@ -277,7 +277,7 @@ def mic_calibration(): cuda_vram = torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory / 1024 / 1024 overhead_buffer = 200 - ram_options = [("12gb", 12000), ("6gb", 6144), ("4gb", 4096), ("2gb", 2048), ("1gb", 1024)] + ram_options = [("12gb-v2", 12000), ("6gb", 6144), ("4gb", 4096), ("2gb", 2048), ("1gb", 1024)] found = False old_ram_flag = args.ram @@ -553,7 +553,9 @@ def mic_calibration(): audio = whisper.load_audio(temp_file) audio = whisper.pad_or_trim(audio) # if ram is set to 12 use n_mels=128 else use n_mels=80 - if args.ram == "12gb": + if args.ram == "12gb-v2": + mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) + elif args.ram == "12gb-v3": mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(device) else: mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) @@ -608,9 +610,9 @@ def mic_calibration(): print("Transcribing...") if device == "cuda": - result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), language=detected_language, 
condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, fp16=args.fp16, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - result = audio_model.transcribe(temp_file, condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) if args.no_log == False: print(f"Detected Speech: {result['text']}") @@ -621,9 +623,9 @@ def mic_calibration(): print("Transcription failed, trying again...") send_to_discord_webhook(webhook_url, "Transcription failed, trying again...") if device == "cuda": - result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), language=detected_language, condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, fp16=args.fp16, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - result = audio_model.transcribe(temp_file, condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) if args.no_log == False: print(f"Detected Speech: {result['text']}") else: @@ -638,9 +640,9 @@ def mic_calibration(): if args.no_log == False: print("Translating...") if device == "cuda": - translated_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="translate", language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language, 
condition_on_previous_text=args.condition_on_previous_text) translated_text = translated_result['text'].strip() if translated_text == "": if args.retry: @@ -648,9 +650,9 @@ def mic_calibration(): print("Translation failed, trying again...") send_to_discord_webhook(webhook_url, "Translation failed, trying again...") if device == "cuda": - translated_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="translate", language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language, condition_on_previous_text=args.condition_on_previous_text) translated_text = translated_result['text'].strip() if args.discord_webhook: if translated_text == "": @@ -669,9 +671,9 @@ def mic_calibration(): if args.no_log == False: print(f"Transcribing to {target_language}...") if device == "cuda": - transcribed_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) else: - transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) transcribed_text = transcribed_result['text'].strip() if transcribed_text == "": if args.retry: @@ -679,9 +681,9 @@ def mic_calibration(): print("transcribe failed, trying again...") send_to_discord_webhook(webhook_url, "transcribe failed, trying again...") 
if device == "cuda": - transcribed_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) else: - transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) transcribed_text = transcribed_result['text'].strip() if args.discord_webhook: if transcribed_text == "": From e22f0f38989fba3e2318191bf946553317277cf1 Mon Sep 17 00:00:00 2001 From: cyber <19499442+cyberofficial@users.noreply.github.com> Date: Thu, 26 Sep 2024 19:00:36 -0400 Subject: [PATCH 3/3] =?UTF-8?q?Version=20Bump=20=F0=9F=91=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Version Bump 👀 --- modules/version_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/version_checker.py b/modules/version_checker.py index fd9c741..3770a73 100644 --- a/modules/version_checker.py +++ b/modules/version_checker.py @@ -1,6 +1,6 @@ from modules.imports import * -version = "1.0.99998" +version = "1.0.99999" ScriptCreator = "cyberofficial" GitHubRepo = "https://github.com/cyberofficial/Synthalingua" repo_owner = "cyberofficial"