From 38cc5863914da030df55f6f2d03a94ac05b95117 Mon Sep 17 00:00:00 2001 From: cyber <19499442+cyberofficial@users.noreply.github.com> Date: Thu, 26 Sep 2024 15:54:30 -0400 Subject: [PATCH 1/3] Fix: Display headers correctly based on URL parameters --- html_data/index.html | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/html_data/index.html b/html_data/index.html index a721ba5..3448d91 100644 --- a/html_data/index.html +++ b/html_data/index.html @@ -30,16 +30,20 @@ const showTranslation = params.has("showtranslation"); const showTranscription = params.has("showtranscription"); - if (showOriginal) { + if (!showOriginal && !showTranslation && !showTranscription) { showElementById("header-text"); - } - - if (showTranslation) { showElementById("translated-header"); - } - - if (showTranscription) { showElementById("transcribed-header"); + } else { + if (showOriginal) { + showElementById("header-text"); + } + if (showTranslation) { + showElementById("translated-header"); + } + if (showTranscription) { + showElementById("transcribed-header"); + } } }); From 76ecb698a6cb98f22285989d75eaf8f59b19d362 Mon Sep 17 00:00:00 2001 From: cyber <19499442+cyberofficial@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:50:58 -0400 Subject: [PATCH 2/3] [feature update] New Arguments and Improvements * Customize the Captions of the Web Player. Preview: https://streamable.com/7cd1kk ------ - Added new argument "--fp16". This allows for more accurate information being passed to the process. This will grant the AI the ability to process more information at the cost of speed. You will not see heavy impact on stronger hardware. - You can now choose between 2 12GB Models, V2 and V3, using the ram argument flag like normal, but instead of 12gb, it's "12gb-v2", "12gb-v3" now. 12GB v2 - More Stable, Much Faster, Good for High End GTX devices, Overkill for RTX Devices. 
12GB v3 - More Accurate, Tiny bit Slower, Good for High End RTX devices Combine 12gb-v3 + fp16 Flags (Precision Mode on the GUI) for the ultimate experience. - The Stream Transcription module had some fixes applied onto it. - Subtitle Creator will work with FP16 mode and missing subflag was added. - GUI Has new elements to handle the new arguments. Also some minor spelling mistakes were zapped. - Microphone Mode had some improvements made and fixes applied to it. --- README.md | 1 + Synthalingua_Wrapper/App.config | 5 +- Synthalingua_Wrapper/MainUI.Designer.vb | 28 ++- Synthalingua_Wrapper/MainUI.resx | 15 +- Synthalingua_Wrapper/MainUI.vb | 12 +- .../PublishProfiles/FolderProfile.pubxml.user | 2 +- .../My Project/Settings.Designer.vb | 20 +- .../My Project/Settings.settings | 7 +- Synthalingua_Wrapper/Synthalingua_Wrapper.sln | 6 - .../Synthalingua_Wrapper.vbproj | 8 +- html_data/player.html | 172 +++++++++++++++--- modules/parser_args.py | 27 ++- modules/stream_transcription_module.py | 12 +- modules/sub_gen.py | 2 +- transcribe_audio.py | 30 +-- 15 files changed, 275 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 8b04c05..df6ffb3 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,7 @@ This script uses argparse to accept command line arguments. The following option | ---- | ----------- | | `--ram` | Change the amount of RAM to use. Default is 4GB. Choices are "1GB", "2GB", "4GB", "6GB", "12GB". | | `--ramforce` | Use this flag to force the script to use desired VRAM. May cause the script to crash if there is not enough VRAM available. | +| `--fp16` | This allows for more accurate information being passed to the process. This will grant the AL the ability to process more information at the cost of speed. You will not see heavy impact on stronger hardware. Combine 12gb-v3 + fp16 Flags (Precision Mode on the GUI) for the ultimate experience. | | `--energy_threshold` | Set the energy level for microphone to detect. Default is 100. 
Choose from 1 to 1000; anything higher will be harder to trigger the audio detection. | | `--mic_calibration_time` | How long to calibrate the mic for in seconds. To skip user input type 0 and time will be set to 5 seconds. | | `--record_timeout` | Set the time in seconds for real-time recording. Default is 2 seconds. | diff --git a/Synthalingua_Wrapper/App.config b/Synthalingua_Wrapper/App.config index ad9a14f..16c2a34 100644 --- a/Synthalingua_Wrapper/App.config +++ b/Synthalingua_Wrapper/App.config @@ -25,7 +25,7 @@ True - 1 + 1gb False @@ -147,6 +147,9 @@ + + False + diff --git a/Synthalingua_Wrapper/MainUI.Designer.vb b/Synthalingua_Wrapper/MainUI.Designer.vb index d0e10e5..39e1ad6 100644 --- a/Synthalingua_Wrapper/MainUI.Designer.vb +++ b/Synthalingua_Wrapper/MainUI.Designer.vb @@ -93,6 +93,8 @@ Partial Class MainUI Label9 = New Label() CaptionsInputBtn = New Button() TabPage4 = New TabPage() + Label17 = New Label() + PrecisionCheckBox = New CheckBox() Label16 = New Label() modelDIr = New TextBox() modelDirPicker = New Button() @@ -261,7 +263,7 @@ Partial Class MainUI RamSize.AutoCompleteSource = AutoCompleteSource.ListItems RamSize.DropDownStyle = ComboBoxStyle.DropDownList RamSize.FormattingEnabled = True - RamSize.Items.AddRange(New Object() {"1gb", "2gb", "4gb", "6gb", "12gb"}) + RamSize.Items.AddRange(New Object() {"1gb", "2gb", "4gb", "6gb", "12gb-v2", "12gb-v3"}) RamSize.Location = New Point(71, 5) RamSize.Margin = New Padding(3, 2, 3, 2) RamSize.Name = "RamSize" @@ -904,6 +906,8 @@ Partial Class MainUI ' TabPage4 ' TabPage4.BackColor = Color.DarkSlateBlue + TabPage4.Controls.Add(Label17) + TabPage4.Controls.Add(PrecisionCheckBox) TabPage4.Controls.Add(Label16) TabPage4.Controls.Add(Label2) TabPage4.Controls.Add(RamSize) @@ -917,6 +921,26 @@ Partial Class MainUI TabPage4.TabIndex = 3 TabPage4.Text = "Model Settings" ' + ' Label17 + ' + Label17.AutoSize = True + Label17.Font = New Font("Segoe UI", 12F) + Label17.Location = New Point(6, 88) + 
Label17.Name = "Label17" + Label17.Size = New Size(443, 147) + Label17.TabIndex = 9 + Label17.Text = resources.GetString("Label17.Text") + ' + ' PrecisionCheckBox + ' + PrecisionCheckBox.AutoSize = True + PrecisionCheckBox.Location = New Point(241, 7) + PrecisionCheckBox.Name = "PrecisionCheckBox" + PrecisionCheckBox.Size = New Size(108, 19) + PrecisionCheckBox.TabIndex = 8 + PrecisionCheckBox.Text = "Precision Mode" + PrecisionCheckBox.UseVisualStyleBackColor = True + ' ' Label16 ' Label16.AutoSize = True @@ -1431,5 +1455,7 @@ Partial Class MainUI Friend WithEvents Label16 As Label Friend WithEvents modelDirPicker As Button Friend WithEvents FolderBrowserDialog2 As FolderBrowserDialog + Friend WithEvents PrecisionCheckBox As CheckBox + Friend WithEvents Label17 As Label End Class diff --git a/Synthalingua_Wrapper/MainUI.resx b/Synthalingua_Wrapper/MainUI.resx index 9169d4a..2c29c24 100644 --- a/Synthalingua_Wrapper/MainUI.resx +++ b/Synthalingua_Wrapper/MainUI.resx @@ -1,7 +1,7 @@  - True|2024-09-26T19:09:54.5357885Z||;True|2024-09-18T03:09:20.6489818-04:00||;True|2024-08-08T02:50:46.6107116-04:00||;True|2024-08-08T02:34:00.3980329-04:00||;True|2024-08-08T02:31:56.2576355-04:00||;True|2024-08-08T02:27:59.1611557-04:00||;True|2024-08-08T02:26:48.4386992-04:00||;True|2024-08-08T02:24:08.4320611-04:00||;True|2024-08-08T02:19:01.0250722-04:00||;True|2024-08-08T01:51:56.8183777-04:00||;True|2024-08-08T01:50:23.0868936-04:00||;True|2024-08-08T01:50:10.4543482-04:00||;True|2024-08-08T01:49:59.7945394-04:00||;True|2024-08-08T01:47:41.5885686-04:00||;True|2024-08-08T01:46:26.9076296-04:00||;True|2024-08-08T01:46:06.3593091-04:00||;True|2024-08-08T01:44:52.6974951-04:00||;True|2024-08-08T01:43:33.9183523-04:00||;True|2024-08-08T01:43:11.3344818-04:00||;True|2024-08-08T01:40:22.8767718-04:00||;True|2024-08-08T01:38:27.2167559-04:00||; + 
True|2024-09-26T22:52:16.8047523Z||;True|2024-09-26T18:45:33.6369257-04:00||;True|2024-09-26T17:43:15.5416964-04:00||;True|2024-09-26T17:13:09.1691137-04:00||;True|2024-09-26T17:08:51.5207150-04:00||;True|2024-09-26T15:09:54.5357885-04:00||;True|2024-09-18T03:09:20.6489818-04:00||;True|2024-08-08T02:50:46.6107116-04:00||;True|2024-08-08T02:34:00.3980329-04:00||;True|2024-08-08T02:31:56.2576355-04:00||;True|2024-08-08T02:27:59.1611557-04:00||;True|2024-08-08T02:26:48.4386992-04:00||;True|2024-08-08T02:24:08.4320611-04:00||;True|2024-08-08T02:19:01.0250722-04:00||;True|2024-08-08T01:51:56.8183777-04:00||;True|2024-08-08T01:50:23.0868936-04:00||;True|2024-08-08T01:50:10.4543482-04:00||;True|2024-08-08T01:49:59.7945394-04:00||;True|2024-08-08T01:47:41.5885686-04:00||;True|2024-08-08T01:46:26.9076296-04:00||;True|2024-08-08T01:46:06.3593091-04:00||;True|2024-08-08T01:44:52.6974951-04:00||;True|2024-08-08T01:43:33.9183523-04:00||;True|2024-08-08T01:43:11.3344818-04:00||;True|2024-08-08T01:40:22.8767718-04:00||;True|2024-08-08T01:38:27.2167559-04:00||; \ No newline at end of file diff --git a/Synthalingua_Wrapper/My Project/Settings.Designer.vb b/Synthalingua_Wrapper/My Project/Settings.Designer.vb index e3191f5..cd6a8f4 100644 --- a/Synthalingua_Wrapper/My Project/Settings.Designer.vb +++ b/Synthalingua_Wrapper/My Project/Settings.Designer.vb @@ -15,7 +15,7 @@ Option Explicit On Namespace My _ Partial Friend NotInheritable Class MySettings Inherits Global.System.Configuration.ApplicationSettingsBase @@ -116,10 +116,10 @@ Namespace My _ - Public Property RamSize() As Integer + Global.System.Configuration.DefaultSettingValueAttribute("1gb")> _ + Public Property RamSize() As String Get - Return CType(Me("RamSize"),Integer) + Return CType(Me("RamSize"),String) End Get Set Me("RamSize") = value @@ -605,6 +605,18 @@ Namespace My Me("modelDIr") = value End Set End Property + + _ + Public Property fp16() As Boolean + Get + Return CType(Me("fp16"),Boolean) + End Get + Set + 
Me("fp16") = value + End Set + End Property End Class End Namespace diff --git a/Synthalingua_Wrapper/My Project/Settings.settings b/Synthalingua_Wrapper/My Project/Settings.settings index 7dbf74f..e360e5a 100644 --- a/Synthalingua_Wrapper/My Project/Settings.settings +++ b/Synthalingua_Wrapper/My Project/Settings.settings @@ -17,8 +17,8 @@ True - - 1 + + 1gb False @@ -140,5 +140,8 @@ + + False + \ No newline at end of file diff --git a/Synthalingua_Wrapper/Synthalingua_Wrapper.sln b/Synthalingua_Wrapper/Synthalingua_Wrapper.sln index caf0e8c..8e7720b 100644 --- a/Synthalingua_Wrapper/Synthalingua_Wrapper.sln +++ b/Synthalingua_Wrapper/Synthalingua_Wrapper.sln @@ -7,18 +7,12 @@ Project("{778DAE3C-4631-46EA-AA77-85C1314464D9}") = "Synthalingua_Wrapper", "Syn EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU Debug|x64 = Debug|x64 - Release|Any CPU = Release|Any CPU Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|Any CPU.Build.0 = Debug|Any CPU {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|x64.ActiveCfg = Debug|x64 {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Debug|x64.Build.0 = Debug|x64 - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|Any CPU.ActiveCfg = Release|Any CPU - {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|Any CPU.Build.0 = Release|Any CPU {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|x64.ActiveCfg = Release|x64 {CAF88D7C-75DD-495B-9971-BFEB457CFAC5}.Release|x64.Build.0 = Release|x64 EndGlobalSection diff --git a/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj b/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj index c4e857d..875ea8c 100644 --- a/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj +++ b/Synthalingua_Wrapper/Synthalingua_Wrapper.vbproj @@ -12,20 +12,20 @@ syntha.png assets\syntha.ico SynthalinguaGUI - Build 
Date: Sep 26 2024 3:07 PM EDT + Build Date: Sep 26 2024 6:50 PM EDT 4 - 1.1.4.44 + 1.1.4.45 1 https://github.com/cyberofficial/Synthalingua git - AnyCPU + x64 - AnyCPU + x64 diff --git a/html_data/player.html b/html_data/player.html index e9ae1c2..1d79bd6 100644 --- a/html_data/player.html +++ b/html_data/player.html @@ -8,7 +8,6 @@ - @@ -97,14 +128,107 @@
+ + +
Customize Captions
+
-

-

-

+

+

+

+
+
+ + + + + - \ No newline at end of file + diff --git a/modules/parser_args.py b/modules/parser_args.py index 5bb72e9..8a885f4 100644 --- a/modules/parser_args.py +++ b/modules/parser_args.py @@ -30,30 +30,45 @@ def set_model_by_ram(ram, language): elif ram == "6gb": if language == "en" or language == "English": model = "medium.en" - else: model = "medium" - elif ram == "12gb": - model = "large-v3" + elif ram == "12gb-v2" or ram == "12gb-v3": + # Determine the model based on the version + if ram == "12gb-v2": + model = "large-v2" + version = "Version 2" + else: # ram == "12gb-v3" + model = "large-v3" + version = "Version 3" + + # Warning for English language if language == "en" or language == "English": red_text = Fore.RED + Back.BLACK green_text = Fore.GREEN + Back.BLACK yellow_text = Fore.YELLOW + Back.BLACK reset_text = Style.RESET_ALL - print(f"{red_text}WARNING{reset_text}: {yellow_text}12gb{reset_text} is overkill for English. Do you want to swap to {green_text}6gb{reset_text} model? If you are transcribing a language other than English, you can ignore this warning and press {green_text}n{reset_text}.") + + print(f"{red_text}WARNING{reset_text}: {yellow_text}12gb{reset_text} is overkill for English. " + f"Do you want to swap to the {green_text}6gb{reset_text} model? 
" + f"If you are transcribing a language other than English, you can ignore this warning and press {green_text}n{reset_text}.") + if input("y/n: ").lower() == "y": model = "medium.en" else: - model = "large-v3" + print(f"Using 12GB {version}") + ram = "12gb" # Normalize ram to "12gb" for both v2 and v3 + else: raise ValueError("Invalid RAM setting provided") return model + def parse_arguments(): parser = argparse.ArgumentParser() - parser.add_argument("--ram", default="4gb", help="Model to use", choices=["1gb", "2gb", "4gb", "6gb", "12gb"]) + parser.add_argument("--ram", default="4gb", help="Model to use", choices=["1gb", "2gb", "4gb", "6gb", "12gb-v2", "12gb-v3"]) parser.add_argument("--ramforce", action='store_true', help="Force the model to use the RAM setting provided. Warning: This may cause the model to crash.") + parser.add_argument("--fp16", action='store_true', default=False, help="Sets Models to FP16 Mode, Heavy on Usage, but more accurate") parser.add_argument("--energy_threshold", default=100, help="Energy level for mic to detect.", type=int) parser.add_argument("--mic_calibration_time", help="How long to calibrate the mic for in seconds. 
To skip user input type 0 and time will be set to 5 seconds.", type=int) parser.add_argument("--record_timeout", default=1, help="How real time the recording is in seconds.", type=float) diff --git a/modules/stream_transcription_module.py b/modules/stream_transcription_module.py index ca4e4ef..32bc571 100644 --- a/modules/stream_transcription_module.py +++ b/modules/stream_transcription_module.py @@ -139,7 +139,7 @@ def combine_audio_segments(segment_paths, output_path): def translate_audio(file_path, model): try: - result = model.transcribe(file_path, task="translate", language=stream_language) + result = model.transcribe(file_path, task="translate", fp16=args.fp16, language=stream_language, condition_on_previous_text=args.condition_on_previous_text) return result["text"] except RuntimeError as e: print(f"Error transcribing audio: {e}") @@ -147,7 +147,7 @@ def translate_audio(file_path, model): def transcribe_audio(file_path, model, language): try: - result = model.transcribe(file_path, language=language) + result = model.transcribe(file_path, language=language, fp16=args.fp16, condition_on_previous_text=args.condition_on_previous_text) return result["text"] except RuntimeError as e: print(f"Error transcribing audio: {e}") @@ -157,10 +157,15 @@ def detect_language(file_path, model, device=args.device): try: audio = whisper.load_audio(file_path) audio = whisper.pad_or_trim(audio) - if args.ram == "12gb": + + # Handle both "12gb-v2" and "12gb-v3" + if args.ram == "12gb-v2": + mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) + elif args.ram == "12gb-v3": mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(device) else: mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) + _, language_probs = model.detect_language(mel) detected_language = max(language_probs, key=language_probs.get) return detected_language @@ -169,6 +174,7 @@ def detect_language(file_path, model, device=args.device): detected_language = "n/a" return detected_language + def 
process_audio(file_path, model): if not os.path.exists(file_path): print(f"Warning: File {file_path} does not exist, skipping.") diff --git a/modules/sub_gen.py b/modules/sub_gen.py index 75ae35f..d46ac81 100644 --- a/modules/sub_gen.py +++ b/modules/sub_gen.py @@ -14,7 +14,7 @@ def run_sub_gen(input_path: str, output_name: str = "", output_directory: str = print("Setting Path") print("Doing the work now...") print("This may take a while, sit back and get a coffee or something.") - result = model.transcribe(input_path, language=args.language, task="translate") + result = model.transcribe(input_path, fp16=args.fp16, language=args.language, task="translate", condition_on_previous_text=args.condition_on_previous_text) print("Setting writer Up") writer = get_writer("srt", str(output_directory)) diff --git a/transcribe_audio.py b/transcribe_audio.py index 4493d1a..a7bd277 100644 --- a/transcribe_audio.py +++ b/transcribe_audio.py @@ -277,7 +277,7 @@ def mic_calibration(): cuda_vram = torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory / 1024 / 1024 overhead_buffer = 200 - ram_options = [("12gb", 12000), ("6gb", 6144), ("4gb", 4096), ("2gb", 2048), ("1gb", 1024)] + ram_options = [("12gb-v2", 12000), ("6gb", 6144), ("4gb", 4096), ("2gb", 2048), ("1gb", 1024)] found = False old_ram_flag = args.ram @@ -553,7 +553,9 @@ def mic_calibration(): audio = whisper.load_audio(temp_file) audio = whisper.pad_or_trim(audio) # if ram is set to 12 use n_mels=128 else use n_mels=80 - if args.ram == "12gb": + if args.ram == "12gb-v2": + mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) + elif args.ram == "12gb-v3": mel = whisper.log_mel_spectrogram(audio, n_mels=128).to(device) else: mel = whisper.log_mel_spectrogram(audio, n_mels=80).to(device) @@ -608,9 +610,9 @@ def mic_calibration(): print("Transcribing...") if device == "cuda": - result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), language=detected_language, 
condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, fp16=args.fp16, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - result = audio_model.transcribe(temp_file, condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) if args.no_log == False: print(f"Detected Speech: {result['text']}") @@ -621,9 +623,9 @@ def mic_calibration(): print("Transcription failed, trying again...") send_to_discord_webhook(webhook_url, "Transcription failed, trying again...") if device == "cuda": - result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), language=detected_language, condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, fp16=args.fp16, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - result = audio_model.transcribe(temp_file, condition_on_previous_text=args.condition_on_previous_text) + result = audio_model.transcribe(temp_file, language=detected_language, condition_on_previous_text=args.condition_on_previous_text) if args.no_log == False: print(f"Detected Speech: {result['text']}") else: @@ -638,9 +640,9 @@ def mic_calibration(): if args.no_log == False: print("Translating...") if device == "cuda": - translated_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="translate", language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language, 
condition_on_previous_text=args.condition_on_previous_text) translated_text = translated_result['text'].strip() if translated_text == "": if args.retry: @@ -648,9 +650,9 @@ def mic_calibration(): print("Translation failed, trying again...") send_to_discord_webhook(webhook_url, "Translation failed, trying again...") if device == "cuda": - translated_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="translate", language=detected_language, condition_on_previous_text=args.condition_on_previous_text) else: - translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language) + translated_result = audio_model.transcribe(temp_file, task="translate", language=detected_language, condition_on_previous_text=args.condition_on_previous_text) translated_text = translated_result['text'].strip() if args.discord_webhook: if translated_text == "": @@ -669,9 +671,9 @@ def mic_calibration(): if args.no_log == False: print(f"Transcribing to {target_language}...") if device == "cuda": - transcribed_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) else: - transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) transcribed_text = transcribed_result['text'].strip() if transcribed_text == "": if args.retry: @@ -679,9 +681,9 @@ def mic_calibration(): print("transcribe failed, trying again...") send_to_discord_webhook(webhook_url, "transcribe failed, trying again...") 
if device == "cuda": - transcribed_result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available(), task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, fp16=args.fp16, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) else: - transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language) + transcribed_result = audio_model.transcribe(temp_file, task="transcribe", language=target_language, condition_on_previous_text=args.condition_on_previous_text) transcribed_text = transcribed_result['text'].strip() if args.discord_webhook: if transcribed_text == "": From e22f0f38989fba3e2318191bf946553317277cf1 Mon Sep 17 00:00:00 2001 From: cyber <19499442+cyberofficial@users.noreply.github.com> Date: Thu, 26 Sep 2024 19:00:36 -0400 Subject: [PATCH 3/3] =?UTF-8?q?Version=20Bump=20=F0=9F=91=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Version Bump 👀 --- modules/version_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/version_checker.py b/modules/version_checker.py index fd9c741..3770a73 100644 --- a/modules/version_checker.py +++ b/modules/version_checker.py @@ -1,6 +1,6 @@ from modules.imports import * -version = "1.0.99998" +version = "1.0.99999" ScriptCreator = "cyberofficial" GitHubRepo = "https://github.com/cyberofficial/Synthalingua" repo_owner = "cyberofficial"