src

jordimas · Jul 14, 2024 · 037df69
1 parent bb5e541
commit 037df69
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 16 deletions.
diff --git a/src/tmx_to_text/converttmx.py b/src/tmx_to_text/converttmx.py
@@ -28,9 +28,9 @@ def __init__(self, input_file, en_filename, ca_filename):
         self.en_filename = en_filename
         self.ca_filename = ca_filename
 
-    def convert(self, source_language, target_language, nodup = False):
+    def convert(self, source_language, target_language, nodup_source = False, nodup_target = False):
         entries = 0
-        duplicated = 0
+        duplicated_source = 0
         seen_sources = set()
         tf_en = open(self.en_filename, 'w')
         tf_ca = open(self.ca_filename, 'w')
@@ -77,10 +77,10 @@ def convert(self, source_language, target_language, nodup = False):
                 translation = translation.replace("\n", '')
 
                 write_entry = True
-                if nodup:
+                if nodup_source:
                     if source in seen_sources:
                         write_entry = False
-                        duplicated += 1
+                        duplicated_source += 1
                     else:
                         seen_sources.add(source)
 
@@ -92,13 +92,12 @@ def convert(self, source_language, target_language, nodup = False):
                 tu.clear()
                 elem.clear()
 
-
         tf_en.close()
         tf_ca.close()
         fp.close()
         print(f"Wrote {entries} strings")
-        if nodup:
-            print(f"Duplicates {duplicated} strings)")
+        if nodup_source:
+            print(f"Duplicates {duplicated_source} strings in source")
 
         if entries == 0:
             print(f"Make sure using 'info' command that there are actually strings for both languages '{source_language}' and '{target_language}'")
diff --git a/src/tmx_to_text/tmx_to_text.py b/src/tmx_to_text/tmx_to_text.py
@@ -37,7 +37,8 @@ def read_parameters():
     convert.add_argument('-t', '--target_lang', type=str, required=True, dest='target_lang', help="Target language to export")
     convert.add_argument('-p', '--prefix', type=str, dest='prefix', default='', help="Filename prefix used in the generated text files")
     convert.add_argument('-d', '--debug', action='store_true', default=False, dest='debug', help="Debug memory and execution time")
-    convert.add_argument('-x', '--nodups', action='store_true', default=False, dest='nodup', help="Remove duplicates based on source")
+    convert.add_argument('-x', '--nodup_source', action='store_true', default=False, dest='nodup_source', help="Remove duplicates based on source")
+    convert.add_argument('-a', '--nodup_target', action='store_true', default=False, dest='nodup_target', help="Remove duplicates based on target")
     args = parser.parse_args()
     return args
 
@@ -58,7 +59,7 @@ def convert(args):
         start_time = datetime.datetime.now()
 
     convert = ConvertTmx(tmx_file, txt_en_file, txt_ca_file)
-    convert.convert(source, target, args.nodup)
+    convert.convert(source, target, args.nodup_source, args.nodup_target)
 
     if debug:
         max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 

diff --git a/tests/data/duplicate.xml → tests/data/duplicate_source.xml b/tests/data/duplicate.xml → tests/data/duplicate_source.xml
@@ -9,7 +9,7 @@
         <seg>"Aligner" aligner utility</seg>
       </tuv>
       <tuv lang="CA">
-        <seg>Alineador de textos "Aligner"</seg>
+        <seg>Alineador de textos "Aligner" 1</seg>
       </tuv>
     </tu>
     <tu>
@@ -19,7 +19,7 @@
         <seg>"Aligner" aligner utility</seg>
       </tuv>
       <tuv lang="CA">
-        <seg>Alineador de textos "Aligner"</seg>
+        <seg>Alineador de textos "Aligner" 2</seg>
       </tuv>
     </tu>
     <tu>

diff --git a/tests/testconverttmx.py b/tests/testconverttmx.py
@@ -99,31 +99,31 @@ def test_conversion_nodup_no(self):
         source_file = tempfile.NamedTemporaryFile().name
         target_file = tempfile.NamedTemporaryFile().name
 
-        tmx_file = self._get_tmx_file('duplicate.xml')
+        tmx_file = self._get_tmx_file('duplicate_source.xml')
         convertTmx = ConvertTmx(tmx_file, source_file, target_file)
         convertTmx.convert("en", "ca")
 
         source_lines, target_lines = self._get_lines(source_file, target_file)
         self.assertEquals(2, len(source_lines))
         self.assertEquals(2, len(target_lines))
         self.assertEquals(source_lines[0].rstrip(), '"Aligner" aligner utility')
-        self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner"')
+        self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner" 1')
         self.assertEquals(source_lines[1].rstrip(), '"Aligner" aligner utility')
-        self.assertEquals(target_lines[1].rstrip(), 'Alineador de textos "Aligner"')
+        self.assertEquals(target_lines[1].rstrip(), 'Alineador de textos "Aligner" 2')
 
     def test_conversion_nodup_yes(self):
         source_file = tempfile.NamedTemporaryFile().name
         target_file = tempfile.NamedTemporaryFile().name
 
-        tmx_file = self._get_tmx_file('duplicate.xml')
+        tmx_file = self._get_tmx_file('duplicate_source.xml')
         convertTmx = ConvertTmx(tmx_file, source_file, target_file)
         convertTmx.convert("en", "ca", True)
 
         source_lines, target_lines = self._get_lines(source_file, target_file)
         self.assertEquals(1, len(source_lines))
         self.assertEquals(1, len(target_lines))
         self.assertEquals(source_lines[0].rstrip(), '"Aligner" aligner utility')
-        self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner"')
+        self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner" 1')
 
 if __name__ == '__main__':
     unittest.main()