Skip to content

Commit

Permalink
src
Browse files (browse the repository at this point in the history)
  • Loading branch information
jordimas committed Jul 14, 2024
1 parent bb5e541 commit 037df69
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 16 deletions.
13 changes: 6 additions & 7 deletions src/tmx_to_text/converttmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ def __init__(self, input_file, en_filename, ca_filename):
self.en_filename = en_filename
self.ca_filename = ca_filename

def convert(self, source_language, target_language, nodup = False):
def convert(self, source_language, target_language, nodup_source = False, nodup_target = False):
entries = 0
duplicated = 0
duplicated_source = 0
seen_sources = set()
tf_en = open(self.en_filename, 'w')
tf_ca = open(self.ca_filename, 'w')
Expand Down Expand Up @@ -77,10 +77,10 @@ def convert(self, source_language, target_language, nodup = False):
translation = translation.replace("\n", '')

write_entry = True
if nodup:
if nodup_source:
if source in seen_sources:
write_entry = False
duplicated += 1
duplicated_source += 1
else:
seen_sources.add(source)

Expand All @@ -92,13 +92,12 @@ def convert(self, source_language, target_language, nodup = False):
tu.clear()
elem.clear()


tf_en.close()
tf_ca.close()
fp.close()
print(f"Wrote {entries} strings")
if nodup:
print(f"Duplicates {duplicated} strings)")
if nodup_source:
print(f"Duplicates {duplicated_source} strings in source")

if entries == 0:
print(f"Make sure using 'info' command that there are actually strings for both languages '{source_language}' and '{target_language}'")
5 changes: 3 additions & 2 deletions src/tmx_to_text/tmx_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def read_parameters():
convert.add_argument('-t', '--target_lang', type=str, required=True, dest='target_lang', help="Target language to export")
convert.add_argument('-p', '--prefix', type=str, dest='prefix', default='', help="Filename prefix used in the generated text files")
convert.add_argument('-d', '--debug', action='store_true', default=False, dest='debug', help="Debug memory and execution time")
convert.add_argument('-x', '--nodups', action='store_true', default=False, dest='nodup', help="Remove duplicates based on source")
convert.add_argument('-x', '--nodup_source', action='store_true', default=False, dest='nodup_source', help="Remove duplicates based on source")
convert.add_argument('-a', '--nodup_target', action='store_true', default=False, dest='nodup_target', help="Remove duplicates based on target")
args = parser.parse_args()
return args

Expand All @@ -58,7 +59,7 @@ def convert(args):
start_time = datetime.datetime.now()

convert = ConvertTmx(tmx_file, txt_en_file, txt_ca_file)
convert.convert(source, target, args.nodup)
convert.convert(source, target, args.nodup_source, args.nodup_target)

if debug:
max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
Expand Down
4 changes: 2 additions & 2 deletions tests/data/duplicate.xml → tests/data/duplicate_source.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<seg>"Aligner" aligner utility</seg>
</tuv>
<tuv lang="CA">
<seg>Alineador de textos "Aligner"</seg>
<seg>Alineador de textos "Aligner" 1</seg>
</tuv>
</tu>
<tu>
Expand All @@ -19,7 +19,7 @@
<seg>"Aligner" aligner utility</seg>
</tuv>
<tuv lang="CA">
<seg>Alineador de textos "Aligner"</seg>
<seg>Alineador de textos "Aligner" 2</seg>
</tuv>
</tu>
<tu>
Expand Down
10 changes: 5 additions & 5 deletions tests/testconverttmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,31 +99,31 @@ def test_conversion_nodup_no(self):
source_file = tempfile.NamedTemporaryFile().name
target_file = tempfile.NamedTemporaryFile().name

tmx_file = self._get_tmx_file('duplicate.xml')
tmx_file = self._get_tmx_file('duplicate_source.xml')
convertTmx = ConvertTmx(tmx_file, source_file, target_file)
convertTmx.convert("en", "ca")

source_lines, target_lines = self._get_lines(source_file, target_file)
self.assertEquals(2, len(source_lines))
self.assertEquals(2, len(target_lines))
self.assertEquals(source_lines[0].rstrip(), '"Aligner" aligner utility')
self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner"')
self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner" 1')
self.assertEquals(source_lines[1].rstrip(), '"Aligner" aligner utility')
self.assertEquals(target_lines[1].rstrip(), 'Alineador de textos "Aligner"')
self.assertEquals(target_lines[1].rstrip(), 'Alineador de textos "Aligner" 2')

def test_conversion_nodup_yes(self):
source_file = tempfile.NamedTemporaryFile().name
target_file = tempfile.NamedTemporaryFile().name

tmx_file = self._get_tmx_file('duplicate.xml')
tmx_file = self._get_tmx_file('duplicate_source.xml')
convertTmx = ConvertTmx(tmx_file, source_file, target_file)
convertTmx.convert("en", "ca", True)

source_lines, target_lines = self._get_lines(source_file, target_file)
self.assertEquals(1, len(source_lines))
self.assertEquals(1, len(target_lines))
self.assertEquals(source_lines[0].rstrip(), '"Aligner" aligner utility')
self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner"')
self.assertEquals(target_lines[0].rstrip(), 'Alineador de textos "Aligner" 1')

if __name__ == '__main__':
unittest.main()

0 comments on commit 037df69

Please sign in to comment.