Skip to content

Commit

Permalink
Added test for electra tokenizers
Browse files: browse the repository at this point in the history
  • Loading branch information
KennethEnevoldsen committed Dec 30, 2023
1 parent 85c2a5b commit 54cd512
Showing 1 changed file with 18 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest

from curated_transformers.tokenizers.legacy.bert_tokenizer import (
BERTTokenizer,
)

from ...compat import has_hf_transformers
from ..util import compare_tokenizer_outputs_with_hf_tokenizer


@pytest.mark.skipif(not has_hf_transformers, reason="requires huggingface transformers")
@pytest.mark.parametrize(
    "model_name",
    [
        "jonfd/electra-small-nordic",
        "Maltehb/aelaectra-danish-electra-small-cased",
        "google/electra-small-discriminator",
    ],
)
def test_from_hf_hub_equals_hf_tokenizer(model_name: str, sample_texts) -> None:
    """Compare ``BERTTokenizer`` against the Hugging Face tokenizer for ELECTRA.

    The test is skipped when ``has_hf_transformers`` is falsy and otherwise
    runs once per parametrized model name.

    Args:
        model_name: Hub identifier of the ELECTRA checkpoint under test.
        sample_texts: Input texts handed to the comparison helper
            (presumably a pytest fixture defined in a conftest -- confirm).
    """
    # The ELECTRA checkpoints are exercised through the legacy BERT tokenizer
    # class; the comparison itself (and any assertion) lives in the shared
    # helper imported from ``..util``.
    compare_tokenizer_outputs_with_hf_tokenizer(sample_texts, model_name, BERTTokenizer)

0 comments on commit 54cd512

Please sign in to comment.