# Patch summary (commentary only; it precedes the first "diff --git" header and
# is not part of any hunk).
#
# 1. hanlp/components/ner/transformer_ner.py, hunk inside tag_to_span:
#    changes the guard that decides whether a dict_whitelist match
#    (start, end, label) may overwrite the predicted tags.
#      Before: the tag at `start` had to begin with 'B' or 'S' AND the tag at
#              `end - 1` had to begin with 'E' or 'S'.
#      After:  the first character of the tag at `start` must not be 'M' or 'E'
#              AND the first character of the tag at `end - 1` must not be
#              'B' or 'M'.
#    Tags whose first character is none of B/M/E/S at the boundaries now pass
#    the guard as well (presumably the 'O' tag; confirm against the tag scheme).
# 2. hanlp/version.py: __version__ goes from '2.1.0-alpha.36' to '2.1.0-alpha.37'.
# 3. plugins/hanlp_demo/hanlp_demo/zh/demo_ner_dict.py (new file): loads a
#    pretrained model with hanlp.load, sets dict_whitelist on the 'ner/msra'
#    component to {'午饭后': 'TIME'}, runs only the 'ner/msra' task on one
#    sentence, then pretty-prints the document and prints the 'ner/msra' output.
#
# NOTE(review): `not x in 'ME'` is conventionally spelled `x not in 'ME'`
#   (same result); consider changing it in the target file.
# NOTE(review): `tags[i][0]` raises IndexError on an empty tag string, whereas
#   the replaced str.startswith calls did not. Assumes tags are never empty;
#   confirm.
# NOTE(review): line breaks and context-line indentation below were restored
#   from a whitespace-collapsed copy of this patch; verify the indentation of
#   the tag_to_span hunk against the target file before applying.
diff --git a/hanlp/components/ner/transformer_ner.py b/hanlp/components/ner/transformer_ner.py
index 2579f0d32..15ee0585f 100644
--- a/hanlp/components/ner/transformer_ner.py
+++ b/hanlp/components/ner/transformer_ner.py
@@ -60,8 +60,7 @@ def tag_to_span(self, batch_tags, batch):
         for tags, tokens in zip(batch_tags, sents):
             if dict_whitelist:
                 for start, end, label in dict_whitelist.tokenize(tokens):
-                    if (tags[start].startswith('B') or tags[start].startswith('S')) and (
-                            tags[end - 1].startswith('E') or tags[end - 1].startswith('S')):
+                    if (not tags[start][0] in 'ME') and (not tags[end - 1][0] in 'BM'):
                         if end - start == 1:
                             tags[start] = 'S-' + label
                         else:
diff --git a/hanlp/version.py b/hanlp/version.py
index 661632937..77c220a4e 100644
--- a/hanlp/version.py
+++ b/hanlp/version.py
@@ -2,5 +2,5 @@
 # Author: hankcs
 # Date: 2019-12-28 19:26
 
-__version__ = '2.1.0-alpha.36'
+__version__ = '2.1.0-alpha.37'
 """HanLP version"""
diff --git a/plugins/hanlp_demo/hanlp_demo/zh/demo_ner_dict.py b/plugins/hanlp_demo/hanlp_demo/zh/demo_ner_dict.py
new file mode 100644
index 000000000..4d93e0951
--- /dev/null
+++ b/plugins/hanlp_demo/hanlp_demo/zh/demo_ner_dict.py
@@ -0,0 +1,12 @@
+# -*- coding:utf-8 -*-
+# Author: hankcs
+# Date: 2021-04-29 11:06
+import hanlp
+
+HanLP = hanlp.load(hanlp.pretrained.mtl.CLOSE_TOK_POS_NER_SRL_DEP_SDP_CON_ELECTRA_BASE_ZH)
+HanLP['ner/msra'].dict_whitelist = {'午饭后': 'TIME'}
+doc = HanLP('2021年测试高血压是138,时间是午饭后2点45,低血压是44', tasks='ner/msra')
+doc.pretty_print()
+print(doc['ner/msra'])
+
+# See https://hanlp.hankcs.com/docs/api/hanlp/components/mtl/tasks/ner/tag_ner.html