From af60e1ced00c57c09b35fd904a0c3d56a9066f47 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 10:02:11 +0800
Subject: [PATCH 1/6] Add trainable settings for pt

---
 deepmd/pt/model/descriptor/dpa1.py |  5 +++--
 deepmd/pt/model/descriptor/dpa2.py |  6 ++++++
 deepmd/pt/model/descriptor/se_a.py |  4 ++++
 deepmd/pt/model/descriptor/se_r.py |  4 ++++
 deepmd/pt/model/task/fitting.py    |  6 ++++++
 deepmd/pt/train/training.py        |  3 ---
 deepmd/pt/train/wrapper.py         | 22 ----------------------
 7 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py
index 0245179d8b..93ca193f2a 100644
--- a/deepmd/pt/model/descriptor/dpa1.py
+++ b/deepmd/pt/model/descriptor/dpa1.py
@@ -70,8 +70,6 @@ def __init__(
             raise NotImplementedError("type_one_side is not supported.")
         if precision != "default" and precision != "float64":
             raise NotImplementedError("precison is not supported.")
-        if not trainable:
-            raise NotImplementedError("trainable == False is not supported.")
         if exclude_types is not None and exclude_types != []:
             raise NotImplementedError("exclude_types is not supported.")
         if stripped_type_embedding:
@@ -106,6 +104,9 @@ def __init__(
         self.type_embedding = TypeEmbedNet(ntypes, tebd_dim)
         self.tebd_dim = tebd_dim
         self.concat_output_tebd = concat_output_tebd
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py
index 20a7c74cda..8ec4deb365 100644
--- a/deepmd/pt/model/descriptor/dpa2.py
+++ b/deepmd/pt/model/descriptor/dpa2.py
@@ -75,6 +75,7 @@ def __init__(
         repformer_update_style: str = "res_avg",
         repformer_set_davg_zero: bool = True,  # TODO
         repformer_add_type_ebd_to_seq: bool = False,
+        trainable: bool = True,
         type: Optional[
             str
         ] = None,  # work around the bad design in get_trainer and DpLoaderSet!
@@ -170,6 +171,8 @@ def __init__(
             repformers block: set the avg to zero in statistics
         repformer_add_type_ebd_to_seq : bool
             repformers block: concatenate the type embedding at the output.
+        trainable : bool
+            If the parameters in the descriptor are trainable.

         Returns
         -------
@@ -249,6 +252,9 @@ def __init__(
         self.rcut = self.repinit.get_rcut()
         self.ntypes = ntypes
         self.sel = self.repinit.sel
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py
index fc2cf60531..049f29dca6 100644
--- a/deepmd/pt/model/descriptor/se_a.py
+++ b/deepmd/pt/model/descriptor/se_a.py
@@ -272,6 +272,7 @@ def __init__(
         exclude_types: List[Tuple[int, int]] = [],
         old_impl: bool = False,
         type_one_side: bool = True,
+        trainable: bool = True,
         **kwargs,
     ):
         """Construct an embedding net of type `se_a`.
@@ -344,6 +345,9 @@ def __init__(
         )
         self.filter_layers = filter_layers
         self.stats = None
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py
index 16721fbe5e..c0c753e4bc 100644
--- a/deepmd/pt/model/descriptor/se_r.py
+++ b/deepmd/pt/model/descriptor/se_r.py
@@ -63,6 +63,7 @@ def __init__(
         resnet_dt: bool = False,
         exclude_types: List[Tuple[int, int]] = [],
         old_impl: bool = False,
+        trainable: bool = True,
         **kwargs,
     ):
         super().__init__()
@@ -110,6 +111,9 @@ def __init__(
         )
         self.filter_layers = filter_layers
         self.stats = None
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 8e8338210f..e1bccc8519 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -246,6 +246,8 @@ class GeneralFitting(Fitting):
         Random seed.
     exclude_types: List[int]
         Atomic contributions of the excluded atom types are set zero.
+    trainable : bool
+        If the parameters in the fitting net are trainable.

     """

@@ -265,6 +267,7 @@ def __init__(
         rcond: Optional[float] = None,
         seed: Optional[int] = None,
         exclude_types: List[int] = [],
+        trainable: bool = True,
         **kwargs,
     ):
         super().__init__()
@@ -356,6 +359,9 @@ def __init__(
         if seed is not None:
             log.info("Set seed to %d in fitting net.", seed)
             torch.manual_seed(seed)
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def reinit_exclude(
         self,
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 152c69a444..96870a25b0 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -399,9 +399,6 @@ def get_loss(loss_params, start_lr, _ntypes):
                 frz_model = torch.jit.load(init_frz_model, map_location=DEVICE)
                 self.model.load_state_dict(frz_model.state_dict())

-        # Set trainable params
-        self.wrapper.set_trainable_params()
-
         # Multi-task share params
         if shared_links is not None:
             self.wrapper.share_params(shared_links, resume=model_params["resuming"])
diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py
index 74b4a83ce7..518643929a 100644
--- a/deepmd/pt/train/wrapper.py
+++ b/deepmd/pt/train/wrapper.py
@@ -60,28 +60,6 @@ def __init__(
                 self.loss[task_key] = loss[task_key]
         self.inference_only = self.loss is None

-    def set_trainable_params(self):
-        supported_types = ["type_embedding", "descriptor", "fitting_net"]
-        for model_item in self.model:
-            for net_type in supported_types:
-                trainable = True
-                if not self.multi_task:
-                    if net_type in self.model_params:
-                        trainable = self.model_params[net_type].get("trainable", True)
-                else:
-                    if net_type in self.model_params["model_dict"][model_item]:
-                        trainable = self.model_params["model_dict"][model_item][
-                            net_type
-                        ].get("trainable", True)
-                if (
-                    hasattr(self.model[model_item], net_type)
-                    and getattr(self.model[model_item], net_type) is not None
-                ):
-                    for param in (
-                        self.model[model_item].__getattr__(net_type).parameters()
-                    ):
-                        param.requires_grad = trainable
-
     def share_params(self, shared_links, resume=False):
         supported_types = ["type_embedding", "descriptor", "fitting_net"]
         for shared_item in shared_links:
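The pattern this patch settles on moves freezing out of the wrapper and into each module: every parameter of a frozen block has requires_grad switched off, so autograd never populates its .grad and the optimizer leaves it untouched. A minimal, self-contained sketch of the same idiom (the two Linear layers stand in for a descriptor and a fitting net; the sizes are made up):

    import torch

    descriptor = torch.nn.Linear(4, 4)  # stand-in for a frozen descriptor block
    fitting = torch.nn.Linear(4, 1)     # stand-in for a trainable fitting net

    # Same idiom as the patch: flip requires_grad on every parameter.
    for param in descriptor.parameters():
        param.requires_grad = False

    model = torch.nn.Sequential(descriptor, fitting)
    model(torch.randn(2, 4)).sum().backward()

    assert all(p.grad is None for p in descriptor.parameters())   # frozen
    assert all(p.grad is not None for p in fitting.parameters())  # still learns

Because the frozen parameters' .grad stays None, standard torch optimizers skip them even if they are passed in; no extra filtering is strictly required.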
From 588d2197ea233fddd872663eb15878fbbfce83c4 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 10:33:36 +0800
Subject: [PATCH 2/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index e1bccc8519..bbf2c963a1 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -403,7 +403,7 @@ def serialize(self) -> dict:
             # "spin": self.spin ,
             ## NOTICE: not supported by far
             "tot_ener_zero": False,
-            "trainable": [True] * (len(self.neuron) + 1),
+            "trainable": self.trainable,
             "layer_name": None,
             "use_aparam_as_mask": False,
             "spin": None,
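In the serialized format, "trainable" is a per-layer list: one flag for each hidden layer listed in `neuron` plus one for the final output layer, hence length len(`neuron`)+1. A small illustration of that mapping (the `neuron` value is invented for the example):

    neuron = [240, 240, 240]  # hidden-layer widths; example value only
    trainable = True          # one switch for the whole fitting net

    # One flag per hidden layer, plus one for the output layer.
    per_layer = [trainable] * (len(neuron) + 1)
    assert per_layer == [True, True, True, True]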
From 31b096c23e810c0c89d1aaf2c7c32342820792c7 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 11:23:14 +0800
Subject: [PATCH 3/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index bbf2c963a1..c349fb4dcf 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -285,6 +285,8 @@ def __init__(
         self.rcond = rcond
         # order matters, should be place after the assignment of ntypes
         self.reinit_exclude(exclude_types)
+        # need support for each layer settings
+        self.trainable = all(trainable) if isinstance(trainable, list) else trainable
         net_dim_out = self._net_out_dim()
         # init constants

@@ -361,7 +363,7 @@ def __init__(
             torch.manual_seed(seed)
         # set trainable
         for param in self.parameters():
-            param.requires_grad = trainable
+            param.requires_grad = self.trainable

     def reinit_exclude(
         self,
From 47ce9569c3b5170c5e754335ae3adc981540ebfe Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 11:50:09 +0800
Subject: [PATCH 4/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index c349fb4dcf..3efbe44672 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -285,8 +285,7 @@ def __init__(
         self.rcond = rcond
         # order matters, should be place after the assignment of ntypes
         self.reinit_exclude(exclude_types)
-        # need support for each layer settings
-        self.trainable = all(trainable) if isinstance(trainable, list) else trainable
+        self.trainable = trainable
         net_dim_out = self._net_out_dim()
         # init constants

@@ -363,7 +362,12 @@ def __init__(
             torch.manual_seed(seed)
         # set trainable
         for param in self.parameters():
-            param.requires_grad = self.trainable
+            # need support for each layer settings
+            param.requires_grad = (
+                all(self.trainable)
+                if isinstance(self.trainable, list)
+                else self.trainable
+            )

     def reinit_exclude(
         self,
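Per-layer freezing is not yet supported in the PyTorch fitting net, so a per-layer list has to be collapsed into one module-wide flag; with all(), the net stays trainable only when every entry is True. The normalization in isolation (the helper name is hypothetical, for illustration only):

    from typing import List, Union

    def normalize_trainable(trainable: Union[bool, List[bool]]) -> bool:
        # Collapse a per-layer trainable list into a single module-wide flag.
        return all(trainable) if isinstance(trainable, list) else trainable

    assert normalize_trainable(True) is True
    assert normalize_trainable([True, True, False]) is False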
From cbb74992f67f488250a385d75bae6bbc85cf8735 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 14:25:51 +0800
Subject: [PATCH 5/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 3efbe44672..5026cf5773 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -286,6 +286,10 @@ def __init__(
         # order matters, should be place after the assignment of ntypes
         self.reinit_exclude(exclude_types)
         self.trainable = trainable
+        # need support for each layer settings
+        self.trainable = (
+            all(self.trainable) if isinstance(self.trainable, list) else self.trainable
+        )
         net_dim_out = self._net_out_dim()
         # init constants

@@ -362,12 +366,7 @@ def __init__(
             torch.manual_seed(seed)
         # set trainable
         for param in self.parameters():
-            # need support for each layer settings
-            param.requires_grad = (
-                all(self.trainable)
-                if isinstance(self.trainable, list)
-                else self.trainable
-            )
+            param.requires_grad = self.trainable

     def reinit_exclude(
         self,
@@ -409,7 +408,7 @@ def serialize(self) -> dict:
             # "spin": self.spin ,
             ## NOTICE: not supported by far
             "tot_ener_zero": False,
-            "trainable": self.trainable,
+            "trainable": [self.trainable] * (len(self.neuron) + 1),
             "layer_name": None,
             "use_aparam_as_mask": False,
             "spin": None,
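With the flag plumbed through the descriptors and the fitting net, freezing is driven from the training input. A sketch of the relevant fragment (every key other than "trainable" carries a placeholder value, not a value taken from this series):

    # Fragment of a PyTorch-backend training config.
    config = {
        "model": {
            "descriptor": {
                "type": "se_e2_a",   # illustrative descriptor type
                "trainable": False,  # freeze every descriptor parameter
            },
            "fitting_net": {
                "neuron": [240, 240, 240],  # illustrative layer widths
                "trainable": False,         # freeze the fitting net as well
            },
        },
    }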
From f50afb5cd11e0ff1b40c8fec1d820163178a91c9 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Sat, 2 Mar 2024 00:29:56 +0800
Subject: [PATCH 6/6] Add ut and doc

---
 deepmd/pt/model/task/fitting.py  |  9 ++++++---
 deepmd/utils/argcheck.py         |  4 ++--
 source/tests/pt/test_training.py | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 47dba909b3..f79916b36e 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -7,6 +7,7 @@
 from typing import (
     List,
     Optional,
+    Union,
 )

 import numpy as np
@@ -239,8 +240,10 @@ class GeneralFitting(Fitting):
         Random seed.
     exclude_types: List[int]
         Atomic contributions of the excluded atom types are set zero.
-    trainable : bool
-        If the parameters in the fitting net are trainable.
+    trainable : Union[List[bool], bool]
+        If the parameters in the fitting net are trainable.
+        Currently this only supports setting all the parameters in the fitting net to the same state.
+        When given as a List[bool], the net is trainable only if every entry is True.
     remove_vaccum_contribution: List[bool], optional
         Remove vaccum contribution before the bias is added. The list assigned each
         type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same
@@ -263,7 +266,7 @@ def __init__(
         rcond: Optional[float] = None,
         seed: Optional[int] = None,
         exclude_types: List[int] = [],
-        trainable: bool = True,
+        trainable: Union[bool, List[bool]] = True,
         remove_vaccum_contribution: Optional[List[bool]] = None,
         **kwargs,
     ):
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 89b341491e..1f0064c460 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -885,9 +885,9 @@ def fitting_ener():
     doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
     doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
     doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
 - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
-- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
+    doc_trainable = f"Whether the parameters in the fitting net are trainable. This option can be\n\n\
+- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1."
     doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
     doc_seed = "Random seed for parameter initialization of the fitting net"
     doc_atom_ener = "Specify the atomic energy in vacuum for each type"
diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
index 4e73fc4f8a..f2a081610a 100644
--- a/source/tests/pt/test_training.py
+++ b/source/tests/pt/test_training.py
@@ -10,6 +10,8 @@
     Path,
 )

+import torch
+
 from deepmd.pt.entrypoints.main import (
     get_trainer,
 )
@@ -28,6 +30,20 @@ def test_dp_train(self):
         trainer.run()
         self.tearDown()

+    def test_trainable(self):
+        fix_params = deepcopy(self.config)
+        fix_params["model"]["descriptor"]["trainable"] = False
+        fix_params["model"]["fitting_net"]["trainable"] = False
+        trainer_fix = get_trainer(fix_params)
+        model_dict_before_training = deepcopy(trainer_fix.model.state_dict())
+        trainer_fix.run()
+        model_dict_after_training = deepcopy(trainer_fix.model.state_dict())
+        for key in model_dict_before_training:
+            torch.testing.assert_allclose(
+                model_dict_before_training[key], model_dict_after_training[key]
+            )
+        self.tearDown()
+
     def tearDown(self):
         for f in os.listdir("."):
             if f.startswith("model") and f.endswith(".pt"):
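Beyond comparing state dicts before and after training as the unit test does, a quick complementary spot check is to count how many parameters still receive gradients (hypothetical helper, not part of the patch; type-embedding parameters may remain trainable unless frozen separately):

    import torch

    def count_trainable(model: torch.nn.Module) -> int:
        # Number of scalar parameters the optimizer can still update.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)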