From af60e1ced00c57c09b35fd904a0c3d56a9066f47 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 10:02:11 +0800
Subject: [PATCH 1/6] Add trainable settings for pt

---
 deepmd/pt/model/descriptor/dpa1.py |  5 +++--
 deepmd/pt/model/descriptor/dpa2.py |  6 ++++++
 deepmd/pt/model/descriptor/se_a.py |  4 ++++
 deepmd/pt/model/descriptor/se_r.py |  4 ++++
 deepmd/pt/model/task/fitting.py    |  6 ++++++
 deepmd/pt/train/training.py        |  3 ---
 deepmd/pt/train/wrapper.py         | 22 ----------------------
 7 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py
index 0245179d8b..93ca193f2a 100644
--- a/deepmd/pt/model/descriptor/dpa1.py
+++ b/deepmd/pt/model/descriptor/dpa1.py
@@ -70,8 +70,6 @@ def __init__(
             raise NotImplementedError("type_one_side is not supported.")
         if precision != "default" and precision != "float64":
             raise NotImplementedError("precison is not supported.")
-        if not trainable:
-            raise NotImplementedError("trainable == False is not supported.")
         if exclude_types is not None and exclude_types != []:
             raise NotImplementedError("exclude_types is not supported.")
         if stripped_type_embedding:
@@ -106,6 +104,9 @@ def __init__(
         self.type_embedding = TypeEmbedNet(ntypes, tebd_dim)
         self.tebd_dim = tebd_dim
         self.concat_output_tebd = concat_output_tebd
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py
index 20a7c74cda..8ec4deb365 100644
--- a/deepmd/pt/model/descriptor/dpa2.py
+++ b/deepmd/pt/model/descriptor/dpa2.py
@@ -75,6 +75,7 @@ def __init__(
         repformer_update_style: str = "res_avg",
         repformer_set_davg_zero: bool = True,  # TODO
         repformer_add_type_ebd_to_seq: bool = False,
+        trainable: bool = True,
         type: Optional[
             str
         ] = None,  # work around the bad design in get_trainer and DpLoaderSet!
@@ -170,6 +171,8 @@ def __init__(
             repformers block: set the avg to zero in statistics
         repformer_add_type_ebd_to_seq : bool
             repformers block: concatenate the type embedding at the output.
+        trainable : bool
+            If the parameters in the descriptor are trainable.

         Returns
         -------
@@ -249,6 +252,9 @@ def __init__(
         self.rcut = self.repinit.get_rcut()
         self.ntypes = ntypes
         self.sel = self.repinit.sel
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py
index fc2cf60531..049f29dca6 100644
--- a/deepmd/pt/model/descriptor/se_a.py
+++ b/deepmd/pt/model/descriptor/se_a.py
@@ -272,6 +272,7 @@ def __init__(
         exclude_types: List[Tuple[int, int]] = [],
         old_impl: bool = False,
         type_one_side: bool = True,
+        trainable: bool = True,
         **kwargs,
     ):
         """Construct an embedding net of type `se_a`.
@@ -344,6 +345,9 @@ def __init__(
         )
         self.filter_layers = filter_layers
         self.stats = None
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py
index 16721fbe5e..c0c753e4bc 100644
--- a/deepmd/pt/model/descriptor/se_r.py
+++ b/deepmd/pt/model/descriptor/se_r.py
@@ -63,6 +63,7 @@ def __init__(
         resnet_dt: bool = False,
         exclude_types: List[Tuple[int, int]] = [],
         old_impl: bool = False,
+        trainable: bool = True,
         **kwargs,
     ):
         super().__init__()
@@ -110,6 +111,9 @@ def __init__(
         )
         self.filter_layers = filter_layers
         self.stats = None
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def get_rcut(self) -> float:
         """Returns the cut-off radius."""
diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 8e8338210f..e1bccc8519 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -246,6 +246,8 @@ class GeneralFitting(Fitting):
         Random seed.
     exclude_types: List[int]
         Atomic contributions of the excluded atom types are set zero.
+    trainable : bool
+        If the parameters in the fitting net are trainable.

     """

@@ -265,6 +267,7 @@ def __init__(
         rcond: Optional[float] = None,
         seed: Optional[int] = None,
         exclude_types: List[int] = [],
+        trainable: bool = True,
         **kwargs,
     ):
         super().__init__()
@@ -356,6 +359,9 @@ def __init__(
         if seed is not None:
             log.info("Set seed to %d in fitting net.", seed)
             torch.manual_seed(seed)
+        # set trainable
+        for param in self.parameters():
+            param.requires_grad = trainable

     def reinit_exclude(
         self,
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index 152c69a444..96870a25b0 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -399,9 +399,6 @@ def get_loss(loss_params, start_lr, _ntypes):
                 frz_model = torch.jit.load(init_frz_model, map_location=DEVICE)
                 self.model.load_state_dict(frz_model.state_dict())

-        # Set trainable params
-        self.wrapper.set_trainable_params()
-
         # Multi-task share params
         if shared_links is not None:
             self.wrapper.share_params(shared_links, resume=model_params["resuming"])
diff --git a/deepmd/pt/train/wrapper.py b/deepmd/pt/train/wrapper.py
index 74b4a83ce7..518643929a 100644
--- a/deepmd/pt/train/wrapper.py
+++ b/deepmd/pt/train/wrapper.py
@@ -60,28 +60,6 @@ def __init__(
                 self.loss[task_key] = loss[task_key]
         self.inference_only = self.loss is None

-    def set_trainable_params(self):
-        supported_types = ["type_embedding", "descriptor", "fitting_net"]
-        for model_item in self.model:
-            for net_type in supported_types:
-                trainable = True
-                if not self.multi_task:
-                    if net_type in self.model_params:
-                        trainable = self.model_params[net_type].get("trainable", True)
-                else:
-                    if net_type in self.model_params["model_dict"][model_item]:
-                        trainable = self.model_params["model_dict"][model_item][
-                            net_type
-                        ].get("trainable", True)
-                if (
-                    hasattr(self.model[model_item], net_type)
-                    and getattr(self.model[model_item], net_type) is not None
-                ):
-                    for param in (
-                        self.model[model_item].__getattr__(net_type).parameters()
-                    ):
-                        param.requires_grad = trainable
-
     def share_params(self, shared_links, resume=False):
         supported_types = ["type_embedding", "descriptor", "fitting_net"]
         for shared_item in shared_links:
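The pattern this patch settles on moves freezing out of the wrapper and into each module: every parameter of a frozen block has requires_grad switched off, so autograd never populates its .grad and the optimizer leaves it untouched. A minimal, self-contained sketch of the same idiom (the two Linear layers stand in for a descriptor and a fitting net; the sizes are made up):

    import torch

    descriptor = torch.nn.Linear(4, 4)  # stand-in for a frozen descriptor block
    fitting = torch.nn.Linear(4, 1)     # stand-in for a trainable fitting net

    # Same idiom as the patch: flip requires_grad on every parameter.
    for param in descriptor.parameters():
        param.requires_grad = False

    model = torch.nn.Sequential(descriptor, fitting)
    model(torch.randn(2, 4)).sum().backward()

    assert all(p.grad is None for p in descriptor.parameters())   # frozen
    assert all(p.grad is not None for p in fitting.parameters())  # still learns

Because the frozen parameters' .grad stays None, standard torch optimizers skip them even if they are passed in; no extra filtering is strictly required.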
From 588d2197ea233fddd872663eb15878fbbfce83c4 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 10:33:36 +0800
Subject: [PATCH 2/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index e1bccc8519..bbf2c963a1 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -403,7 +403,7 @@ def serialize(self) -> dict:
             # "spin": self.spin ,
             ## NOTICE: not supported by far
             "tot_ener_zero": False,
-            "trainable": [True] * (len(self.neuron) + 1),
+            "trainable": self.trainable,
             "layer_name": None,
             "use_aparam_as_mask": False,
             "spin": None,
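In the serialized format, "trainable" is a per-layer list: one flag for each hidden layer listed in `neuron` plus one for the final output layer, hence length len(`neuron`)+1. A small illustration of that mapping (the `neuron` value is invented for the example):

    neuron = [240, 240, 240]  # hidden-layer widths; example value only
    trainable = True          # one switch for the whole fitting net

    # One flag per hidden layer, plus one for the output layer.
    per_layer = [trainable] * (len(neuron) + 1)
    assert per_layer == [True, True, True, True]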
From 31b096c23e810c0c89d1aaf2c7c32342820792c7 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 11:23:14 +0800
Subject: [PATCH 3/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index bbf2c963a1..c349fb4dcf 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -285,6 +285,8 @@ def __init__(
         self.rcond = rcond
         # order matters, should be place after the assignment of ntypes
         self.reinit_exclude(exclude_types)
+        # need support for each layer settings
+        self.trainable = all(trainable) if isinstance(trainable, list) else trainable
         net_dim_out = self._net_out_dim()
         # init constants

@@ -361,7 +363,7 @@ def __init__(
             torch.manual_seed(seed)
         # set trainable
         for param in self.parameters():
-            param.requires_grad = trainable
+            param.requires_grad = self.trainable

     def reinit_exclude(
         self,
From 47ce9569c3b5170c5e754335ae3adc981540ebfe Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 11:50:09 +0800
Subject: [PATCH 4/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index c349fb4dcf..3efbe44672 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -285,8 +285,7 @@ def __init__(
         self.rcond = rcond
         # order matters, should be place after the assignment of ntypes
         self.reinit_exclude(exclude_types)
-        # need support for each layer settings
-        self.trainable = all(trainable) if isinstance(trainable, list) else trainable
+        self.trainable = trainable
         net_dim_out = self._net_out_dim()
         # init constants

@@ -363,7 +362,12 @@ def __init__(
             torch.manual_seed(seed)
         # set trainable
         for param in self.parameters():
-            param.requires_grad = self.trainable
+            # need support for each layer settings
+            param.requires_grad = (
+                all(self.trainable)
+                if isinstance(self.trainable, list)
+                else self.trainable
+            )

     def reinit_exclude(
         self,
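Per-layer freezing is not yet supported in the PyTorch fitting net, so a per-layer list has to be collapsed into one module-wide flag; with all(), the net stays trainable only when every entry is True. The normalization in isolation (the helper name is hypothetical, for illustration only):

    from typing import List, Union

    def normalize_trainable(trainable: Union[bool, List[bool]]) -> bool:
        # Collapse a per-layer trainable list into a single module-wide flag.
        return all(trainable) if isinstance(trainable, list) else trainable

    assert normalize_trainable(True) is True
    assert normalize_trainable([True, True, False]) is False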
From cbb74992f67f488250a385d75bae6bbc85cf8735 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Fri, 1 Mar 2024 14:25:51 +0800
Subject: [PATCH 5/6] Update fitting.py

---
 deepmd/pt/model/task/fitting.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 3efbe44672..5026cf5773 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -286,6 +286,10 @@ def __init__(
         # order matters, should be place after the assignment of ntypes
         self.reinit_exclude(exclude_types)
         self.trainable = trainable
+        # need support for each layer settings
+        self.trainable = (
+            all(self.trainable) if isinstance(self.trainable, list) else self.trainable
+        )
         net_dim_out = self._net_out_dim()
         # init constants

@@ -362,12 +366,7 @@ def __init__(
             torch.manual_seed(seed)
         # set trainable
         for param in self.parameters():
-            # need support for each layer settings
-            param.requires_grad = (
-                all(self.trainable)
-                if isinstance(self.trainable, list)
-                else self.trainable
-            )
+            param.requires_grad = self.trainable

     def reinit_exclude(
         self,
@@ -409,7 +408,7 @@ def serialize(self) -> dict:
             # "spin": self.spin ,
             ## NOTICE: not supported by far
             "tot_ener_zero": False,
-            "trainable": self.trainable,
+            "trainable": [self.trainable] * (len(self.neuron) + 1),
             "layer_name": None,
             "use_aparam_as_mask": False,
             "spin": None,
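With the flag plumbed through the descriptors and the fitting net, freezing is driven from the training input. A sketch of the relevant fragment (every key other than "trainable" carries a placeholder value, not a value taken from this series):

    # Fragment of a PyTorch-backend training config.
    config = {
        "model": {
            "descriptor": {
                "type": "se_e2_a",   # illustrative descriptor type
                "trainable": False,  # freeze every descriptor parameter
            },
            "fitting_net": {
                "neuron": [240, 240, 240],  # illustrative layer widths
                "trainable": False,         # freeze the fitting net as well
            },
        },
    }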
From f50afb5cd11e0ff1b40c8fec1d820163178a91c9 Mon Sep 17 00:00:00 2001
From: Duo <50307526+iProzd@users.noreply.github.com>
Date: Sat, 2 Mar 2024 00:29:56 +0800
Subject: [PATCH 6/6] Add ut and doc

---
 deepmd/pt/model/task/fitting.py  |  9 ++++++---
 deepmd/utils/argcheck.py         |  4 ++--
 source/tests/pt/test_training.py | 16 ++++++++++++++++
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py
index 47dba909b3..f79916b36e 100644
--- a/deepmd/pt/model/task/fitting.py
+++ b/deepmd/pt/model/task/fitting.py
@@ -7,6 +7,7 @@
 from typing import (
     List,
     Optional,
+    Union,
 )

 import numpy as np
@@ -239,8 +240,10 @@ class GeneralFitting(Fitting):
         Random seed.
     exclude_types: List[int]
         Atomic contributions of the excluded atom types are set zero.
-    trainable : bool
-        If the parameters in the fitting net are trainable.
+    trainable : Union[List[bool], bool]
+        If the parameters in the fitting net are trainable.
+        Currently this only supports setting all the parameters in the fitting net to the same state.
+        When given as a List[bool], the net is trainable only if every entry is True.
     remove_vaccum_contribution: List[bool], optional
         Remove vaccum contribution before the bias is added. The list assigned each
         type. For `mixed_types` provide `[True]`, otherwise it should be a list of the same
@@ -263,7 +266,7 @@ def __init__(
         rcond: Optional[float] = None,
         seed: Optional[int] = None,
         exclude_types: List[int] = [],
-        trainable: bool = True,
+        trainable: Union[bool, List[bool]] = True,
         remove_vaccum_contribution: Optional[List[bool]] = None,
         **kwargs,
     ):
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index 89b341491e..1f0064c460 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -885,9 +885,9 @@ def fitting_ener():
     doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version. If you set "None" or "none" here, no activation function will be used.'
     doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision."
     doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
-    doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
 - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
-- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
+    doc_trainable = f"Whether the parameters in the fitting net are trainable. This option can be\n\n\
+- list of bool{doc_only_tf_supported}: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1."
     doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
     doc_seed = "Random seed for parameter initialization of the fitting net"
     doc_atom_ener = "Specify the atomic energy in vacuum for each type"
diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
index 4e73fc4f8a..f2a081610a 100644
--- a/source/tests/pt/test_training.py
+++ b/source/tests/pt/test_training.py
@@ -10,6 +10,8 @@
     Path,
 )

+import torch
+
 from deepmd.pt.entrypoints.main import (
     get_trainer,
 )
@@ -28,6 +30,20 @@ def test_dp_train(self):
         trainer.run()
         self.tearDown()

+    def test_trainable(self):
+        fix_params = deepcopy(self.config)
+        fix_params["model"]["descriptor"]["trainable"] = False
+        fix_params["model"]["fitting_net"]["trainable"] = False
+        trainer_fix = get_trainer(fix_params)
+        model_dict_before_training = deepcopy(trainer_fix.model.state_dict())
+        trainer_fix.run()
+        model_dict_after_training = deepcopy(trainer_fix.model.state_dict())
+        for key in model_dict_before_training:
+            torch.testing.assert_allclose(
+                model_dict_before_training[key], model_dict_after_training[key]
+            )
+        self.tearDown()
+
     def tearDown(self):
         for f in os.listdir("."):
             if f.startswith("model") and f.endswith(".pt"):
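Beyond comparing state dicts before and after training as the unit test does, a quick complementary spot check is to count how many parameters still receive gradients (hypothetical helper, not part of the patch; type-embedding parameters may remain trainable unless frozen separately):

    import torch

    def count_trainable(model: torch.nn.Module) -> int:
        # Number of scalar parameters the optimizer can still update.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)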