Merge change_energy_bias and fix finetune
iProzd committed Mar 1, 2024
1 parent e26c118 · commit 4f98319
Showing 6 changed files with 205 additions and 208 deletions.
1 change: 1 addition & 0 deletions deepmd/pt/model/descriptor/dpa2.py
@@ -77,6 +77,7 @@ def __init__(
repformer_update_style: str = "res_avg",
repformer_set_davg_zero: bool = True, # TODO
repformer_add_type_ebd_to_seq: bool = False,
trainable: bool = True,
type: Optional[
str
] = None, # work around the bad design in get_trainer and DpLoaderSet!
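The new trainable flag follows the usual PyTorch pattern of gating requires_grad on a module's parameters so that a pretrained descriptor can be frozen during fine-tuning. A minimal sketch of that pattern, using a hypothetical ToyDescriptor rather than the real DPA-2 class:

```python
import torch


class ToyDescriptor(torch.nn.Module):
    """Illustrative stand-in for a descriptor with a trainable flag."""

    def __init__(self, dim: int = 8, trainable: bool = True):
        super().__init__()
        self.embedding = torch.nn.Linear(dim, dim)
        # trainable=False turns the descriptor into a fixed feature
        # extractor: no parameter receives gradients during training.
        for param in self.parameters():
            param.requires_grad = trainable
```

With trainable=False, an optimizer built from filter(lambda p: p.requires_grad, model.parameters()) simply skips the descriptor's weights.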
140 changes: 45 additions & 95 deletions deepmd/pt/model/task/fitting.py
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import copy
import logging
import os
import tempfile
from abc import (
abstractmethod,
)
@@ -12,6 +14,9 @@
import numpy as np
import torch

from deepmd.infer.deep_eval import (
DeepEval,
)
from deepmd.pt.model.network.mlp import (
FittingNet,
NetworkCollection,
@@ -25,9 +30,6 @@
from deepmd.pt.utils import (
env,
)
from deepmd.pt.utils.dataloader import (
DpLoaderSet,
)
from deepmd.pt.utils.env import (
DEFAULT_PRECISION,
DEVICE,
@@ -36,13 +38,16 @@
from deepmd.pt.utils.exclude_mask import (
AtomExcludeMask,
)
from deepmd.pt.utils.stat import (
make_stat_input,
)
from deepmd.pt.utils.utils import (
to_numpy_array,
to_torch_tensor,
)
from deepmd.utils.data_system import (
DeepmdDataSystem,
)
from deepmd.utils.finetune import (
change_energy_bias_lower,
)
from deepmd.utils.version import (
check_version_compatibility,
)
@@ -86,7 +91,13 @@ def share_params(self, base_class, shared_level, resume=False):
raise NotImplementedError

def change_energy_bias(
self, config, model, old_type_map, new_type_map, bias_shift="delta", ntest=10
self,
config,
model,
old_type_map: List[str],
new_type_map: List[str],
bias_shift="delta",
ntest=10,
):
"""Change the energy bias according to the input data and the pretrained model.
@@ -96,9 +107,9 @@ def change_energy_bias(
The configuration.
model : EnergyModel
Energy model loaded pre-trained model.
new_type_map : list
new_type_map : List[str]
The original type_map in dataset, they are targets to change the energy bias.
old_type_map : str
old_type_map : List[str]
The full type_map in pretrained model
bias_shift : str
The mode for changing energy bias : ['delta', 'statistic']
@@ -114,93 +125,32 @@
)
# data
systems = config["training"]["training_data"]["systems"]
finetune_data = DpLoaderSet(systems, ntest, config["model"])
sampled = make_stat_input(finetune_data.systems, finetune_data.dataloaders, 1)
# map
sorter = np.argsort(old_type_map)
idx_type_map = sorter[
np.searchsorted(old_type_map, new_type_map, sorter=sorter)
]
data_mixed_types = np.all([i.mixed_type for i in finetune_data.systems])
numb_type = len(old_type_map)
type_numbs, energy_ground_truth, energy_predict = [], [], []
for test_data in sampled:
nframes = test_data["energy"].shape[0]
if data_mixed_types:
atype = test_data["atype"].detach().cpu().numpy()
else:
atype = test_data["atype"][0].detach().cpu().numpy()
assert np.array(
[i.item() in idx_type_map for i in list(set(atype.reshape(-1)))]
).all(), "Some types are not in 'type_map'!"
energy_ground_truth.append(test_data["energy"].cpu().numpy())
if data_mixed_types:
type_numbs.append(
np.array(
[(atype == i).sum(axis=-1) for i in idx_type_map],
dtype=np.int32,
).T
)
else:
type_numbs.append(
np.tile(
np.bincount(atype, minlength=numb_type)[idx_type_map],
(nframes, 1),
)
)
if bias_shift == "delta":
coord = test_data["coord"].to(DEVICE)
atype = test_data["atype"].to(DEVICE)
box = (
test_data["box"].to(DEVICE)
if test_data["box"] is not None
else None
)
ret = model(coord, atype, box)
energy_predict.append(
ret["energy"].reshape([nframes, 1]).detach().cpu().numpy()
)
type_numbs = np.concatenate(type_numbs)
energy_ground_truth = np.concatenate(energy_ground_truth)
old_bias = self.bias_atom_e[idx_type_map]
if bias_shift == "delta":
energy_predict = np.concatenate(energy_predict)
bias_diff = energy_ground_truth - energy_predict
delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0]
unbias_e = energy_predict + type_numbs @ delta_bias
atom_numbs = type_numbs.sum(-1)
rmse_ae = np.sqrt(
np.mean(
np.square(
(unbias_e.ravel() - energy_ground_truth.ravel()) / atom_numbs
)
)
)
self.bias_atom_e[idx_type_map] += torch.from_numpy(
delta_bias.reshape(-1)
).to(DEVICE)
log.info(
f"RMSE of atomic energy after linear regression is: {rmse_ae:10.5e} eV/atom."
)
elif bias_shift == "statistic":
statistic_bias = np.linalg.lstsq(
type_numbs, energy_ground_truth, rcond=None
)[0]
self.bias_atom_e[idx_type_map] = (
torch.from_numpy(statistic_bias.reshape(-1))
.type_as(self.bias_atom_e[idx_type_map])
.to(DEVICE)
)
else:
raise RuntimeError("Unknown bias_shift mode: " + bias_shift)
log.info(
"Change energy bias of {} from {} to {}.".format(
str(new_type_map),
str(old_bias.detach().cpu().numpy()),
str(self.bias_atom_e[idx_type_map].detach().cpu().numpy()),
)
)
finetune_data = DeepmdDataSystem(
systems=systems,
batch_size=config["training"]["training_data"].get("batch_size", "auto"),
test_size=1,
)
finetune_data.add("energy", ndof=1, atomic=False, must=True, high_prec=True)
model = torch.jit.script(model)
tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth")
torch.jit.save(model, tmp_model.name)
dp = DeepEval(tmp_model.name)
os.unlink(tmp_model.name)
bias = change_energy_bias_lower(
finetune_data,
dp,
new_type_map,
old_type_map,
self.bias_atom_e.detach().cpu().numpy().reshape(-1),
bias_shift=bias_shift,
ntest=ntest,
)
self.bias_atom_e = (
torch.from_numpy(bias)
.type_as(self.bias_atom_e)
.reshape(self.bias_atom_e.shape)
.to(DEVICE)
)
return None


class GeneralFitting(Fitting):
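Rather than re-deriving statistics in PyTorch, the rewritten method scripts the model, writes it to a temporary file, and hands that file to DeepEval, so both backends share change_energy_bias_lower. A self-contained sketch of the TorchScript round-trip used above, assuming a scriptable module (the helper name is mine, not deepmd API):

```python
import os
import tempfile

import torch


def roundtrip_through_torchscript(model: torch.nn.Module) -> torch.jit.ScriptModule:
    """Serialize a module to a TorchScript file and load it back."""
    scripted = torch.jit.script(model)
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pth")
    # Close the handle first so the path can be reopened by name on
    # platforms (e.g. Windows) that forbid reopening an open file.
    tmp.close()
    try:
        torch.jit.save(scripted, tmp.name)
        return torch.jit.load(tmp.name)
    finally:
        os.unlink(tmp.name)  # mirror the os.unlink cleanup in the diff


reloaded = roundtrip_through_torchscript(torch.nn.Linear(4, 4))
```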
3 changes: 1 addition & 2 deletions deepmd/pt/utils/finetune.py
@@ -27,7 +27,6 @@ def change_finetune_model_params(
last_model_params = state_dict["_extra_state"]["model_params"]
finetune_multi_task = "model_dict" in last_model_params
trainable_param = {
"type_embedding": True,
"descriptor": True,
"fitting_net": True,
}
@@ -74,7 +73,7 @@ def change_finetune_model_params(
assert set(new_type_map).issubset(
old_type_map
), "Only support for smaller type map when finetuning or resuming."
for key_item in ["type_map", "type_embedding", "descriptor"]:
for key_item in ["type_map", "descriptor"]:
if key_item in model_dict_params[model_branch_chosen]:
model_config[key_item] = model_dict_params[model_branch_chosen][
key_item
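With the type-embedding entry gone, trainable_param only distinguishes the descriptor from the fitting net. As a hypothetical illustration of how such prefix-keyed flags are typically consumed (apply_trainable_flags is not a deepmd function):

```python
import torch


def apply_trainable_flags(model: torch.nn.Module, trainable_param: dict) -> None:
    """Freeze or unfreeze parameter groups whose names match a prefix."""
    for name, param in model.named_parameters():
        for prefix, trainable in trainable_param.items():
            if name.startswith(prefix):
                param.requires_grad = trainable


# e.g. freeze the descriptor but keep tuning the fitting net:
# apply_trainable_flags(model, {"descriptor": False, "fitting_net": True})
```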
122 changes: 11 additions & 111 deletions deepmd/tf/fit/ener.py
@@ -53,6 +53,9 @@
from deepmd.tf.utils.spin import (
Spin,
)
from deepmd.utils.finetune import (
change_energy_bias_lower,
)
from deepmd.utils.out_stat import (
compute_stats_from_redu,
)
@@ -793,121 +796,18 @@ def change_energy_bias(
bias_shift="delta",
ntest=10,
) -> None:
"""Change the energy bias according to the input data and the pretrained model.
Parameters
----------
data : DeepmdDataSystem
The training data.
frozen_model : str
The path file of frozen model.
origin_type_map : list
The original type_map in dataset, they are targets to change the energy bias.
full_type_map : str
The full type_map in pretrained model
bias_shift : str
The mode for changing energy bias : ['delta', 'statistic']
'delta' : perform predictions on energies of target dataset,
and do least squares on the errors to obtain the target shift as bias.
'statistic' : directly use the statistic energy bias in the target dataset.
ntest : int
The number of test samples in a system to change the energy bias.
"""
type_numbs = []
energy_ground_truth = []
energy_predict = []
sorter = np.argsort(full_type_map)
idx_type_map = sorter[
np.searchsorted(full_type_map, origin_type_map, sorter=sorter)
]
mixed_type = data.mixed_type
numb_type = len(full_type_map)
dp = None
if bias_shift == "delta":
# init model
dp = DeepPotential(frozen_model)
for sys in data.data_systems:
test_data = sys.get_test()
nframes = test_data["box"].shape[0]
numb_test = min(nframes, ntest)
if mixed_type:
atype = test_data["type"][:numb_test].reshape([numb_test, -1])
else:
atype = test_data["type"][0]
assert np.array(
[i in idx_type_map for i in list(set(atype.reshape(-1)))]
).all(), "Some types are not in 'type_map'!"
energy_ground_truth.append(
test_data["energy"][:numb_test].reshape([numb_test, 1])
)
if mixed_type:
type_numbs.append(
np.array(
[(atype == i).sum(axis=-1) for i in idx_type_map],
dtype=np.int32,
).T
)
else:
type_numbs.append(
np.tile(
np.bincount(atype, minlength=numb_type)[idx_type_map],
(numb_test, 1),
)
)
if bias_shift == "delta":
coord = test_data["coord"][:numb_test].reshape([numb_test, -1])
if sys.pbc:
box = test_data["box"][:numb_test]
else:
box = None
if dp.get_dim_fparam() > 0:
fparam = test_data["fparam"][:numb_test]
else:
fparam = None
if dp.get_dim_aparam() > 0:
aparam = test_data["aparam"][:numb_test]
else:
aparam = None
ret = dp.eval(
coord,
box,
atype,
mixed_type=mixed_type,
fparam=fparam,
aparam=aparam,
)
energy_predict.append(ret[0].reshape([numb_test, 1]))
type_numbs = np.concatenate(type_numbs)
energy_ground_truth = np.concatenate(energy_ground_truth)
old_bias = self.bias_atom_e[idx_type_map]
if bias_shift == "delta":
energy_predict = np.concatenate(energy_predict)
bias_diff = energy_ground_truth - energy_predict
delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0]
unbias_e = energy_predict + type_numbs @ delta_bias
atom_numbs = type_numbs.sum(-1)
rmse_ae = np.sqrt(
np.mean(
np.square(
(unbias_e.ravel() - energy_ground_truth.ravel()) / atom_numbs
)
)
)
self.bias_atom_e[idx_type_map] += delta_bias.reshape(-1)
log.info(
f"RMSE of atomic energy after linear regression is: {rmse_ae} eV/atom."
)
elif bias_shift == "statistic":
statistic_bias = np.linalg.lstsq(
type_numbs, energy_ground_truth, rcond=None
)[0]
self.bias_atom_e[idx_type_map] = statistic_bias.reshape(-1)
else:
raise RuntimeError("Unknown bias_shift mode: " + bias_shift)
log.info(
"Change energy bias of {} from {} to {}.".format(
str(origin_type_map), str(old_bias), str(self.bias_atom_e[idx_type_map])
)
)
self.bias_atom_e = change_energy_bias_lower(
data,
dp,
origin_type_map,
full_type_map,
self.bias_atom_e,
bias_shift=bias_shift,
ntest=ntest,
)

def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None:
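Both deleted bodies above end in the same 'delta' fit that change_energy_bias_lower now centralizes: regress the per-frame energy error on the per-frame atom-type counts and add the least-squares solution to the per-type bias. A standalone numpy sketch of that fit on synthetic data (no deepmd API involved):

```python
import numpy as np

rng = np.random.default_rng(0)
nframes, ntypes = 50, 2
# Per-frame count of atoms of each type, as built via np.bincount above.
type_counts = rng.integers(1, 10, size=(nframes, ntypes)).astype(np.float64)
true_delta = np.array([[0.3], [-1.2]])  # the per-type bias shift to recover
# energy_ground_truth - energy_predict, plus a little noise
bias_diff = type_counts @ true_delta + 0.01 * rng.standard_normal((nframes, 1))

# Solve type_counts @ delta_bias ~= bias_diff in the least-squares sense.
delta_bias = np.linalg.lstsq(type_counts, bias_diff, rcond=None)[0]
residual = type_counts @ delta_bias - bias_diff
rmse_ae = np.sqrt(np.mean((residual / type_counts.sum(-1, keepdims=True)) ** 2))
print(delta_bias.ravel(), rmse_ae)  # delta_bias is close to [0.3, -1.2]
```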
(Diffs for the remaining 2 of 6 changed files were not loaded.)
