API Examples
Examples of using the C++ Tadah! library.
Example 1 - Training Process and Simple Prediction
Example 1 c++ file:
#include <tadah/models/cut_all.h>
#include <tadah/models/descriptors/d_all.h>
#include <tadah/mlip/models/m_all.h>
#include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_all.h>
#include <tadah/models/functions/kernels/kern_all.h>
#include <tadah/core/context.h>
#include <tadah/core/core_user_config_parser.h>
#include <tadah/mlip/structure.h>
#include <tadah/mlip/descriptors_calc.h>
#include <tadah/mlip/nn_finder.h>
#include <tadah/mlip/output/output.h>
#include <fstream>
using namespace tadah::core;
using namespace tadah::models;
using namespace tadah::mlip;
/** @file ex1.cpp
* This example shows how to perform training and
* how to predict with a trained model.
*
* To compile with `g++` and run:
*
* \code{.sh}
* $ g++ -std=c++17 -O3 ex1.cpp -o ex1.out -ltadah.mlip -ltadah.models -ltadah.core -llapack -lblas -fopenmp
* $ ./ex1.out
* \endcode
*
* Tadah! models and descriptors are selected at compile time but all model
* parameters are provided in the config file.
*
* Here we use D2_Blip two-body descriptor and train using both energies
* and virial stresses.
*
* Files:
*
* - `ex1.cpp`
* Example c++ script for training and prediction.
* - `config`
* Config file used for training, contains all model parameters.
* - `config_pred`
* List of datasets used for prediction (\ref DBFILE key).
* Keys \ref FORCE and \ref STRESS control whether forces and stresses
* are predicted.
* - `tdata.db`
* Dataset which we will use for both training and prediction.
* The dataset is generated using EAM model for Ta by R.Ravelo.
* https://journals.aps.org/prb/abstract/10.1103/PhysRevB.88.134101
*/
/**
 * Example 1 entry point: train a model on the datasets listed in `config`,
 * save it to `pot.tadah`, then predict on the datasets listed in
 * `config_pred` and dump the results to disk.
 *
 * @return 0 on success, 1 if `pot.tadah` cannot be opened for writing.
 */
int main() {

    std::cout << "TRAINING STAGE" << std::endl;

    // Config file configures almost all model parameters.
    // See below for a more detailed explanation of used key-value(s) pairs.
    Context context = CoreUserConfigParser().parse("config");

    // First we load all training data from a list
    // of training datasets into a StructureDB object.
    // Paths to datasets are specified with a key DBFILE in a config file.
    std::cout << "StructureDB loading data..." << std::flush;
    StructureDB stdb(context);
    std::cout << "Done!" << std::endl;

    // Next we pass StructureDB object to the nearest neighbour calculator.
    // NNFinder will create full nearest neighbours lists for every atom
    // in every structure. These lists will be stored by individual Structures
    // in a StructureDB object.
    // The lists are calculated up to the max cutoff from the config file:
    // cutoff_max = max(RCUT2B, RCUTMB).
    std::cout << "Calculating nearest neighbours..." << std::flush;
    NNFinder nnf;
    nnf.calc(stdb,context);
    std::cout << "Done!" << std::endl;

    // STEP 1a: Select descriptors.
    // Both types (two-body and many-body) must be specified.
    // Use Dummy if given type is not required.

    // D2 - TWO-BODY
    //using D2=D2_LJ;
    //using D2=D2_BP;
    using D2=D2_Blip;
    //using D2=D2_Dummy;
    //using D2=D2_EAM;

    // DM - MANY-BODY
    //using DM=DM_EAM;
    //using DM=DM_EAD;
    using DM=DM_Dummy;

    // STEP 1b: Select cutoffs, C2 for D2, CM for DM
    using C2=Cut_Cos;
    using CM=Cut_Dummy;

    // STEP 1c: Prepare descriptor calculator
    DescriptorsCalc<D2,DM,C2,CM> dc(context);

    // STEP 2a: Select Basis Function (BF) or Kernels (K).
    // BF is used for M_BLR - Bayesian Linear Regression
    // K is used with M_KRR - Kernel Ridge Regression
    // See documentation for more BF and K
    using BF=DM_BF_Linear;
    //using BF=BF_Polynomial2;
    //using K=Kern_Linear;
    //using K=Kern_Quadratic;

    // STEP 2b: Select Model
    using M=M_BLR<BF>;
    //using M=M_KRR<K>;

    // STEP 2c: Instantiate a model
    M model(context);

    // STEP 3: Training - Option 1.
    // Train with StructureDB only. We have to provide calculators here.
    // Descriptors are calculated in batches to construct a design matrix
    // and then are discarded.
    // This is usually the best choice unless you need descriptors for something else
    // after the training is done.
    // The message below pairs with the "Done!" printed once training returns.
    std::cout << "Training..." << std::flush;
    model.train(stdb,dc);

    // STEP 3: Training - Option 2.
    // Train with StructureDB and precalculated StDescriptorsDB.
    //StDescriptorsDB st_desc_db = dc.calc(stdb);
    //model.train(st_desc_db,stdb);
    std::cout << "Done!" << std::endl;

    // STEP 4: Save model to a text file.
    // Once model is trained we can dump it to a file.
    // Saved models can be used with LAMMPS or can be reloaded
    // to make predictions.
    std::cout << "Saving LAMMPS pot.tadah file..." << std::flush;
    Context param_file = model.get_param_file();
    std::ofstream outfile("pot.tadah");
    if (!outfile) {
        // Fail loudly instead of reporting "Done!" for a file never written.
        std::cerr << "Error: cannot open pot.tadah for writing" << std::endl;
        return 1;
    }
    outfile << param_file << std::endl;
    outfile.close();
    std::cout << "Done!" << std::endl;

    std::cout << "PREDICTION STAGE..." << std::endl;

    // STEP 1: We will reuse LAMMPS param file and add to it
    // DBFILE(s) from config_pred file.
    // In other words training datasets go to the config file
    // and validation datasets are in the config_pred
    Context temp = CoreUserConfigParser().parse("config_pred");
    param_file = Context::merge(param_file,temp);

    // STEP 2: Load DBFILE from config_pred
    std::cout << "StructureDB loading data..." << std::flush;
    StructureDB stdb2(param_file);
    std::cout << "Done!" << std::endl;

    // STEP 3: Calculate nearest neighbours
    std::cout << "Calculating nearest neighbours..." << std::flush;
    NNFinder nnf2;
    nnf2.calc(stdb2,param_file);
    std::cout << "Done!" << std::endl;

    // STEP 4: Prepare DescriptorCalc
    DescriptorsCalc<D2,DM,C2,CM> dc2(param_file);

    // STEP 5: Results are saved to new StructureDB object
    // - it will only contain predicted values
    // so there are no atom positions, etc...
    // Container for prediction error. It is not passed to the predict()
    // overload used below; see the commented-out alternative call.
    t_type predicted_error;
    param_file.add("OUTFILE","energy.pred");
    param_file.add("OUTFILE","forces.pred");
    param_file.add("OUTFILE","stress.pred");

    std::cout << "Predicting..." << std::flush;
    StructureDB stpred = model.predict(param_file,stdb2,dc2);
    //StructureDB stpred = model.predict(param_file,stdb2,dc2,predicted_error);
    std::cout << "Done!" << std::endl;

    std::cout << "Dumping results to disk..." << std::flush;
    Output output(param_file);
    // Pair the predictions with the database they were computed for (stdb2),
    // not with the training database.
    output.print_predict_all(stdb2,stpred,predicted_error);
    std::cout << "Done!" << std::endl;

    return 0;
}
Config file used for training:
# For description of KEYS and corresponding values see Config documentation:
# https://ta-dah.readthedocs.io/en/latest/config.html
DBFILE tdata.db
ATOM Ta
WATOM 73
INIT2B true
TYPE2B D2_Blip 4 4 Ta Ta
RCUT2B 5.3
FORCE false
STRESS true
SGRID2B GEOM 4 0.1 1.0
CGRID2B LIN 4 1.0 5.3
LAMBDA 1e-8
BIAS true
NORM false
VERBOSE 2
NUMERIC 14
EWEIGHT 1.0
#FWEIGHT 1e-2
#SWEIGHT 1e-3
Config file used for prediction:
DBFILE tdata.db
FORCE true
STRESS true
NUMERIC 5
Example 2 - Prediction using existing model
Example 2 c++ file:
#include <tadah/models/cut_all.h>
#include <tadah/models/descriptors/d_all.h>
#include <tadah/mlip/models/m_all.h>
#include <tadah/mlip/descriptors_calc.h>
#include <tadah/core/context.h>
#include <tadah/core/core_user_config_parser.h>
#include <tadah/mlip/structure.h>
#include <tadah/mlip/nn_finder.h>
#include <tadah/mlip/output/output.h>
#include <tadah/mlip/design_matrix/functions/dm_f_all.h>
#include <tadah/mlip/design_matrix/functions/basis_functions/dm_bf_all.h>
#include <fstream>
#include <iostream>
using namespace tadah::core;
using namespace tadah::models;
using namespace tadah::mlip;
/** @file ex2.cpp
* This example shows how to predict with a trained model.
* Example model is provided in a `pot.tadah` file.
*
* To compile with `g++` and run:
*
* \code{.sh}
* $ g++ -std=c++17 -O3 ex2.cpp -o ex2.out -ltadah.mlip -ltadah.models -ltadah.core -llapack -lblas -fopenmp
* $ ./ex2.out
* \endcode
*
* Tadah! models and descriptors are selected at compile time but all model
* parameters are provided in the `pot.tadah` file. Model, cutoff and descriptors
* in the `ex2.cpp` file must match those in the `pot.tadah` file.
* See code comments below for more detail.
*
* Files:
*
* - `ex2.cpp`
* Example c++ script for prediction using already available model.
* - `config_pred`
* List of datasets used for prediction (\ref DBFILE key).
* Keys \ref FORCE and \ref STRESS control whether forces and stresses
* are predicted.
* - `tdata.db`
* Dataset which we will use for prediction.
* The dataset is generated using EAM model for Ta by R.Ravelo.
* https://journals.aps.org/prb/abstract/10.1103/PhysRevB.88.134101
*/
int main() {
// STEP 0: Load model saved in a `pot.tadah` as a Config object.
Context param_file = CoreUserConfigParser().parse("pot.tadah");
// STEP 1a: Select descriptors. Both types must be specified.
// Use Dummy if given type is not required.
// Look for keywords `TYPE2B` and `TYPEMB` in a `pot.tadah`
// If keyword is not listed use `D2_Dummy` as a descriptor.
// D2 - TWO-BODY
// `pot.tadah`: TYPE2B D2_Blip
using D2=D2_Blip;
// DM - MANY-BODY
// `pot.tadah` no keyword
using DM=DM_Dummy;
// STEP 2b: Select cutoffs for descriptors, C2 for D2, CM for DM
// Look for keywords `RCTYPE2B` and `RCTYPEMB` in the `pot.tadah`
// If keyword is not listed use `Cut_Dummy`.
// `pot.tadah`: RCTYPE2B Cut_Cos
using C2=Cut_Cos;
// `pot.tadah` no keywords for many-body
using CM=Cut_Dummy;
// STEP 2a: Select Basis Function (BF) or Kernels (K).
// BF is used for M_BLR - Bayesian Linear Regression
// K is used with M_KRR - Kernel Ridge Regression
// KEYWORD `MODEL`: first argument is model, second BF/Kernel
// `pot.tadah`: MODEL M_KRR Kern_Linear
using K=DM_Kern_Linear;
// STEP 2b: Select Model and instantiate object.
// `pot.tadah`: MODEL M_KRR Kern_Linear
using M=M_KRR<K>;
M model(param_file);
std::cout << "PREDICTION STAGE" << std::endl;
// We will reuse param_file Config file and add to it
// DBFILE(s) from config_pred file.
// config_pred contain
Context temp = CoreUserConfigParser().parse("config_pred");
param_file = Context::merge(param_file,temp);
// Load DBFILE from config_pred
std::cout << "StructureDB loading data..." << std::flush;
StructureDB stdb(param_file);
std::cout << "Done!" << std::endl;
// Calculate nearest neighbours
std::cout << "Calculating nearest neighbours..." << std::flush;
NNFinder nnf;
nnf.calc(stdb,param_file);
std::cout << "Done!" << std::endl;
// Calculate descriptors and store them in StDescriptorsDB
//std::cout << "Calculating descriptors..." << std::flush;
DescriptorsCalc<D2,DM,C2,CM> dc2(param_file);
//StDescriptorsDB st_desc_db = dc2.calc(stdb);
//std::cout << "Done!" << std::endl;
// open file streams for energy and force prediction
std::ofstream out_force("forces.pred");
std::ofstream out_energy("energy.pred");
std::ofstream out_stress("stress.pred");
// predict energies (and forces if FORCE true). Result is saved
// to new StructureDB object - it will only contain predicted values
// so there are no atom positions, etc...
//t_type pred_err; // TODO dump it ...
////StructureDB stpred = model.predict(param_file, stdb,dc2,pred_err);
//std::cout << "Predicting..." << std::flush;
//StructureDB stpred = model.predict(param_file,st_desc_db, stdb);
//std::cout << "Done!" << std::endl;
t_type predicted_error; // container for prediction error
param_file.add("OUTFILE","energy.pred");
param_file.add("OUTFILE","forces.pred");
param_file.add("OUTFILE","stress.pred");
std::cout << "Predicting..." << std::flush;
StructureDB stpred = model.predict(param_file,stdb,dc2);
//StructureDB stpred = model.predict(param_file,stdb,dc2,predicted_error);
std::cout << "Done!" << std::endl;
std::cout << "Dumping results to disk..." << std::flush;
Output output(param_file);
output.print_predict_all(stdb,stpred,predicted_error);
std::cout << "Done!" << std::endl;
return 0;
}
Trained model used for prediction:
ATOM Ta
BIAS true
CGRID2B LIN 4 1.0 5.3
DIMER false 0 false
EWEIGHT 1.0
FWEIGHT 1.0
INIT2B true
INITMB false
LAMBDA 1e-8
MODEL M_BLR BF_Linear
NORM false
NUMERIC 14
OALGO 1
RCTYPE2B Cut_Cos
RCUT2B 5.3
SGRID2B GEOM 4 0.1 1.0
SWEIGHT 1.0
TYPE2B D2_Blip 4 4 Ta Ta
VERBOSE 2
WATOM 73
WEIGHTS -5.318656330932407528 0.380609748134734693 -0.336665868854840844 -0.054088440523269311 -0.082908752531337310
Config file used for prediction:
DBFILE tdata.db
FORCE true
STRESS true
NUMERIC 5