Add three models

mkq 2024-06-18 23:05:25 +08:00
parent cab2d19493
commit cede016d86
25 changed files with 381 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
model_checkpoint_path: "Enpi_gzmat"
all_model_checkpoint_paths: "Enpi_gzmat"

View File

@@ -0,0 +1,2 @@
model_checkpoint_path: "Epipi_gzmat"
all_model_checkpoint_paths: "Epipi_gzmat"
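These two `checkpoint` files are the small text-format indexes that TensorFlow's Saver writes next to a saved model; `model_checkpoint_path` names the checkpoint restored by default. A minimal sketch of how such an index is consumed (illustrative only, not part of this commit):

import tensorflow as tf

# Reads ./Enpi/checkpoint and returns the path it points to, "./Enpi/Enpi_gzmat"
latest = tf.compat.v1.train.latest_checkpoint("./Enpi/")
print(latest)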

View File

@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
# Predict with a neural network: peptide_trans_E.txt, peptide_trans_Edip.txt
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'  # show warnings and errors only
import numpy as np
from pandas import DataFrame as df
import tensorflow as tf
from sklearn import preprocessing
import sys

# Count the files under a path that end with a given suffix
def count_num(path, suffix):
    num = 0
    for root, dirs, files in os.walk(path):
        for name in files:
            if name.endswith(suffix):
                num += 1
    return num

# NN prediction: restore a saved graph and evaluate its "pred" tensor
def predict(x, model_dir, model_name):
    with tf.compat.v1.Session() as sess:
        # import_meta_graph rebuilds the graph; restore() then loads every
        # variable, so no separate initializer run is needed
        saver = tf.compat.v1.train.import_meta_graph(meta_graph_or_file=model_dir + model_name + ".meta")
        saver.restore(sess, model_dir + model_name)
        graph = tf.compat.v1.get_default_graph()
        xs = graph.get_tensor_by_name("x_inputs:0")
        pred = graph.get_tensor_by_name("pred:0")
        y_test_pred = sess.run(pred, feed_dict={xs: x})
    return y_test_pred

# ---- Load the internal coordinates and predict the peptide-bond transition energies ----
# Load the internal coordinates (9 comma-separated values per line)
peptide_gzmat = []
with open("peptide_gzmat.txt", "r") as f:
    for line in f:
        sub_str = line.strip().split(',')
        if sub_str:
            peptide_gzmat.append(sub_str)
peptide_gzmat = np.array(peptide_gzmat, dtype=float).reshape(-1, 9)

# Standardize the inputs
scaler = preprocessing.StandardScaler()
peptide_gzmat = scaler.fit_transform(peptide_gzmat)

# Predict the two transition energies with the saved models
peptide_trans_Enpi = predict(x=peptide_gzmat, model_dir="./Enpi/", model_name="Enpi_gzmat")
tf.compat.v1.reset_default_graph()
peptide_trans_Epipi = predict(x=peptide_gzmat, model_dir="./Epipi/", model_name="Epipi_gzmat")
tf.compat.v1.reset_default_graph()

# Interleave the Enpi and Epipi predictions into a single file
peptide_trans_E = []
for i in range(peptide_trans_Enpi.shape[0]):
    peptide_trans_E.append(peptide_trans_Enpi[i])
    peptide_trans_E.append(peptide_trans_Epipi[i] + 3.6)
peptide_trans_E = df(10000000 / np.array(peptide_trans_E).reshape(-1, 1))
peptide_trans_E.to_csv('peptide_trans_E_NN.txt', mode='w', header=False, index=False)
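A note on the final transform: the script writes `10**7 / E` for each predicted value. Assuming the network outputs are wavenumbers in cm^-1 (the commit does not state the units), this has the form of the standard conversion to wavelength in nanometres:

\lambda\,(\mathrm{nm}) = \frac{10^{7}}{\tilde{\nu}\,(\mathrm{cm^{-1}})}

The +3.6 offset on the Epipi channel is applied as-is from the script; its origin is not documented in this commit.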

View File

@@ -0,0 +1,19 @@
123.58,117.94,121.71,180.63,181.22,1.5687,1.2227,1.3080,1.5067
118.63,116.92,119.31,190.96,151.76,1.5365,1.2317,1.3454,1.4750
119.71,120.42,124.26,188.49,166.06,1.5453,1.2677,1.3244,1.4767
123.31,118.22,124.20,184.30,182.99,1.4803,1.2458,1.3599,1.4954
119.43,114.61,127.73,180.32,175.46,1.5532,1.2418,1.2884,1.4708
123.10,115.10,121.78,189.38,172.69,1.6069,1.2259,1.3648,1.4470
119.79,117.27,122.72,177.23,179.16,1.5599,1.2663,1.3485,1.4916
121.98,114.47,132.86,180.08,179.49,1.5710,1.2257,1.3225,1.4339
118.91,120.56,124.73,185.36,171.94,1.5249,1.2218,1.3118,1.4966
117.71,116.05,122.14,179.96,188.83,1.5053,1.2527,1.3350,1.5414
120.60,118.52,124.81,176.93,185.18,1.5241,1.2325,1.3236,1.4667
120.82,116.42,124.44,189.14,168.96,1.5544,1.2354,1.3717,1.4831
123.68,112.01,124.34,173.71,173.71,1.5038,1.2327,1.3617,1.4936
121.72,115.67,127.96,173.53,182.33,1.5333,1.2143,1.3101,1.4864
119.38,116.01,126.63,177.07,183.00,1.4949,1.2300,1.3255,1.5506
117.99,120.21,122.39,180.27,175.73,1.5879,1.2501,1.3563,1.4516
120.93,119.73,119.02,175.74,186.43,1.5595,1.2347,1.4298,1.4934
120.37,115.75,125.69,175.66,179.43,1.5723,1.2635,1.3783,1.4598
120.22,117.99,129.78,189.23,189.23,1.5167,1.2226,1.3156,1.4641

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -0,0 +1,228 @@
# -*- coding: utf-8 -*-
"""
:File: predict.py
:Author: Zhou Donglai
:Email: zhoudl@mail.ustc.edu.cn
"""
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
def load_data(file, use_im_freq=False):
    x = pd.read_excel(file, sheet_name='descriptors')
    y = pd.read_excel(file, sheet_name='predictive')
    if not use_im_freq:
        # Zero out the descriptor columns tied to imaginary (negative) frequencies
        x.iloc[np.where(x['fre1'] < 0)[0], [0, 6]] = 0
        x.iloc[np.where(x['fre2'] < 0)[0], [1, 7]] = 0
        x.iloc[np.where(x['fre3'] < 0)[0], [2, 8]] = 0
    return x, y

class CustomModel:
    def __init__(self, layers=6, units=1024, rate=0., activation='relu', loss='mae', optimizer='adam',
                 max_learning_rate=1e-3, metrics=None, batch_size=120, max_epochs=10000, norm=None):
        self.layers = layers
        self.units = units
        self.rate = rate
        self.activation = activation
        self.loss = loss
        self.optimizer = optimizer
        self.max_learning_rate = max_learning_rate
        self.metrics = metrics
        self.batch_size = batch_size
        self.max_epochs = max_epochs
        self.norm = norm
        self._score_column = None
        # Stack of Dense + Dropout blocks, with an 8-unit linear output layer
        self.model = tf.keras.models.Sequential()
        for i in range(self.layers - 1):
            self.model.add(tf.keras.layers.Dense(self.units, activation=self.activation))
            self.model.add(tf.keras.layers.Dropout(self.rate))
        self.model.add(tf.keras.layers.Dense(8))

    def fit(self, x, y, validation_data=None, **kwargs):
        self.model.compile(loss=self.loss, optimizer=tf.keras.optimizers.get(
            {'class_name': self.optimizer, 'config': {'learning_rate': self.max_learning_rate}}
        ), metrics=self.metrics)
        # Fit the input/output scalers on the training data unless supplied
        if self.norm is None:
            self.norm = [StandardScaler().fit(x), StandardScaler().fit(y)]
        x_ = self.norm[0].transform(x)
        y_ = self.norm[1].transform(y)
        if validation_data is not None:
            validation_data = (self.norm[0].transform(validation_data[0]),
                               self.norm[1].transform(validation_data[1]))
        return self.model.fit(x_, y_, batch_size=self.batch_size, epochs=self.max_epochs,
                              validation_data=validation_data, **kwargs)

    def predict(self, x, y_true=None, batch_size=12000, **kwargs):
        x_ = self.norm[0].transform(x)
        y_ = self.model.predict(x_, batch_size=batch_size, **kwargs)
        y_pred = self.norm[1].inverse_transform(y_)
        if y_true is not None:
            y_true = np.array(y_true)
            # Column 5 is treated as periodic: shift predictions by ±360
            # towards the reference, then clip to [-180, 180]
            for i in range(y_pred.shape[0]):
                if y_pred[i, 5] - y_true[i, 5] < -180:
                    y_pred[i, 5] += 360
                if y_pred[i, 5] - y_true[i, 5] > 180:
                    y_pred[i, 5] -= 360
                if y_pred[i, 5] < -180:
                    y_pred[i, 5] = -180
                if y_pred[i, 5] > 180:
                    y_pred[i, 5] = 180
        return y_pred

    def score(self, x, y, norm=True, loss=None):
        y_pred = self.predict(x, y)
        if loss is None:
            loss_func = tf.keras.losses.get(self.loss)
        else:
            loss_func = tf.keras.losses.get(loss)
        if norm:
            ls = loss_func(self.norm[1].transform(y).T,
                           self.norm[1].transform(y_pred).T)
            return -ls.numpy()[self._score_column].mean()
        else:
            ls = loss_func(y.T, y_pred.T)
            return -ls.numpy()[self._score_column]

    def r2_score(self, x, y):
        return r2_score(y, self.predict(x, y), multioutput='raw_values')

    def save(self, file):
        self.model.save(file)
        joblib.dump(self.norm, file + '/norm.pkl')

    def load(self, file):
        # todo: Load params
        self.model = tf.keras.models.load_model(file)
        self.norm = joblib.load(file + '/norm.pkl')
        return self

class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, decay_epoch=100, decay=0.5, min_delta=1e-5,
                 monitor='loss', patience=15, min_lr=1e-6, factor=0.5):
        super(CustomCallback, self).__init__()
        self.decay_epoch = decay_epoch
        self.decay = decay ** (1 / self.decay_epoch)
        self.min_delta = min_delta
        self.monitor = monitor
        self.patience = patience
        self.min_lr = min_lr
        self.factor = factor
        self.best = np.Inf
        self.wait = 0
        self.monitor_op = lambda a, b: np.less(a, b - self.min_delta)

    def on_epoch_end(self, epoch, logs=None):
        if epoch < self.decay_epoch:
            # Exponential decay during the first decay_epoch epochs
            self.model.optimizer.lr.assign(self.model.optimizer.lr * self.decay)
        else:
            # Afterwards, reduce on plateau; stop once the rate falls below min_lr
            current = logs.get(self.monitor)
            if self.monitor_op(current, self.best):
                self.best = current
                self.wait = 0
            else:
                self.wait += 1
                if self.wait >= self.patience:
                    self.model.optimizer.lr.assign(
                        self.model.optimizer.lr * self.factor)
                    print('Reduce learning rate to %e.'
                          % self.model.optimizer.lr.numpy())
                    if self.model.optimizer.lr.numpy() < self.min_lr:
                        self.model.stop_training = True
                    self.best = np.Inf
                    self.wait = 0

# Restore the trained model, rebuild the 80/20 train/test split, and export predictions
file = 'model'
model = CustomModel().load(file)
x, y = load_data('data.xlsx')
seed = 0
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)
train_pred = model.predict(x_train, y_train)
test_pred = model.predict(x_test, y_test)
with pd.ExcelWriter('prediction.xlsx') as writer:
    y_train.to_excel(writer, sheet_name='y_train', index=False)
    pd.DataFrame(train_pred, columns=y.columns).to_excel(writer, sheet_name='train_pred', index=False)
    y_test.to_excel(writer, sheet_name='y_test', index=False)
    pd.DataFrame(test_pred, columns=y.columns).to_excel(writer, sheet_name='test_pred', index=False)
# Figure layout: a 2 x 4 grid of parity panels, each with two marginal histograms
plt.rc('font', size=12)
fig = plt.figure(figsize=(20, 10))
l = 0.2
b = 0.16
w1 = 0.56
w2 = 0.2
s = 0.02
axes = [[] for i in range(8)]
for i, ax in enumerate(axes[:4]):
    ax.append(fig.add_axes([(i + l) / 4, (1 + b) / 2, w1 / 4, w1 / 2]))
    ax.append(fig.add_axes([(i + l) / 4, (1 + b + w1 + s) / 2, w1 / 4, w2 / 2]))
    ax.append(fig.add_axes([(i + l + w1 + s) / 4, (1 + b) / 2, w2 / 4, w1 / 2]))
for i, ax in enumerate(axes[4:]):
    ax.append(fig.add_axes([(i + l) / 4, b / 2, w1 / 4, w1 / 2]))
    ax.append(fig.add_axes([(i + l) / 4, (b + w1 + s) / 2, w1 / 4, w2 / 2]))
    ax.append(fig.add_axes([(i + l + w1 + s) / 4, b / 2, w2 / 4, w1 / 2]))
fig.text(0, 1, '$\\mathbf{a}$', fontsize=24, ha='left', va='top')
fig.text(1 / 4, 1, '$\\mathbf{b}$', fontsize=24, ha='left', va='top')
fig.text(1 / 2, 1, '$\\mathbf{c}$', fontsize=24, ha='left', va='top')
fig.text(3 / 4, 1, '$\\mathbf{d}$', fontsize=24, ha='left', va='top')
fig.text(0, 1 / 2, '$\\mathbf{e}$', fontsize=24, ha='left', va='top')
fig.text(1 / 4, 1 / 2, '$\\mathbf{f}$', fontsize=24, ha='left', va='top')
fig.text(1 / 2, 1 / 2, '$\\mathbf{g}$', fontsize=24, ha='left', va='top')
fig.text(3 / 4, 1 / 2, '$\\mathbf{h}$', fontsize=24, ha='left', va='top')
for i, ax in enumerate(axes):
    for a in ax:
        a.tick_params(direction='in')
    y_min = min(y_test.iloc[:, i].min(), test_pred[:, i].min())
    y_max = max(y_test.iloc[:, i].max(), test_pred[:, i].max())
    lim = [y_min - (y_max - y_min) * 0.05, y_max + (y_max - y_min) * 0.05]
    # Main panel: NN prediction vs. calculated value with a y = x reference line
    ax[0].set_aspect('equal')
    ax[0].plot(lim, lim, '--', lw=1, c='k', zorder=2)
    ax[0].set_xticks(ax[0].get_xticks())
    ax[0].set_yticks(ax[0].get_xticks())
    ax[0].set_xlim(lim)
    ax[0].set_ylim(lim)
    ax[0].scatter(y_test.iloc[:, i], test_pred[:, i], s=10, zorder=1)
    ax[0].text(y_min, y_max, f"$r={np.corrcoef(y_test.iloc[:, i], test_pred[:, i])[0, 1]:.3f}$",
               ha='left', va='top', fontsize=16)
    ax[0].set_xlabel(f'$\\mathrm{{{y_test.columns[i]}\\ (Cal.)}}$', fontsize=20)
    ax[0].set_ylabel(f'$\\mathrm{{{y_test.columns[i]}\\ (NN)}}$', fontsize=20)
    # Marginal histograms of the calculated and predicted distributions
    ax[1].set_xticks(ax[0].get_xticks())
    ax[1].set_xlim(lim)
    ax[1].tick_params(labelbottom=False)
    ax[1].hist(y_test.iloc[:, i], bins=np.linspace(y_min, y_max, 16))
    ax[1].set_yticks([])
    ax[1].set_ylabel('$\\mathrm{Cal.\\ Distr.}$', fontsize=16)
    ax[2].set_yticks(ax[0].get_xticks())
    ax[2].set_ylim(lim)
    ax[2].tick_params(labelleft=False)
    ax[2].hist(test_pred[:, i], bins=np.linspace(y_min, y_max, 16), orientation='horizontal')
    ax[2].set_xticks([])
    ax[2].set_xlabel('$\\mathrm{NN\\ Distr.}$', fontsize=16)
fig.savefig('prediction.png', dpi=300)
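predict.py only exercises the load-and-predict path; the `model` directory itself arrives as binary files in this commit. A plausible training sketch built from the same classes (the split and hyperparameters here are assumptions, not recovered from the commit):

# Hypothetical training run, reusing CustomModel and CustomCallback from above
x, y = load_data('data.xlsx')
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
model = CustomModel()
model.fit(x_train, y_train, validation_data=(x_test, y_test),
          callbacks=[CustomCallback()], verbose=2)
model.save('model')  # writes the Keras model plus norm.pkl, which load() expects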

Binary file not shown.

View File

@@ -0,0 +1,48 @@
# Load the pretrained model
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.nn import Module
from sklearn.preprocessing import StandardScaler
import pickle

excel = 'data.xlsx'
X = pd.read_excel(excel, sheet_name='metals')

# Load the fitted input/output scalers
with open('norm_x.pckl', 'rb') as f:
    norm_x = pickle.load(f)
with open('norm_y.pckl', 'rb') as f:
    norm_y = pickle.load(f)
X_ = norm_x.transform(X)  # standardize the inputs with the training-set scaler

## Predict the d-band centers
class Model(Module):
    def __init__(self):
        super(Model, self).__init__()
        self.Layer1 = nn.Linear(in_features=5, out_features=512)
        self.Layer2 = nn.Linear(in_features=512, out_features=384)
        self.Layer3 = nn.Linear(in_features=384, out_features=192)
        self.Layer4 = nn.Linear(in_features=192, out_features=5)

    def forward(self, x):
        x = F.relu(self.Layer1(x))
        x = F.relu(self.Layer2(x))
        x = F.relu(self.Layer3(x))
        x = self.Layer4(x)
        return x

premodel = torch.load('premodel.pkl')
premodel.eval()
x_ = torch.Tensor(X_)  # convert to a float32 tensor
y_pred_ = premodel(x_)
y_pred = y_pred_.detach().numpy()
y_pred = norm_y.inverse_transform(y_pred)
y_pred = pd.DataFrame(y_pred, columns=['G', 'dband_ave_2OH', 'dband_ave_OH', 'dband_med_2OH', 'dband_med_OH'])
y_pred.to_excel('y_predict.xlsx')
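This script assumes three pickled artifacts (`norm_x.pckl`, `norm_y.pckl`, `premodel.pkl`) that ship as binaries in this commit. A sketch of the preparation step that would produce them (the 'targets' sheet name and the omitted training loop are assumptions):

# Hypothetical preparation step, mirroring what the prediction script loads
import pickle
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler

X_train = pd.read_excel('data.xlsx', sheet_name='metals')
Y_train = pd.read_excel('data.xlsx', sheet_name='targets')  # hypothetical sheet name

norm_x = StandardScaler().fit(X_train)
norm_y = StandardScaler().fit(Y_train)
with open('norm_x.pckl', 'wb') as f:
    pickle.dump(norm_x, f)
with open('norm_y.pckl', 'wb') as f:
    pickle.dump(norm_y, f)

model = Model()  # the architecture defined above; training itself is omitted here
torch.save(model, 'premodel.pkl')  # torch.load('premodel.pkl') returns this Module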

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.