"""Copy of Copy of Copy of Copy of model.ipynb
Automatically generated by Colaboratory.
Original file is located at
In this notebook, I am going to implement a simple linear regression model with tensorflow. We are going to predict the Hardness of a welded material in term of the welding power, speed, time.
"""
# Importing the TensorFlow package and other auxiliary packages
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow import keras
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,PolynomialFeatures
sns.set()
"""##1. Loading the data with pandas"""
# Loading the data with the pandas read_csv attribute
from google.colab import files
files.upload()
df=pd.read_excel('data_1004.xlsx')
data = pd.DataFrame(df)  # read_excel already returns a DataFrame; this makes a copy
def boxplot(df, x, y):
    """Draw a box plot of column y against column x."""
    return sns.boxplot(x=x, y=y, data=df)
# Looking at the data
data
# The shape of our data.
data.shape
# Looking at the data types
data.dtypes
"""##1. Box plotting"""
ax = sns.boxplot(x="S-temp", y="Hardness", data=df)
ax = sns.boxplot(x="S-time", y="Hardness", data=df)
ax = sns.boxplot(x="Wel-power", y="Hardness", data=df)
ax = sns.boxplot(x="Wel-speed", y="Hardness", data=df)
df.plot(kind='box',figsize=(15,15), subplots=True, layout=(3,3), sharex=False, sharey=False)
plt.show()
"""One could do more plotting but we are ok now to build up our model. But first of all let us write a function that will help normalize our data.
##3. Definig the function to normalize the data
Normalization is very important in machine learning as building a model on a raw data set may result in poor performance of the model. It is always advisable to do so before feeding the data into your machine learning algorithm.
"""
# This function returns the normalized data (zero mean, unit variance)
def Normalize(x):
    return (x - np.mean(x)) / np.std(x)
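# A quick sanity check of Normalize (the values here are illustrative, not
# from the data set): the result should have mean ~0 and standard deviation ~1.
check = Normalize(np.array([1.0, 2.0, 3.0, 4.0]))
print(np.mean(check), np.std(check))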
"""##4- Splitting the data in test and train set"""
# Borrowed from the TensorFlow tutorial; scikit-learn's train_test_split would work too (see the sketch below).
train_set=data.sample(frac=0.75,random_state=0)
test_set=data.drop(train_set.index)
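# The equivalent split with scikit-learn (a sketch; the variable names
# train_alt and test_alt are illustrative and not used below):
from sklearn.model_selection import train_test_split
train_alt, test_alt = train_test_split(data, test_size=0.25, random_state=0)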
"""## 5. Data visualisation and statistics"""
sns.set()
sns.pairplot(train_set, height=3);
train_set.describe()
"""## 6. Defining the labels"""
train_labels=train_set.pop('Hardness')
test_labels=test_set.pop('Hardness')
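# The Normalize function defined above is never applied before training in
# this notebook; a sketch of how it could be used (the statistics must come
# from the training set only, to avoid leaking test information):
train_stats = train_set.describe().transpose()
normed_train_set = (train_set - train_stats['mean']) / train_stats['std']
normed_test_set = (test_set - train_stats['mean']) / train_stats['std']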
"""##3. Defining and compiling the model
Next we will create the simplest possible neural network. It has 1 layer, and that layer has 1 neuron, and the input shape to it is just 1 value.
"""
def build_model():
    model = keras.Sequential([
        keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[len(train_set.keys())]),
        keras.layers.Dense(64, activation=tf.nn.relu),
        keras.layers.Dense(1)
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['mae', 'mse'])
    return model
model=build_model()
model.summary()
"""##
Training the model
Here we are going to train our model by feeding into the model the training sets of data (features and labels).
"""
history = model.fit(
    train_set, train_labels,
    epochs=150, validation_split=0.2, verbose=0)
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
"""## Plotting the results"""
def plot_history(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    # The history keys follow the metric names passed to compile()
    # ('mae' and 'mse' here; older TF versions spell them out in full).
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('MAE [Hardness]')
    plt.plot(hist['epoch'], hist['mae'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mae'],
             label='Val Error')
    #plt.ylim([0,5])
    plt.legend()

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('MSE [Hardness]')
    plt.plot(hist['epoch'], hist['mse'],
             label='Train Error')
    plt.plot(hist['epoch'], hist['val_mse'],
             label='Val Error')
    #plt.ylim([0,20])
    plt.legend()
    plt.show()
plot_history(history)
model=build_model()
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
history = model.fit(train_set, train_labels, epochs=150,
                    validation_split=0.2, verbose=0, callbacks=[early_stop])
plot_history(history)
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
loss, mae, mse = model.evaluate(test_set, test_labels, verbose=0)
print("Testing set Mean Abs Error: {:5.2f} Hardness".format(mae))
test_predictions = model.predict(test_set).flatten()
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [Hardness]')
plt.ylabel('Predictions [Hardness]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-1000, 1000], [-1000, 1000])
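# A common companion plot: the distribution of the prediction errors. This is
# a sketch following the same TensorFlow tutorial pattern; the bin count is arbitrary.
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel('Prediction Error [Hardness]')
plt.ylabel('Count')
plt.show()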
test_predictions
test_labels
Normalize(train_set)
Normalize(train_labels)
sns.pairplot(data, hue="Hardness", palette="husl")
"""Plotting *italicized text*"""
df.plot(kind='density',figsize=(15,15), subplots=True, layout=(3,3), sharex=False)
plt.show()
import seaborn as sns; sns.set(style="ticks", color_codes=True)
sns.pairplot(data)
sns.pairplot(data, hue="Hardness")
sns.pairplot(data, hue="Wel-speed", markers=["o", "s"])
sns.pairplot(data, kind="reg")
corr_matrix=df.corr()
corr_matrix
names=['S-temp','S-time','Wel-power','Wel-speed','Hardness']
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(corr_matrix, vmin=-1, vmax=1)
fig.colorbar(cax)
ticks = np.arange(0,5,1)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(names)
ax.set_yticklabels(names)
plt.show()
"""**ANOVA ANALYSIS**"""
df.corr()
"""We can use the Pandas method corr() to find the feature other than price that is most correlated wit
price"""
df.corr()['Hardness'].sort_values()
sns.regplot(x="Hardness", y="Wel-speed", data=df)
plt.ylim(0,)
df[["Hardness", "Wel-speed"]].corr()
sns.regplot(x="Hardness", y="Wel-power", data=df)
plt.ylim(0,)
df[["Hardness", "Wel-power"]].corr()
sns.regplot(x="S-temp", y="Hardness", data=df)
plt.ylim(0,)
df[["Hardness", "S-temp"]].corr()
sns.regplot(x="Hardness", y="S-time", data=df)
plt.ylim(0,)
df[["Hardness", "S-time"]].corr()
from scipy import stats
pearson_coef, p_value = stats.pearsonr(df['Hardness'], df['Wel-power'])
print("The Pearson correlation coefficient is", pearson_coef, "with a p-value of p =", p_value)
pearson_coef, p_value = stats.pearsonr(df['Hardness'], df['S-temp'])
print("The Pearson correlation coefficient is", pearson_coef, "with a p-value of p =", p_value)
pearson_coef, p_value = stats.pearsonr(df['Hardness'], df['S-time'])
print("The Pearson correlation coefficient is", pearson_coef, "with a p-value of p =", p_value)
pearson_coef, p_value = stats.pearsonr(df['Hardness'], df['Wel-speed'])
print("The Pearson correlation coefficient is", pearson_coef, "with a p-value of p =", p_value)
"""To see if different types 'Wel-power' impact 'Hardness"
**MODEL DEVELOPMENT**
"""
from sklearn.linear_model import LinearRegression
X = df[['Wel-power']]
Y = df['Hardness']
lm = LinearRegression()
lm.fit(X,Y)
lm.score(X, Y)
features =["S-temp", "S-time","Wel-power","Wel-speed"]
"""We can Fit a linear regression model using the longitude feature 'long' and caculate the R^2."""
X=df[['S-temp','S-time','Wel-power','Wel-speed']]
Y=df['Hardness']
lm.fit(X,Y)
lm.score(X,Y)
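# Inspecting the fitted model (a sketch): one coefficient per feature plus
# the intercept.
print(dict(zip(X.columns, lm.coef_)))
print(lm.intercept_)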
"""Create a list of tuples, the first element in the tuple contains the name of the estimator:'scale'
'polynomial' 'model'"""
Input=[('scale',StandardScaler()),('polynomial',
PolynomialFeatures(include_bias=False)),('model',LinearRegression())]
"""We use the list to create a pipeline object, predict the 'price', fit the object using the features in the list features, then fit the model and calculate the R^2″""
Input= [('scale', StandardScaler()), ('polynomial',
PolynomialFeatures(include_bias=False)),('model',LinearRegression())]
pipe=Pipeline(Input)
pipe
X=df[['S-temp','S-time','Wel-power','Wel-speed']]
Y=df['Hardness']
pipe.fit(X,Y)
pipe.score(X,Y)
"""**MODEL EVALUATION AND REFINEMENT**"""
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
print("done")
"""We will split the data into training and testing sets."""
features = ["S-temp", "S-time", "Wel-power", "Wel-speed"]
X = df[features]
Y = df['Hardness']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.15, random_state=1)
print("number of test samples:", x_test.shape[0])
print("number of training samples:",x_train.shape[0])
"""**Create and fit a Ridge regression object using the training data, setting the regularization parameter to 0.1, and calculate the R^2 using the test data.**"""
from sklearn.linear_model import Ridge
RidgeModel = Ridge(alpha=0.1)
RidgeModel.fit(x_train, y_train)
yhat = RidgeModel.predict(x_test)
Rsqu_test = RidgeModel.score(x_test, y_test)
print(Rsqu_test)
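# A sketch of how the regularization parameter affects the test R^2
# (the alpha grid is an assumption):
for alpha in [0.001, 0.01, 0.1, 1.0, 10.0]:
    rm = Ridge(alpha=alpha)
    rm.fit(x_train, y_train)
    print(alpha, rm.score(x_test, y_test))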
"""**Perform a second order polynomial transform on both the training data and testing data. Create and fit a Ridge regression object using the training data, setting the regularisation parameter to 0.1. Calculate the R^2**"""
pr = PolynomialFeatures(degree=2)
x_train_pr = pr.fit_transform(x_train[['S-temp', 'S-time', 'Wel-power', 'Wel-speed']])
# Use transform (not fit_transform) on the test data, so it is mapped with the training fit.
x_test_pr = pr.transform(x_test[['S-temp', 'S-time', 'Wel-power', 'Wel-speed']])
RidgeModel.fit(x_train_pr, y_train)
yhat = RidgeModel.predict(x_test_pr)
Rsqu_test = RidgeModel.score(x_test_pr, y_test)
print(Rsqu_test)
"""**Artificial Neural Network Analysis**"""
input_vector = np.array([2, 4, 11])
print(input_vector)
# Reshape the vector into a column vector of shape (3, 1):
input_vector = np.array(input_vector, ndmin=2).T
print(input_vector, input_vector.shape)
number_of_samples = 30
low = -1
high = 0
s = np.random.uniform(low, high, number_of_samples)
# all values of s are within the half-open interval [-1, 0):
print(np.all(s >= -1) and np.all(s < 0))
plt.hist(s)
plt.show()
s = np.random.binomial(10, 0.5, 30)
plt.hist(s)
plt.show()
from scipy.stats import truncnorm
s = truncnorm(a=-2/3., b=2/3., scale=1, loc=0).rvs(size=1000)
plt.hist(s)
plt.show()
def truncated_normal(mean=0, sd=1, low=0, upp=10):
    return truncnorm(
        (low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)

X = truncated_normal(mean=0, sd=0.4, low=-0.5, upp=0.5)
s = X.rvs(10000)  # note: "10,000" would pass two arguments, not one
plt.hist(s)
plt.show()
X1 = truncated_normal(mean=2, sd=1, low=1, upp=10)
X2 = truncated_normal(mean=5.5, sd=1, low=1, upp=10)
X3 = truncated_normal(mean=8, sd=1, low=1, upp=10)
fig, ax = plt.subplots(3, sharex=True)
# `normed` was removed in newer Matplotlib releases; `density=True` is the replacement.
ax[0].hist(X1.rvs(10000), density=True)
ax[1].hist(X2.rvs(10000), density=True)
ax[2].hist(X3.rvs(10000), density=True)
plt.show()
no_of_input_nodes = 3
no_of_hidden_nodes = 4
rad = 1 / np.sqrt(no_of_input_nodes)
# Note: mean=2 lies outside [-rad, rad], so the truncated samples pile up
# near the upper bound; mean=0 would center the weights around zero.
X = truncated_normal(mean=2, sd=1, low=-rad, upp=rad)
wih = X.rvs((no_of_hidden_nodes, no_of_input_nodes))
wih
no_of_hidden_nodes = 4
no_of_output_nodes = 2
rad = 1 / np.sqrt(no_of_hidden_nodes) # this is the input in this layer!
X = truncated_normal(mean=2, sd=1, low=-rad, upp=rad)
who = X.rvs((no_of_output_nodes, no_of_hidden_nodes))
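# A minimal forward-pass sketch using the weight matrices above. The sigmoid
# activation and the reuse of input_vector from earlier are assumptions.
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

hidden = sigmoid(wih @ input_vector)  # (4, 3) @ (3, 1) -> (4, 1)
output = sigmoid(who @ hidden)        # (2, 4) @ (4, 1) -> (2, 1)
print(output)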