Learn practical skills, build real-world projects, and advance your career
Updated 4 years ago
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset; the code below reads its 'Hours' and 'Scores' columns.
df = pd.read_csv('student_scores.csv')
# Preview the first rows (a bare expression is only displayed in an
# interactive session such as a notebook).
df.head()
# Input values (hours) and target values (scores) pulled out of the frame.
X = df['Hours'].values
y = df['Scores'].values
# Scatter plot of Scores against Hours.
plt.scatter(X,y)
plt.show()
# Equation of the line: y = m*x + b  (m is the slope, b is the intercept).
# m and b are not known yet, so each starts from a random value:
# np.random.rand() draws from [0, 1), so 2 * rand() lies in [0, 2).
m = 2 * np.random.rand()
b = 2 * np.random.rand()
# Prediction of the current line for every value in X.
y_pred = m*X + b
# Show the predictions (displayed only in an interactive session).
y_pred
array([ 6.21092103, 10.88165693, 7.46842685, 16.98954233, 8.00735791,
4.41448415, 18.24704815, 11.60023168, 16.63025495, 6.57020841,
15.55239282, 12.31880643, 9.8037948 , 7.64807054, 3.69590939,
17.70811708, 6.21092103, 5.1330589 , 12.67809381, 15.01346176,
6.57020841, 10.34272586, 8.54628898, 14.11524331, 15.73203651])
# Mean Squared Error
def msr(y,y_pred):
    """Return the mean squared error between ``y`` and ``y_pred``.

    Args:
        y: Observed values (array-like of numbers).
        y_pred: Predicted values, broadcastable against ``y``.

    Returns:
        The mean of the squared element-wise differences, as a NumPy float.

    Raises:
        ValueError: If the inputs contain no elements.
    """
    # Convert to float arrays so plain lists are accepted and integer inputs
    # (unsigned ones in particular) cannot wrap around on subtraction/squaring.
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    diff = y - y_pred
    if diff.size == 0:
        raise ValueError("msr() requires at least one value")
    # np.mean divides by the total element count, so the result is a true
    # mean for inputs of any shape, not just 1-D arrays.
    return np.mean(diff**2)
# Error of the randomly initialised line against the observed scores
# (the value is only displayed in an interactive session).
msr(y, y_pred)
2079.6986875826638