Learn practical skills, build real-world projects, and advance your career
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset; the code below reads its 'Hours' and 'Scores' columns.
df = pd.read_csv('student_scores.csv')
df.head()  # bare expression: displays the first rows when run as a notebook cell
# Extract the two columns as NumPy arrays: X is the input, y the value to predict.
X = df['Hours'].values
y = df['Scores'].values
# Scatter plot of Scores (vertical axis) against Hours (horizontal axis).
plt.scatter(X,y)
plt.show()
Notebook Image
# Equation of the line: y = m*x + b
# m (slope) and b (intercept) are unknown, so start from random guesses.
# np.random.rand() returns a float in [0, 1), so each one starts in [0, 2).
m = 2 * np.random.rand()
b = 2 * np.random.rand()
# Prediction of the current line for every value in X.
y_pred = m*X + b
y_pred
array([ 6.21092103, 10.88165693,  7.46842685, 16.98954233,  8.00735791,
        4.41448415, 18.24704815, 11.60023168, 16.63025495,  6.57020841,
       15.55239282, 12.31880643,  9.8037948 ,  7.64807054,  3.69590939,
       17.70811708,  6.21092103,  5.1330589 , 12.67809381, 15.01346176,
        6.57020841, 10.34272586,  8.54628898, 14.11524331, 15.73203651])
# Mean Squared Error
def msr(y, y_pred):
    """Return the mean squared error between observed and predicted values.

    Args:
        y: Observed values; a NumPy array or any array-like (list, tuple).
        y_pred: Predicted values, broadcastable against ``y``.

    Returns:
        The sum of squared differences divided by ``len(y - y_pred)``.

    Raises:
        ZeroDivisionError: If the inputs are empty.
    """
    # Convert up front so plain Python sequences work (``list - list`` raises
    # TypeError). The float dtype also stops integer/unsigned inputs from
    # wrapping around during the subtraction and the squaring.
    diff = np.asarray(y, dtype=float) - np.asarray(y_pred, dtype=float)
    return 1 / len(diff) * np.sum(diff ** 2)
msr(y, y_pred)  # error of the randomly initialised line against the observed scores
2079.6986875826638