Learn practical skills, build real-world projects, and advance your career
Updated 4 years ago
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import matplotlib.ticker as ticker
from sklearn import preprocessing
%matplotlib inline
# Download the remote CSV and parse it into a DataFrame
# (presumably telecom customer records, judging by the file name -- confirm).
df = pd.read_csv(
    'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/ML0101ENv3/labs/teleCust1000t.csv'
)
# Notebook preview of the first five rows.
df.head(n=5)
# Pull the eleven listed columns out of the DataFrame as a 2-D NumPy array;
# column order in X follows the order of the names below.
X = df[
    [
        'region',
        'tenure',
        'age',
        'marital',
        'address',
        'income',
        'ed',
        'employ',
        'retire',
        'gender',
        'reside',
    ]
].values
# Notebook preview of the first five rows of the feature matrix.
X[:5]
array([[ 2., 13., 44., 1., 9., 64., 4., 5., 0., 0., 2.],
[ 3., 11., 33., 1., 7., 136., 5., 5., 0., 0., 6.],
[ 3., 68., 52., 1., 24., 116., 1., 29., 0., 1., 2.],
[ 2., 33., 33., 0., 12., 33., 2., 0., 0., 1., 1.],
[ 2., 23., 30., 1., 9., 30., 1., 2., 0., 0., 4.]])
# Cast the feature matrix to float, then fit a StandardScaler on it and apply
# the scaling in one step (by default each column is centered to zero mean and
# scaled to unit variance). X is rebound to the scaled array.
X = preprocessing.StandardScaler().fit_transform(X.astype(float))
# Notebook display of the scaled matrix.
X
array([[-0.02696767, -1.055125 , 0.18450456, ..., -0.22207644,
-1.03459817, -0.23065004],
[ 1.19883553, -1.14880563, -0.69181243, ..., -0.22207644,
-1.03459817, 2.55666158],
[ 1.19883553, 1.52109247, 0.82182601, ..., -0.22207644,
0.96655883, -0.23065004],
...,
[ 1.19883553, 1.47425216, 1.37948227, ..., -0.22207644,
0.96655883, -0.92747794],
[ 1.19883553, 1.61477311, 0.58283046, ..., -0.22207644,
0.96655883, -0.92747794],
[ 1.19883553, 0.67796676, -0.45281689, ..., -0.22207644,
0.96655883, 0.46617787]])