Learn practical skills, build real-world projects, and advance your career
Updated 2 years ago
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Optional palette override, currently disabled:
# sns.set_palette("colorblind")
# Set the default figure size for plots to (7, 7).
sns.set(rc={'figure.figsize':(7, 7)})
# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
# Display floating-point values with two decimal places.
pd.set_option("display.float_format", lambda x: f'{x:.2f}')
import warnings
# Silence every warning from here on.
# NOTE(review): this also hides deprecation warnings from the libraries above.
warnings.filterwarnings("ignore")
def std_col_names(df):
    """
    Standardize the column names of a DataFrame in place.

    Each column name is lower-cased, stripped of leading/trailing
    whitespace, and has every remaining space replaced by an underscore
    (e.g. ``' Net Yearly Income '`` -> ``'net_yearly_income'``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so the call can be chained.
    """
    # Strip before replacing so padding spaces do not turn into underscores.
    # regex=False: the space is matched as a literal character, independent
    # of the pandas version's default for ``regex``.
    df.columns = (
        df.columns.str.lower()
        .str.strip()
        .str.replace(' ', '_', regex=False)
    )
    return df
# List the files present in the local ./data/ directory.
os.listdir('./data/')
['sample_submission.csv', 'train.csv', 'test.csv']
# Load train.csv into a DataFrame and standardize its column names.
# Download the file from the files section of the page first, then adjust
# the path below to wherever the file was saved.
df = std_col_names(pd.read_csv('./data/train.csv'))
# Preview the first five rows.
df.head(5)
# Inspect missing data (non-null counts) and the dtype of each feature.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45528 entries, 0 to 45527
Data columns (total 19 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 customer_id 45528 non-null object
1 name 45528 non-null object
2 age 45528 non-null int64
3 gender 45528 non-null object
4 owns_car 44981 non-null object
5 owns_house 45528 non-null object
6 no_of_children 44754 non-null float64
7 net_yearly_income 45528 non-null float64
8 no_of_days_employed 45065 non-null float64
9 occupation_type 45528 non-null object
10 total_family_members 45445 non-null float64
11 migrant_worker 45441 non-null float64
12 yearly_debt_payments 45433 non-null float64
13 credit_limit 45528 non-null float64
14 credit_limit_used(%) 45528 non-null int64
15 credit_score 45520 non-null float64
16 prev_defaults 45528 non-null int64
17 default_in_last_6months 45528 non-null int64
18 credit_card_default 45528 non-null int64
dtypes: float64(8), int64(5), object(6)
memory usage: 6.6+ MB