# Learn practical skills, build real-world projects, and advance your career
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
# sns.set_palette("colorblind")
# Give every figure a 7x7 default size.
sns.set(rc={"figure.figsize": (7, 7)})

# Print all columns of a frame and render floats with two decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", "{:.2f}".format)

import warnings
# Suppress every warning raised after this point.
warnings.filterwarnings("ignore")
def std_col_names(df):
    """
    Standardise the column labels of ``df`` in place and return the same frame.

    - Convert feature names to lower case
    - Trim leading and trailing whitespace
    - Replace each remaining space with an underscore
    """
    lowered = df.columns.str.lower()
    trimmed = lowered.str.strip()
    # Strip before replacing so outer whitespace is dropped, not turned into '_'.
    df.columns = trimmed.str.replace(' ', '_')
    return df
# List the files present in the data directory
os.listdir('./data/')
# ['sample_submission.csv', 'train.csv', 'test.csv']

# Load DF
# Download the file from the files section of the page
# and adjust the path below to wherever it was saved
df = std_col_names(
    pd.read_csv('./data/train.csv')
)
df.head(5)

# Observe missing data and feature types
df.info()
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 45528 entries, 0 to 45527
# Data columns (total 19 columns):
#  #   Column                   Non-Null Count  Dtype
# ---  ------                   --------------  -----
#  0   customer_id              45528 non-null  object
#  1   name                     45528 non-null  object
#  2   age                      45528 non-null  int64
#  3   gender                   45528 non-null  object
#  4   owns_car                 44981 non-null  object
#  5   owns_house               45528 non-null  object
#  6   no_of_children           44754 non-null  float64
#  7   net_yearly_income        45528 non-null  float64
#  8   no_of_days_employed      45065 non-null  float64
#  9   occupation_type          45528 non-null  object
#  10  total_family_members     45445 non-null  float64
#  11  migrant_worker           45441 non-null  float64
#  12  yearly_debt_payments     45433 non-null  float64
#  13  credit_limit             45528 non-null  float64
#  14  credit_limit_used(%)     45528 non-null  int64
#  15  credit_score             45520 non-null  float64
#  16  prev_defaults            45528 non-null  int64
#  17  default_in_last_6months  45528 non-null  int64
#  18  credit_card_default      45528 non-null  int64
# dtypes: float64(8), int64(5), object(6)
# memory usage: 6.6+ MB