Learn practical skills, build real-world projects, and advance your career
Updated 4 years ago
#import statements
import numpy as np
import os
import tarfile
import urllib
import torch
import jovian
import seaborn as sns
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
import sklearn
from sklearn.impute import SimpleImputer
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
import _pickle as cPickle
# Name of the CSV file that holds the training data, resolved relative to the
# current working directory.
DATA_FILENAME = "train.csv"

# Parse the whole CSV into a DataFrame using pandas' default settings.
df = pd.read_csv(filepath_or_buffer=DATA_FILENAME)

# Preview the first five rows. The result is only rendered when this is the
# last expression of a notebook cell; in a plain script it is discarded.
df.head(n=5)

# Print the per-column summary: non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Name 891 non-null object
4 Sex 891 non-null object
5 Age 714 non-null float64
6 SibSp 891 non-null int64
7 Parch 891 non-null int64
8 Ticket 891 non-null object
9 Fare 891 non-null float64
10 Cabin 204 non-null object
11 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
# Report the DataFrame's dimensions. `df.shape` is a (rows, columns) tuple, so
# both counts come from one unpacking rather than two separate len() calls.
num_rows, num_cols = df.shape

# A single f-string replaces the original chain of "+"-concatenated fragments;
# the printed text is unchanged.
print(f"This dataframe is {num_rows} by {num_cols}")
This dataframe is 891 by 12