Learn practical skills, build real-world projects, and advance your career
import torch
import tarfile
import os
from torch.utils.data import Dataset
from torchvision.datasets.utils import download_url
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
import sklearn
import matplotlib
import csv
from PIL import Image

# Data: download and unpack the Speech Commands v0.01 archive

# Fetch the Speech Commands v0.01 archive into /content/.
# download_url is called only for its side effect (writing the file); its
# result is not bound to a name because nothing reads it before `data` is
# assigned the loaded waveform further down.
download_url("http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz", "/content/")

# Unpack into the absolute directory that the rest of the script reads from
# ('/content/data/...'). The previous relative './data' target only resolved
# to that location when the working directory happened to be /content.
# NOTE(review): the archive is fetched over plain HTTP and extracted without
# member filtering; consider tarfile's extraction filters where the running
# Python version supports them.
with tarfile.open('/content/speech_commands_v0.01.tar.gz', 'r:gz') as tar:
    tar.extractall(path='/content/data')
# Output:
# Downloading http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz to /content/speech_commands_v0.01.tar.gz
# HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))
# Load a single recording from the 'one' class directory as a sanity check.
# No sample rate is passed to librosa.load, so librosa's default applies; the
# recorded output that follows shows 22050 samples at a rate of 22050.
data, sampling_rate = librosa.load('/content/data/one/00176480_nohash_0.wav')
# Bare expression: shown as the cell result when this is the last line of a
# notebook cell; it has no effect when the file is run as a plain script.
data, data.shape, sampling_rate
# Output:
# (array([-2.9995823e-05, -1.6769719e-04, -3.9024639e-04, ...,
#         -6.1316596e-04, -3.3654648e-04, -1.7320579e-04], dtype=float32),
#  (22050,),
#  22050)
# Names of the ten spoken-digit class directories under /content/data/.
digit = [
    'zero', 'one', 'two', 'three', 'four',
    'five', 'six', 'seven', 'eight', 'nine',
]

# Report how many entries each class directory contains, one line per class.
for word in digit:
    n_files = len(os.listdir('/content/data/' + word))
    print(word, ": ", n_files)

# The classes are balanced: every digit has between 2352 and 2377 recordings.
# Output:
# zero : 2376 one : 2370 two : 2373 three : 2356 four : 2372 five : 2357 six : 2369 seven : 2377 eight : 2352 nine : 2364