# Vectorize Text Using Different Techniques for Data Mining the Mahābhārata

import sklearn

# Show which scikit-learn version this session is using.
sklearn_version = sklearn.__version__
print(sklearn_version)
# Output: 0.20.3
# Directory on the mounted Google Drive that holds the 18 raw text files.
RAW_DATA_DIR = '/content/drive/MyDrive/beproject/rawdata'


def _read_book(number):
  """Return the full text of the raw file for one book.

  Args:
    number: Book number; it is zero-padded to two digits to build the
      file name (1 -> maha01.txt, 18 -> maha18.txt).

  Returns:
    The entire file contents as a single string.

  Raises:
    OSError: If the file cannot be opened.
    UnicodeDecodeError: If the file contents are not valid UTF-8.
  """
  path = '{}/maha{:02d}.txt'.format(RAW_DATA_DIR, number)
  # Decode explicitly rather than relying on the platform's default encoding.
  # NOTE(review): assumes the raw files are UTF-8 encoded -- confirm.
  with open(path, encoding='utf-8') as f:
    return f.read()


# The individual maha01..maha18 names are kept because code outside this
# block may refer to them directly.
(maha01, maha02, maha03, maha04, maha05, maha06,
 maha07, maha08, maha09, maha10, maha11, maha12,
 maha13, maha14, maha15, maha16, maha17, maha18) = [
    _read_book(number) for number in range(1, 19)
]

# CountVectorizer

# Mount Google Drive inside the Colab runtime.
# NOTE(review): the file reads earlier in this file use paths under this
# mount point, so in a top-to-bottom run this step would need to execute
# before them -- confirm the intended cell order.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)
# Output: Mounted at /content/drive