Analyzing text file with pandas and numpy

 

Analyzing text file with pandas and numpy

>>> import pandas as pd

>>> import numpy as np

>>> # Analyzing text file with pandas and numpy

def fan(filename, output='newnew.txt'):
    """Append word-frequency and per-line statistics of a text file to *output*.

    The file is read as UTF-8. First, the ten most common whitespace-separated
    tokens of the whole file are written as a list of ``(token, count)``
    tuples. Then, for every line that contains at least one word, a
    ``pandas.Series`` with the following entries is written:

    * ``char length``   - number of characters in the line, spaces excluded
    * ``char mean`` / ``char median`` / ``char std`` / ``char variance`` -
      statistics of how often each distinct character occurs in the line
    * ``word length``   - number of words in the line
    * ``word mean`` / ``word median`` / ``word std`` / ``word variance`` -
      statistics of the lengths of the words in the line

    Lines without any word (blank or whitespace-only) are skipped, because
    their statistics would be computed over empty lists and come out as NaN.

    Args:
        filename: Path of the UTF-8 text file to analyse.
        output: Path of the report file; it is opened in append mode, so
            repeated calls accumulate. Defaults to ``'newnew.txt'``.

    Returns:
        None. The report is written to *output*.
    """
    # Local import: the surrounding file does not import Counter itself.
    from collections import Counter

    with open(filename, encoding='utf-8') as src:
        text = src.read()

    # One handle for the whole report instead of a new, never-closed handle
    # per print() call.
    with open(output, 'a+', encoding='utf-8') as out:
        print(Counter(text.split()).most_common(10), file=out)

        for line in text.splitlines():
            words = line.split()
            if not words:
                continue

            chars = line.replace(' ', '')
            char_counts = list(Counter(chars).values())
            word_lengths = [len(word) for word in words]

            # Labels are listed in the same order as the values
            # (mean, median, std, variance) so each number gets its own name.
            stats = pd.Series(
                [
                    len(chars),
                    np.mean(char_counts),
                    np.median(char_counts),
                    np.std(char_counts),
                    np.var(char_counts),
                    len(words),
                    np.mean(word_lengths),
                    np.median(word_lengths),
                    np.std(word_lengths),
                    np.var(word_lengths),
                ],
                index=[
                    "char length",
                    "char mean",
                    "char median",
                    "char std",
                    "char variance",
                    "word length",
                    "word mean",
                    "word median",
                    "word std",
                    "word variance",
                ],
            )
            print(stats, file=out)

 

A future update will visualize the text using different text-analysis approaches.


Comments