Analyzing text file with pandas and numpy
Analyzing text file with pandas and numpy
>>> import pandas as pd
>>> import numpy as np
>>> # Analyzing text file with pandas and numpy
def fan(filename, output='newnew.txt'):
    """Append word-frequency and per-line statistics for a text file to a report.

    The report receives, in order:

    1. The ten most common whitespace-separated words of the whole file,
       as a list of ``(word, count)`` tuples.
    2. For every line that contains at least one word, a ``pandas.Series``
       holding character statistics (computed on the per-character
       occurrence counts of the line with spaces removed) and word
       statistics (computed on the lengths of the line's words).

    Lines without any word (blank or whitespace-only) are skipped, because
    mean/median/std/var of an empty sequence would only yield NaN rows and
    NumPy runtime warnings.

    Args:
        filename: Path of the UTF-8 text file to analyse.
        output: Path of the report file; it is opened in append mode, so
            repeated calls accumulate. Defaults to ``'newnew.txt'``.

    Returns:
        None. The result is written to ``output``.
    """
    # Local import: the surrounding snippet only imports pandas and numpy.
    from collections import Counter

    with open(filename, encoding='utf-8') as source:
        text = source.read()

    # Labels are listed in exactly the same order as the values below.
    labels = [
        "char length", "char mean", "char median", "char std",
        "char variance", "word length", "word mean", "word median",
        "word std", "word variance",
    ]

    # Open the report once and close it deterministically instead of
    # leaking one handle per print call.
    with open(output, 'a+', encoding='utf-8') as report:
        print(Counter(text.split()).most_common(10), file=report)

        for line in text.splitlines():
            words = line.split()
            if not words:
                continue

            # Only plain spaces are removed, so other characters (tabs,
            # punctuation) still count towards the character statistics.
            chars = line.replace(' ', '')
            char_counts = list(Counter(chars).values())
            word_lengths = [len(word) for word in words]

            stats = [
                len(chars),
                np.mean(char_counts),
                np.median(char_counts),
                np.std(char_counts),
                np.var(char_counts),
                len(words),
                np.mean(word_lengths),
                np.median(word_lengths),
                np.std(word_lengths),
                np.var(word_lengths),
            ]
            print(pd.Series(stats, index=labels), file=report)
A future update will visualize the text using different
approaches to text analysis.
Comments
Post a Comment