问题是我实际上是在尝试将二进制文件转换为ascii,然后分成大块,然后对于每4个大块,我想将它们保留在四列的熊猫数据帧中,因此每列将具有一块但是,当前o / p值不正确。作为输入行附加到熊猫的列表显示为列(额外),我尚未创建。请看看。
===========================================================
# Importing the required libraries
from os import walk
import binascii
import pandas as pd
import numpy as np
import chardet
#The following defintion of the function is the create chunks of sequences
def chunker(seq, size):
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
# to read and list the files in the folder
for (dirpath, dirnames, filenames) in walk('/Users/amathur1/PycharmProjects/learningpython/NAWF_VRG_G'):
count = 1
for file in filenames:
# to read all the files one by one and convert them into ASCII
# with open(dirpath+"/"+file, 'rb') as file1:
# base64_data = binascii.b2a_base64(file1.read())
# print(base64_data)
print(count, " : ", file)
count = count + 1
print("select file you want to convert")
input_file = input()
print("Selected file number is : ", input_file)
#To open the selected file
with open(dirpath + "/" + filenames[int(input_file) - 1], 'rb') as file:
# Reading the selected file i.e. file
read_file = file.read()
print(read_file[0:100])
# declaring pandas dataframe
df = pd.DataFrame(columns=('Col1', 'Col2', 'Col3', 'Col4'))
count = 0
Ip_list_2 = []
for group in chunker(read_file[0:100], 4):
# print('This is count ', count)
# converted_file = binascii.b2a_base64(group).decode('ascii')
# print("Converted_file is: ", converted_file)
if count == 4:
print('This is count 4 ,hence restarting')
Ip_list_2 = []
count = 0
else:
print('This is count ', count)
converted_file = binascii.b2a_base64(group).decode('ascii')
print("Converted_file is: ", converted_file)
#Ip_list.append(converted_file)
Ip_list = [converted_file]
Ip_list_2.append(Ip_list)
print("Ip_list is ", Ip_list_2, "\n")
# df = df.append(pd.series(Ip_list, index=['çol1', 'çol2', 'çol3', 'çol4']), ignore_index=True)
# df = pd.DataFrame(Ip_list, columns=['çol1', 'çol2', 'çol3', 'çol4'])
# df.append(Ip_list)
if count == 3:
# df1 = pd.DataFrame(Ip_list, columns=('Col1', 'Col2', 'Col3', 'Col4'))
# df = df.append(df1, ignore_index=True)
df = df.append(Ip_list_2,ignore_index=True)
count += 1
#df = df.append(pd.DataFrame(Ip_list, columns=['çol1', 'çol2', 'çol3', 'çol4']), ignore_index=True)
print(df)
============================================================
O/P
Users/amathur1/PycharmProjects/learningpython/venv/bin/python "/Users/amathur1/PycharmProjects/learningpython/June26_New code_file.py"
1 : .DS_Store
2 : .RData
3 : .Rhistory
4 : STOC_VRG_FR_INDEX_G_US.034
5 : STOC_VRG_FR_INDEX_G_US.048
6 : STOC_VRG_FR_INDEX_G_US.049
7 : STOC_VRG_FR_INDEX_G_US.050
8 : STOC_VRG_FR_INDEX_G_US.064
9 : STOC_VRG_FR_INDEX_G_US.065
select file you want to convert
5
Selected file number is : 5
b'A\xd9\xa5\x1ab\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x0b\xda\xa5\x1ab\x00\x00\x00\x02\x00\x00\x00\x02\x00\x00\x00\xcd\xdb\xa5\x1ab\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\xff\xdb\xa5\x1ab\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\xe9\xdc\xa5\x1ab\x00\x00\x00\x06\x00\x00\x00\x02\x00\x00\x00\xf7\xdc\xa5\x1ab\x00\x00\x00\x08\x00\x00\x00\x02\x00\x00\x00\x1b\xdd\xa5\x1a'
The total count of the sequence is : 4359984
The total of number of rows in the pandas dataframe would be : 272499.0
This is count 0
Converted_file is: QdmlGg==
Ip_list is [['QdmlGg==\n']]
This is count 1
Converted_file is: YgAAAA==
Ip_list is [['QdmlGg==\n'], ['YgAAAA==\n']]
This is count 2
Converted_file is: AAAAAA==
Ip_list is [['QdmlGg==\n'], ['YgAAAA==\n'], ['AAAAAA==\n']]
This is count 3
Converted_file is: AgAAAA==
Ip_list is [['QdmlGg==\n'], ['YgAAAA==\n'], ['AAAAAA==\n'], ['AgAAAA==\n']]
This is count 4 ,hence restarting
This is count 0
...
...
...
.
.
.
This is count 4 ,hence restarting
Col1 Col2 Col3 Col4 0
0 NaN NaN NaN NaN QdmlGg==\n
1 NaN NaN NaN NaN YgAAAA==\n
2 NaN NaN NaN NaN AAAAAA==\n
3 NaN NaN NaN NaN AgAAAA==\n
4 NaN NaN NaN NaN YgAAAA==\n
5 NaN NaN NaN NaN AgAAAA==\n
6 NaN NaN NaN NaN AgAAAA==\n
7 NaN NaN NaN NaN zdulGg==\n
8 NaN NaN NaN NaN BAAAAA==\n
9 NaN NaN NaN NaN AQAAAA==\n
10 NaN NaN NaN NaN /9ulGg==\n
11 NaN NaN NaN NaN YgAAAA==\n
12 NaN NaN NaN NaN AQAAAA==\n
13 NaN NaN NaN NaN 6dylGg==\n
14 NaN NaN NaN NaN YgAAAA==\n
15 NaN NaN NaN NaN BgAAAA==\n
16 NaN NaN NaN NaN 99ylGg==\n
17 NaN NaN NaN NaN YgAAAA==\n
18 NaN NaN NaN NaN CAAAAA==\n
19 NaN NaN NaN NaN AgAAAA==\n
Process finished with exit code 0