我正在尝试编写一个程序,它将数据文件作为输入,将其转换为DataFrame并使用包Crypto加密给定列。
输出应包含两个DataFrame /文件。第一个文件应包含加密列的整个DataFrame。第二个文件包含带密钥的加密列。
我的代码工作得很好,因为它给了我想要的输出。解密时会出现问题。不知何故,从第二个文件复制密钥并不能使用函数decryption
。然而,解密工作在程序之外,例如加密字符串" python",加密的字符串和密钥输出到控制台正常工作。
我认为它与从/向文件读/写时的编码有关。我已尝试指定编码(" utf")但它没有帮助。
任何帮助都会非常感激。由于我不熟悉编码,因此欢迎任何类型的关于编程风格的建设性批评。
# The program takes a file as an input - identifies the format (csv or excel) automatically.
# The output is two files. The dataframe with the column(s) encrypted and another dataframe with the encrypted column(s) plus the keys.
import pandas as pd
import sys, os, base64
from Crypto.Cipher import AES
### define a class with the attributes file_path und file_format
class InputFile:
def __init__(self, file_path):
self.file_path = file_path # File path
self.file_format = file_path.split('.')[-1]
# If the file is a csv, ask whether it is seperated with a comma or a semicolon
if self.file_format == 'csv':
self.separator = input('How are the values separated? (, or ;): ')
if self.separator == 'quit':
sys.exit('You quit the program')
if self.file_format == 'xlsx' or self.file_format == 'xls':
# For Excel, the sheetname should be provided as well
self.sheet_name = input('Please provide the sheetname: \n')
if self.sheet_name == 'quit':
sys.exit('You quit the program')
# funktion um die Datei einzulesen, abhängig von dem Format
def readfile_whole(self):
if self.file_format == 'csv':
data = pd.read_csv(self.file_path, sep = self.separator)
if self.file_format == 'xlsx' or self.file_format == 'xls':
data = pd.read_excel(self.file_path, sheetname = self.sheet_name)
return data
# Funktion um die Spalten zu lesen ,die verschlüsselt werden sollen
def readfile_cols(self):
print('Please provide a list of columns you want encrypted. When finished, press Enter without typing')
in_data = self.readfile_whole()
list_of_cols = []
while True:
inp = input().rstrip(' ')
if inp == 'quit':
sys.exit('You quit the program')
if inp == '':
break
if inp in list(in_data):
list_of_cols.append(inp)
return in_data[list_of_cols]
def encryption(inputcol):
block = 16 #bytes encryption
padding = '{'
pad = lambda s: s + (block - len(s) % block) * padding
#string gets padded with this Padding depending on its size
encryptAES = lambda c,s: base64.b64encode(c.encrypt(pad(s)))
#declare function "encodeAES". A cipher (AES) is used to encrypt the
#padded string. Additionally, the encrypted string is encoded in b64 format
keys = [] #initialize a keys column
encryptedcol = [] #initialize a encrypted column
for item in inputcol:
key = base64.b64encode(os.urandom(16)) #generate a random key of size 16 bytes
keys.append(key)
for key,item in zip(keys,inputcol):
cipher = AES.new(key) #creates a cipher out of the generated key
encrypted_item = encryptAES(cipher, str(item)) #encrypt the
encryptedcol.append(encrypted_item)
return [keys, encryptedcol]
def decryption(key, encoded):
padding = '{'
decodeAES = lambda c, e: c.decrypt(base64.b64decode(bytes(e, 'utf'))).rstrip(padding)
#key = base64.b64decode(key)
cipher = AES.new(key)
decoded_item = decodeAES(cipher, encoded)
return decoded_item
def main():
choice = input('\n\t Welcome to Encrypto! To quit at any arbitrary point, type \'quit\'.\nDo you want to [e]ncrpt or [d]ecrypt?: ')
#give the user a choice on how he wants to proceed
if choice == 'e':
path = input('Please provide the folder Path: ') # path of the file to be encrypted
if path == 'quit':
sys.exit('You quit the program')
in_file = inputfile(path) #file is a class inputfile defined above
in_data = in_file.readfile_cols() #read the columns from the given
#file as a dataframe
in_data_keys = in_data.copy() # --''--
for column in in_data:
col = in_data[column].tolist()
in_data[column] = encryption(col)[1]
in_data_keys[column] = encryption(col)[0]
# a for-loop which runs through the selected columns, and encrypts them
#returns a tuple where the first item is the key, the second the encrypted
#column
print(in_data, in_data_keys)
main_data = in_file.readfile_whole() #read the whole file
main_data.update(in_data) #switch the original column with the -
new_data = main_data # - encrypted column
out_name_data = input('Please provide the name for the output file: ' )
out_name_keys = input('Please provide the name for the output keys file: ' )
# names of the output files
output_path = input('Please provide the path for the output data: ')
new_data.to_csv(output_path + out_name_data + '.csv', sep=';')
in_data_keys = pd.concat([in_data_keys,in_data], axis = 1) #concatenate the keys and encrypted column
in_data_keys.to_csv(output_path + out_name_keys + '.csv', sep=';')
if choice == 'd':
key = input('Please provide the key: ')
encoded = input('Please provide the encrypted string: ')
print(decryption(key, encoded))
if __name__ == '__main__':
Main()
读取文件时可能会出现问题:
def readfile_whole(self):
if self.file_format == 'csv':
data = pd.read_csv(self.file_path, sep = self.separator)
if self.file_format == 'xlsx' or self.file_format == 'xls':
data = pd.read_excel(self.file_path, sheetname = self.sheet_name)
return data
# Funktion um die Spalten zu lesen ,die verschlüsselt werden sollen
def readfile_cols(self):
print('Please provide a list of columns you want encrypted. When finished, press Enter without typing')
in_data = self.readfile_whole()
list_of_cols = []
while True:
inp = input().rstrip(' ')
if inp == 'quit':
sys.exit('You quit the program')
if inp == '':
break
if inp in list(in_data):
list_of_cols.append(inp)
return in_data[list_of_cols]
或在加密时:
def encryption(inputcol):
block = 16 #bytes encryption
padding = '{'
pad = lambda s: s + (block - len(s) % block) * padding
#string gets padded with this Padding depending on its size
encryptAES = lambda c,s: base64.b64encode(c.encrypt(pad(s)))
#declare function "encodeAES". A cipher (AES) is used to encrypt the
#padded string. Additionally, the encrypted string is encoded in b64 format
keys = [] # initialize a keys column
encryptedcol = [] # initialize a encrypted column
for item in inputcol:
key = base64.b64encode(os.urandom(16)) # generate a random key of size 16 bytes
keys.append(key)
for key,item in zip(keys,inputcol):
cipher = AES.new(key) #creates a cipher out of the generated key
encrypted_item = encryptAES(cipher, str(item)) #encrypt the
encryptedcol.append(encrypted_item)
return [keys, encryptedcol]
为了在csv文件上编写Dataframes,正在使用pandas函数to_csv
。