使用pandas进行AES加密读写 - python 2.7

时间:2017-10-07 13:12:17

标签: python pandas encryption cryptography aes

我正在尝试编写一个程序,它将数据文件作为输入,将其转换为DataFrame并使用包Crypto加密给定列。

输出应包含两个DataFrame /文件。第一个文件应包含加密列的整个DataFrame。第二个文件包含带密钥的加密列。

我的代码工作得很好,因为它给了我想要的输出。解密时会出现问题。不知何故,从第二个文件复制密钥并不能使用函数decryption。然而,解密工作在程序之外,例如加密字符串" python",加密的字符串和密钥输出到控制台正常工作。

我认为它与从/向文件读/写时的编码有关。我已尝试指定编码(" utf")但它没有帮助。

任何帮助都会非常感激。由于我不熟悉编码,因此欢迎任何类型的关于编程风格的建设性批评。

加密程序

# The program takes a file as an input - identifies the format (csv or excel) automatically.
# The output is two files. The dataframe with the column(s) encrypted and another dataframe with the encrypted column(s) plus the keys.


import pandas as pd
import sys, os, base64
from Crypto.Cipher import AES


### define a class with the attributes file_path und file_format

class InputFile:
    def __init__(self, file_path):
        self.file_path = file_path # File path 
        self.file_format = file_path.split('.')[-1]
        # If the file is a csv, ask whether it is seperated with a comma or a semicolon 
        if self.file_format == 'csv':
            self.separator = input('How are the values separated? (, or ;): ')
            if self.separator == 'quit':
                sys.exit('You quit the program')
        if self.file_format == 'xlsx' or self.file_format == 'xls':
            # For Excel, the sheetname should be provided as well
            self.sheet_name = input('Please provide the sheetname: \n')
            if self.sheet_name == 'quit':
                sys.exit('You quit the program')    


    # funktion um die Datei einzulesen, abhängig von dem Format
    def readfile_whole(self):
        if self.file_format == 'csv':
            data = pd.read_csv(self.file_path, sep = self.separator)
        if self.file_format == 'xlsx' or self.file_format == 'xls':
            data = pd.read_excel(self.file_path, sheetname = self.sheet_name)
        return data

    # Funktion um die Spalten zu lesen ,die verschlüsselt werden sollen
    def readfile_cols(self):
        print('Please provide a list of columns you want encrypted. When finished, press Enter without typing')        
        in_data = self.readfile_whole()   
        list_of_cols = []
        while True:
            inp = input().rstrip(' ')
            if inp == 'quit':
                sys.exit('You quit the program')            
            if inp == '':
                break
            if inp in list(in_data):
                list_of_cols.append(inp)

        return in_data[list_of_cols]

def encryption(inputcol):
    block = 16 #bytes encryption
    padding = '{' 
    pad = lambda s: s + (block - len(s) % block) * padding 
    #string gets padded with this Padding depending on its size
    encryptAES = lambda c,s: base64.b64encode(c.encrypt(pad(s)))
    #declare function "encodeAES". A cipher (AES) is used to encrypt the 
    #padded string. Additionally, the encrypted string is encoded in b64 format
    keys = []  #initialize a keys column
    encryptedcol = []  #initialize a encrypted column
    for item in inputcol:
        key = base64.b64encode(os.urandom(16)) #generate a random key of size 16 bytes
        keys.append(key) 
    for key,item in zip(keys,inputcol):
        cipher = AES.new(key)  #creates a cipher out of the generated key
        encrypted_item = encryptAES(cipher, str(item))  #encrypt the 
        encryptedcol.append(encrypted_item)
    return [keys, encryptedcol]


def decryption(key, encoded):
    padding = '{'
    decodeAES = lambda c, e: c.decrypt(base64.b64decode(bytes(e, 'utf'))).rstrip(padding)
    #key = base64.b64decode(key)
    cipher = AES.new(key)
    decoded_item = decodeAES(cipher, encoded)
    return decoded_item


def main():
    choice = input('\n\t Welcome to Encrypto! To quit at any arbitrary point, type \'quit\'.\nDo you want to [e]ncrpt or [d]ecrypt?: ')
    #give the user a choice on how he wants to proceed    
    if choice == 'e':
        path = input('Please provide the folder Path: ') # path of the file to be encrypted
        if path == 'quit':
            sys.exit('You quit the program')
        in_file = inputfile(path)  #file is a class inputfile defined above
        in_data = in_file.readfile_cols()  #read the columns from the given 
                                           #file as a dataframe
        in_data_keys = in_data.copy()  # --''--
        for column in in_data:
            col = in_data[column].tolist()
            in_data[column] = encryption(col)[1]
            in_data_keys[column] = encryption(col)[0]
        # a for-loop which runs through the selected columns, and encrypts them
        #returns a tuple where the first item is the key, the second the encrypted
        #column
        print(in_data, in_data_keys)
        main_data = in_file.readfile_whole()  #read the whole file
        main_data.update(in_data)  #switch the original column with the -
        new_data = main_data  # - encrypted column 
        out_name_data = input('Please provide the name for the output file: ' )
        out_name_keys = input('Please provide the name for the output keys file: ' )
        # names of the output files
        output_path = input('Please provide the path for the output data: ')
        new_data.to_csv(output_path + out_name_data + '.csv', sep=';')
        in_data_keys = pd.concat([in_data_keys,in_data], axis = 1)  #concatenate the keys and encrypted column
        in_data_keys.to_csv(output_path + out_name_keys + '.csv', sep=';')
    if choice == 'd':
        key = input('Please provide the key: ')
        encoded = input('Please provide the encrypted string: ')
        print(decryption(key, encoded))

if __name__ == '__main__':
    Main()

读取文件时可能会出现问题:

        def readfile_whole(self):
            if self.file_format == 'csv':
                    data = pd.read_csv(self.file_path, sep = self.separator)
                if self.file_format == 'xlsx' or self.file_format == 'xls':
                    data = pd.read_excel(self.file_path, sheetname = self.sheet_name)
                return data

        # Funktion um die Spalten zu lesen ,die verschlüsselt werden sollen
        def readfile_cols(self):
            print('Please provide a list of columns you want encrypted. When finished, press Enter without typing')        
            in_data = self.readfile_whole()   
            list_of_cols = []
            while True:
                inp = input().rstrip(' ')
                if inp == 'quit':
                    sys.exit('You quit the program')            
                if inp == '':
                    break
                if inp in list(in_data):
                    list_of_cols.append(inp)

            return in_data[list_of_cols]

或在加密时:

def encryption(inputcol):
    block = 16 #bytes encryption
    padding = '{' 
    pad = lambda s: s + (block - len(s) % block) * padding 
    #string gets padded with this Padding depending on its size
    encryptAES = lambda c,s: base64.b64encode(c.encrypt(pad(s)))
    #declare function "encodeAES". A cipher (AES) is used to encrypt the 
    #padded string. Additionally, the encrypted string is encoded in b64 format
    keys = []  # initialize a keys column
    encryptedcol = []  # initialize a encrypted column
    for item in inputcol:
        key = base64.b64encode(os.urandom(16)) # generate a random key of size 16 bytes
        keys.append(key) 
    for key,item in zip(keys,inputcol):
        cipher = AES.new(key)  #creates a cipher out of the generated key
        encrypted_item = encryptAES(cipher, str(item))  #encrypt the 
        encryptedcol.append(encrypted_item)
    return [keys, encryptedcol]

为了在csv文件上编写Dataframes,正在使用pandas函数to_csv

0 个答案:

没有答案