如何对CSV文件的列进行排序而不考虑字符大小写?

时间:2016-01-08 22:27:09

标签: python sorting python-3.x

我有一个程序,它在CSV文件的第1列中随机生成一系列由5个字母(ASCII,包含大写和小写)组成的字符串,并在同一CSV文件的第2列中生成4位数字(0-9)。我可以按升序对第2列进行排序,但对第1列排序时出现了问题:它会先排列所有以大写字母开头的值,然后才排列小写的值。排序结果会输出到一个新文件sorted.csv。

示例:

ANcPI
DLBvA
FpSCo
beMhy
dWDjl

有谁知道如何对这些值进行排序,使大小写不产生影响,而只按字母本身排序?排序结果应该是:

ANcPI
beMhy
DLBvA
dWDjl
FpSCo

以下是该程序目前的代码:

import random
import string

#se='111411468'                                                                             # Hardcoded value for development of the program
# Ask for the user's ID; it seeds the generator so the same ID always
# reproduces the same sequence of random values.
se = int(input('Please enter your ID for specific random number sequence: '))
random.seed(se, version=2)  # version 2 is the default seeding algorithm in Python 3

# Character pools used to build the random columns of 'unsorted.csv'.
ascii_lowercase = string.ascii_lowercase  # 'a'..'z'
ascii_uppercase = string.ascii_uppercase  # 'A'..'Z'
ascii_letters = ascii_lowercase + ascii_uppercase  # column 1 draws from both cases
digits = string.digits  # '0'..'9', used for column 2

def rubbish_string(selection, l):
    """Return a random string of ``l`` characters drawn from ``selection``.

    Each character is picked independently by drawing a random index into
    ``selection`` with ``random.randint``, so characters may repeat.
    """
    last_index = len(selection) - 1
    return ''.join(selection[random.randint(0, last_index)] for _ in range(l))

def writeRandomFile(filename):
    """Write a header line plus 10-20 random "letters,digits" lines to ``filename``.

    The header is 'Personal ID,<se>' using the module-level ID ``se``. Each
    data line holds 5 characters from ``ascii_letters`` and 4 characters
    from ``digits``, separated by a comma.
    """
    # The with-statement closes the file even if an exception is raised.
    with open(filename, 'w') as fout:
        fout.write('Personal ID,{}\n'.format(se))
        row_count = random.randint(10, 20)
        for _ in range(row_count):
            letters = rubbish_string(ascii_letters, 5)
            numbers = rubbish_string(digits, 4)
            fout.write(','.join((letters, numbers)) + '\n')

def selectionSort(alist, col):
    """Sort comma-separated lines in place by one column, ignoring letter case.

    This is a selection sort: on each pass the entry with the largest key in
    the unsorted prefix is swapped into the last slot of that prefix.

    Args:
        alist: List of strings, each a CSV line such as 'ANcPI,0123'.
            The list is reordered in place.
        col: 1-based number of the column to sort on (1 = first field).

    Returns:
        None. ``alist`` itself is modified.
    """
    # Users count columns from 1, list indices start at 0.
    key_index = col - 1

    def sort_key(line):
        # casefold() makes 'beMhy' and 'BEMHY' compare equal, so uppercase
        # entries are no longer all placed before lowercase ones.  Digit
        # strings are unaffected by it.
        return line.split(",")[key_index].casefold()

    for fillslot in range(len(alist) - 1, 0, -1):
        positionOfMax = 0
        max_key = sort_key(alist[0])
        for location in range(1, fillslot + 1):
            candidate = sort_key(alist[location])
            if candidate > max_key:
                positionOfMax = location
                # Remember the key so the current maximum is not re-split
                # on every comparison.
                max_key = candidate

        alist[fillslot], alist[positionOfMax] = alist[positionOfMax], alist[fillslot]
                                                                                           # Main part...
                                                                                           # Create random file based on the user data
writeRandomFile('unsorted.csv')

# Let the user pick the column to sort on (1 or 2); keep asking until a
# valid choice is entered.
sortOnColumn = -1
while sortOnColumn != 1 and sortOnColumn != 2:
    try:
        sortOnColumn = int(input("Which column should the files be sorted on?"))
    except ValueError:
        # Non-numeric input: ask again instead of crashing.
        continue

# Load the unsorted file into a list; the with-statement guarantees the
# file is closed afterwards.
with open('unsorted.csv', 'r') as fin:
    header = next(fin)  # First line holds the Personal ID, not random data
    data = [line.strip() for line in fin]

# Sort the lines in place on the chosen column.
selectionSort(data, sortOnColumn)

# Write the header followed by the sorted lines.  The list must not be
# re-sorted inside this loop: changing a list while iterating over it can
# repeat or skip entries and would discard the column-based order.
with open('sorted.csv', 'w') as fout:
    fout.write(header)
    for entry in data:
        fout.write(entry + "\n")
print('Your Files Have Been Generated, Goodbye!')

1 个答案:

答案 0 :(得分:0)

正如 PEP8 online 检查所显示的,您的代码中存在很多风格错误。如果可以,今后请使用 PyCharm Community 或 PyCharm Edu,这样在编写代码时就能自动检查代码。以下是您的代码的修订版本:

import csv
import random
import string
import sys

# CSV file that receives the randomly generated rows.
UNSORTED_FILENAME = 'unsorted.csv'
# CSV file that receives the rows after they have been sorted.
SORTED_FILENAME = 'sorted.csv'


def main():
    """Generate the random CSV, sort it on a user-chosen column, save the result."""
    # Pass a fixed integer to setup_random_sequence() to skip the prompt
    # during development.
    user_seed = setup_random_sequence()
    write_random_file(UNSORTED_FILENAME, user_seed)
    heading, rows = load_and_sort(UNSORTED_FILENAME)
    write_csv_file(SORTED_FILENAME, heading, rows)
    print('Your files have been generated. Goodbye!')


def setup_random_sequence(seed=None):
    """Seed the random module and return the seed that was used.

    When ``seed`` is None the user is prompted for an integer ID via
    ``get_int``; otherwise the given value is used as-is.
    """
    chosen = (
        get_int('Enter your ID for your random number sequence: ')
        if seed is None
        else seed
    )
    random.seed(chosen, version=2)
    return chosen


def get_int(prompt='>>> '):
    """Prompt until the user types a valid integer, then return it.

    Exits the program through ``sys.exit()`` if input ends (EOF) before a
    valid integer has been entered.
    """
    while True:
        try:
            raw = input(prompt)
        except EOFError:
            sys.exit()
        try:
            number = int(raw)
        except ValueError:
            print('You must enter an integer.')
            continue
        return number


def write_random_file(filename, seed):
    """Write a 'Personal ID' header and 10-20 random rows to ``filename``.

    Each row pairs a 5-character string of ASCII letters with a 4-character
    string of digits.
    """
    row_count = random.randint(10, 20)

    def generate_rows():
        # Rows are produced lazily, while the CSV writer consumes them.
        for _ in range(row_count):
            yield (rubbish_string(string.ascii_letters, 5),
                   rubbish_string(string.digits, 4))

    write_csv_file(filename, ('Personal ID', seed), generate_rows())


def write_csv_file(filename, header, data):
    """Write ``header`` as the first CSV row of ``filename``, then every row of ``data``.

    ``data`` may be any iterable of rows. An existing file is overwritten.
    """
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(data)


def rubbish_string(selection, length):
    """Return ``length`` characters chosen at random from ``selection``, joined into one string."""
    picks = [random.choice(selection) for _ in range(length)]
    return ''.join(picks)


def load_and_sort(filename):
    """Ask which column to sort on, then load ``filename`` and sort its rows.

    The user is prompted until they answer 1 or 2. The first CSV row is
    treated as the header and is not sorted.

    Returns:
        A ``(header, data)`` tuple: the header row and the list of sorted
        data rows.
    """
    question = 'Which column should file be sorted on? '
    sort_on_column = get_int(question)
    while sort_on_column not in {1, 2}:
        print('Column number is out of range.')
        sort_on_column = get_int(question)

    with open(filename, newline='') as source:
        rows = csv.reader(source)
        header = next(rows)
        data = list(rows)

    selection_sort(data, sort_on_column)
    return header, data


def selection_sort(array, column):
    """Sort ``array`` in place by the given 1-based ``column``, ignoring case.

    Despite its name, this delegates to ``list.sort`` with a case-folded
    key rather than implementing a selection sort by hand.
    """
    def folded_cell(row):
        return row[column - 1].casefold()

    array.sort(key=folded_cell)

# Run the program only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()

您可以在 PEP8 online 上看到,这段代码不再有任何错误。如果您需要进一步修改,请告诉我们。