Python:最不常见的字符串出现多少次?

时间:2015-04-09 10:17:42

标签: python function csv

这是一个链接到代码

的csv文件的示例

csv image

我想知道如何求出字段 [gas] 中最不常见的字符串出现的次数:
def least_string(gas):
    """Record one more occurrence of ``gas`` in the module-level ``gasdic`` tally."""
    # A gas that has not been seen yet counts from zero, so it lands on 1.
    gasdic[gas] = gasdic.get(gas, 0) + 1

我已经把前半部分改写成了函数

# Bump the tally for this gas; a gas seen for the first time starts from zero.
gasCount[gas] = gasCount.get(gas, 0) + 1

打印出来

{'Nitrogen': 3, 'Methane': 3, 'Helium': 2, 'CarbonDioxide': 1, ' Chlorine': 3, 'Oxygen': 3, 'Xenon': 1, 'Hydrogen': 2, 'Argon': 1}

我需要把这部分也改写成函数

# Lowest occurrence count among all gases. Taking min() of the counts removes
# the arbitrary 100000 starting value, which silently produced no answers
# whenever every count was at least that large. An empty tally gives 0.
smallest = min(gasCount.values(), default=0)

# Every gas whose count ties for that minimum, in the dict's iteration order.
answers = [key for key in gasCount if gasCount[key] == smallest]

所以它会打印出来

The least common string appear in the field [gas]: ['CarbonDioxide', 'Xenon', 'Argon']

这是完整的代码

import string

def getFile():
    """Prompt for a CSV filename and return the open file, leading lines skipped.

    Returns:
        The open text file with its first two lines already consumed, or
        ``None`` (after closing the file) when the file is empty.

    Raises:
        OSError: if the file cannot be opened.
    """
    filename = input('Filename: ')  # the file name should be .csv
    file = open(filename, 'r')

    # readline() reports end-of-file as '' (never None), so an empty first
    # read means there is nothing to process at all.
    if not file.readline():
        file.close()  # do not leak the handle on the early-exit path
        return None

    # NOTE(review): a second line is discarded here, exactly as the original
    # code did. If the CSV has a single header row, this drops the first data
    # row -- confirm against the real file layout before changing it.
    file.readline()

    return file

#Count the number of (T's) in the field [correct]
def calcT(correct):
    """Add one to the global ``tCount`` when ``correct`` contains 't' or 'T'."""
    global tCount
    # The field counts once, however many T's it holds.
    if any(ch in 'tT' for ch in correct):
        tCount += 1
#How many times does the least common string appear in the field [gas]
def least_string(gas):
    """Record one more occurrence of ``gas`` in the global ``gasdic`` tally."""
    # Unseen gases are treated as having a count of zero so far.
    gasdic[gas] = gasdic.get(gas, 0) + 1


#Find the sum of the values in the field [quant] less than (408)
def sum_quant(quant):
    """Accumulate ``quant`` into the global ``qsum`` when it is below 408."""
    global qsum
    if not quant < 408:
        return  # 408 and above are left out of the total
    qsum += quant

#How many values in the 'code' field do not match the format 9999(x9+)9?
def checkString(astring):
    """Return True when ``astring`` has exactly the shape ``9999(x9+)9``.

    That is: four ASCII digits, '(', one ASCII lowercase letter, one digit,
    '+', ')', and a final digit -- ten characters in total. Anything else,
    including a wrong length, gives False.
    """
    digit = frozenset(string.digits)
    lower = frozenset(string.ascii_lowercase)
    # One set of acceptable characters for each position of the code.
    template = (
        digit, digit, digit, digit,
        {'('}, lower, digit, {'+'}, {')'},
        digit,
    )
    if len(astring) != len(template):
        return False
    return all(ch in allowed for ch, allowed in zip(astring, template))



#What is the average value of the numbers in the field [age] in the range (30) and (107) inclusive
def average_age(age):
    """Fold ``age`` into the running average inputs when it lies in 30..107.

    Updates two globals: ``tAge`` (sum of the in-range ages) and ``ageCount``
    (how many ages were added to that sum). Out-of-range ages change neither.
    """
    global tAge, ageCount
    if 30 <= age <= 107:
        tAge += age
        # Count only the ages actually summed; counting every row would make
        # tAge / ageCount divide by rows that contributed nothing to tAge.
        ageCount += 1

#Find the sum of the numbers in field [length] between (2.482) and (6.428) inclusive
def sum_Length(leng):
    """Accumulate ``leng`` into the global ``lensum`` when 2.482 <= leng <= 6.428."""
    global lensum
    # Both bounds are inclusive.
    if 2.482 <= leng <= 6.428:
        lensum += leng

#count the lines where gas's have the value (Nitrogen) *or* quant is less than 318
def calcGas(gas, quant):
    """Add one to the global ``clines`` for a Nitrogen row or a quant below 318."""
    global clines
    if gas != 'Nitrogen' and not quant < 318:
        return  # neither condition holds, so this row is not counted
    clines += 1

def processLine(Line):
    """Parse one CSV data row and feed its fields to every statistic.

    Columns, in the order they are read below: correct, gas, quant (int),
    code, age (int), length (float).

    Raises:
        IndexError: if the row has fewer than six comma-separated fields.
        ValueError: if quant, age or length is not numeric.
    """
    global cCount

    fields = Line.strip().split(',')

    correct = fields[0]
    gas = fields[1]
    quant = int(fields[2])
    code = fields[3]
    age = int(fields[4])
    leng = float(fields[5])

    # cCount is reported as the number of codes that do NOT match the
    # 9999(x9+)9 format, so count the rows that checkString rejects.
    if not checkString(code):
        cCount += 1

    calcT(correct)
    sum_Length(leng)
    calcGas(gas, quant)
    average_age(age)
    sum_quant(quant)
    least_string(gas)


def processFile(data):
    """Run ``processLine`` over every line of ``data``, then close it.

    Args:
        data: an open, iterable file-like object, or ``None`` when there is
            no file to read (``getFile`` returns ``None`` for an empty file),
            in which case nothing is done.
    """
    if data is None:
        return

    try:
        for line in data:
            processLine(line)
    finally:
        # Close the file even when a malformed row makes processLine raise.
        data.close()

def displayResults():
    """Print every collected statistic to standard output.

    Reads the module-level totals (tCount, gasdic, answers, qsum, cCount,
    tAge, ageCount, lensum, clines); nothing is modified or returned.
    """
    #Count the number of (T's) in the field [correct]
    print('The number of (T) in the field [correct]: %d'%(tCount))
    print('-' *10)
    print(gasdic)
    # The 1-tuple keeps the value formatted as a single item even if
    # ``answers`` is ever a tuple rather than a list.
    print('The least common string appear in the field [gas]:%s'%(answers,))
    print('-' *10)
    #Find the sum of the values in the field [quant] less than (408)
    print('The sum of the values in the field [quant] less than (408): %d'%(qsum))
    print('-' *10)
    #How many values in the 'code' field do not match the format 9999(x9+)9?
    print('The values in the code field do not match the format 9999(x9+)9: %d'%(cCount))
    print('-' *10)
    #What is the average value of the numbers in the field [age] in the range (30) and (107) inclusive
    # With no ages counted there is nothing to average; report 0 rather than
    # failing with ZeroDivisionError.
    average = tAge / ageCount if ageCount else 0.0
    print('The average value of numbers in the field[age] in range(30)and(107):%0.2f'%(average))
    print('-' *10)
    #Find the sum of the numbers in field [length] between (2.482) and (6.428) inclusive
    print('The sum of the numbers in field [length] between (2.482) and (6.428): %6.3f'%(lensum))
    print('-' *10)
    #count the lines where gas's have the value (Nitrogen) *or* quant is less than 318
    print('The lines where gas have the value (Nitrogen) *or* quant is less than 318: %d' %(clines))


# Running totals shared, as globals, by the per-row helper functions.
tCount = 0
qsum = 0
gasdic = {}
answers = []
cCount = 0
ageCount = 0
tAge = 0
lensum = 0
clines = 0
myfile = getFile()
processFile(myfile)

# Nothing else fills ``answers``, so derive it from the finished gas tally:
# every gas whose count ties for the lowest one. An empty tally leaves it [].
if gasdic:
    fewest = min(gasdic.values())
    answers = [gas for gas, count in gasdic.items() if count == fewest]

displayResults()

4 个答案:

答案 0 :(得分:5)

from collections import Counter

def least_common(ls):
    """Return every item of ``ls`` that occurs the fewest times.

    Args:
        ls: an iterable of hashable items.

    Returns:
        A list of the items tied for the lowest occurrence count; an empty
        list when ``ls`` has no items.
    """
    c = Counter(ls)
    if not c:
        # min() of an empty sequence raises ValueError; no items, no answers.
        return []
    m = min(c.values())
    return [k for k, v in c.items() if v == m]

least_common('Foo Bar FooBar Bar'.split()) # ['Foo', 'FooBar'] on Python 3.7+ (first-seen order); older versions may order the ties differently

答案 1 :(得分:1)

def least_common(iterable):
    """Return the items of ``iterable`` that occur least often.

    Args:
        iterable: any iterable of hashable items (a list, a string, a
            generator, ...).

    Returns:
        A list of the items tied for the lowest occurrence count; an empty
        list when ``iterable`` has no items.
    """
    # Tally every item once up front instead of calling .count() repeatedly.
    tally = {}
    for word in iterable:
        tally[word] = tally.get(word, 0) + 1
    if not tally:
        return []

    # The largest count bounds the search below.
    maximum = max(tally.values())
    # Walk the possible counts upward; the first count that any item has is
    # the minimum. Compare with ==, not `is`: whether two equal ints are the
    # same object is an implementation detail and fails for larger counts.
    for length in range(1, maximum + 1):
        result = [word for word, seen in tally.items() if seen == length]
        if result:
            return result
    return []  # not reached: some item always has a count within the range

采取的步骤:

  1. 获取最大出现次数,作为一个有意义的上界。
  2. 在这个范围内从小到大依次遍历每个次数。
  3. 遍历 iterable 中的元素(在本例中就是各种气体)。
  4. 如果结果列表的长度不为零,说明已经找到了最不常见的元素;直接返回!

答案 2 :(得分:1)

你可以按值(value)对字典进行排序。

>>> a={'Nitrogen': 3, 'Methane': 3, 'Helium': 2, 'CarbonDioxide': 1, ' Chlorine': 3, 'Oxygen': 3, 'Xenon': 1, 'Hydrogen': 2, 'Argon': 1}
>>> sorted(a, key=a.__getitem__)
['Argon', 'CarbonDioxide', 'Xenon', 'Hydrogen', 'Helium', 'Oxygen', 'Nitrogen', ' Chlorine', 'Methane']

答案 3 :(得分:0)

defaultdict在这里很方便:

from collections import defaultdict

def least_string(gas_dict):
    """Return the items of ``gas_dict`` that occur the fewest times.

    Args:
        gas_dict: despite the name, any iterable of hashable items; each
            item yielded by iterating it is counted once.

    Returns:
        A list of the items tied for the lowest occurrence count; an empty
        list when ``gas_dict`` yields nothing.
    """
    count = defaultdict(int)
    for gas in gas_dict:
        count[gas] += 1
    if not count:
        # min() of an empty sequence raises ValueError; no items, no answers.
        return []
    min_count = min(count.values())
    return [k for k, v in count.items() if v == min_count]

可能的测试用例:

>>> word_list = "Foo Bar Foobar Bar".split()
>>> print(least_string(word_list))
['Foo', 'Foobar']