Question

我正在寻找一种在数据框中为子类别编号（索引）的巧妙方法。我在下面创建了一个非常简单的可复现示例。您会如何编写从输入到输出的步骤（即如何编写创建 color_id 变量的代码）？

非常感谢您对此的看法！

# Desired result: color_id numbers the rows within each label (1, 2, ... per label),
# and count holds the number of rows that share that label.
output <- data.frame(label = c("red", "red", "blue", "green", "green", "green", "orange"), count = c(2, 2, 1, 3, 3 ,3, 1), color_id = c(1, 2, 1, 1, 2, 3, 1))

enter image description here

phpdoc

enter image description here

祝你好运

Answer 1

使用data.table：

import time
import math
import random

def f(n, k):
    """Return ``exp(k / n) - 1``."""
    exponent = k / n
    return math.exp(exponent) - 1

def individual(length, min, max):
    """Create a member of the population.

    Returns a list of ``length`` integers, each drawn with
    ``random.randint(min, max)`` (both bounds inclusive).
    """
    genes = []
    for _ in range(length):
        genes.append(random.randint(min, max))
    return genes

def population(count, length, min, max):
    """Create a number of individuals (i.e. a population).

    count: the number of individuals in the population
    length: the number of values per individual
    min: the minimum possible value in an individual's list of values
    max: the maximum possible value in an individual's list of values

    Returns a list holding ``count`` results of ``individual(length, min, max)``.
    """
    members = []
    for _ in range(count):
        members.append(individual(length, min, max))
    return members

def fitness(individual, target):
    """Determine the fitness of an individual. Lower is better.

    The first three values of ``individual`` are completed with a derived
    fourth value; the result is the absolute difference between ``target``
    and the sum of ``f(eu461.BASE, k)`` over every value of ``individual``
    plus that fourth value.

    individual: the individual to evaluate (needs at least three values)
    target: the target number individuals are aiming for

    Note: the derived fourth value is always computed against ``math.pi``,
    independently of ``target``.
    """
    def get_best_last_element(a, b, c):
        # Remainder still missing after a, b and c, shifted by 1 so that
        # log() inverts f: f(BASE, l) == s - 1  <=>  l == BASE * log(s).
        s = math.pi - f(eu461.BASE, a) - f(eu461.BASE, b) - f(eu461.BASE, c)
        s += 1

        # s <= 1 means a, b and c already reach or overshoot pi, so the
        # fourth value is clamped to 0 instead of going negative.
        if s > 1:
            return round(math.log(s) * eu461.BASE)
        else:
            return 0

    last = get_best_last_element(individual[0], individual[1], individual[2])

    return abs(target - sum([f(eu461.BASE, k) for k in individual]) - f(eu461.BASE, last))

def grade(pop, target):
    """Find average fitness for a population."""
    total = sum(fitness(member, target) for member in pop)
    return total / len(pop)

def evolve(pop, target, retain=0.2, random_select=0.05, mutate=0.01):
    """Produce the next generation from ``pop``.

    pop: the current population (a list of individuals, each a list)
    target: the target number passed on to ``fitness``
    retain: fraction of the best-ranked individuals always kept as parents
    random_select: probability that any other individual is kept as well
    mutate: probability that a parent has one of its values replaced

    Returns a new list: the selected parents followed by enough crossover
    children to reach ``len(pop)`` again. Parents are the same list objects
    as in ``pop`` and are mutated in place.

    Raises ValueError when children are needed but fewer than two parents
    were selected, because crossover requires two distinct parents.
    """
    # rank by fitness, lowest (closest to target) first
    ranked = [entry[1] for entry in sorted((fitness(x, target), x) for x in pop)]

    retain_length = int(len(ranked) * retain)
    parents = ranked[:retain_length]

    # randomly add other individuals to
    # promote genetic diversity
    for candidate in ranked[retain_length:]:
        if random_select > random.random():
            parents.append(candidate)

    parents_length = len(parents)
    desired_length = len(pop) - parents_length

    # Without this guard a single parent makes the crossover loop below spin
    # forever (two distinct indices can never be drawn). Checked before the
    # mutation step so a failing call leaves ``pop`` untouched.
    if desired_length > 0 and parents_length < 2:
        raise ValueError(
            "evolve() needs at least two parents to breed children; "
            "got %d from a population of %d" % (parents_length, len(pop))
        )

    # mutate some individuals
    for parent in parents:
        if mutate > random.random():
            pos_to_mutate = random.randint(0, len(parent) - 1)

            # this mutation is not ideal, because it
            # restricts the range of possible values,
            # but the function is unaware of the min/max
            # values used to create the individuals,
            parent[pos_to_mutate] = random.randint(min(parent), max(parent))

    # crossover parents to create children
    children = []
    while len(children) < desired_length:
        first = random.randint(0, parents_length - 1)
        second = random.randint(0, parents_length - 1)
        if first == second:
            # same parent drawn twice: draw again
            continue
        male = parents[first]
        female = parents[second]
        half = len(male) // 2
        # coin flip decides which parent contributes the leading half
        if random.randint(0, 1):
            child = male[:half] + female[half:]
        else:
            child = female[:half] + male[half:]
        children.append(child)
    parents.extend(children)

    return parents

def get_best_last_element(a, b, c):
    """Return the non-negative value that best completes a, b and c towards pi.

    With ``s = 1 + pi - f(BASE, a) - f(BASE, b) - f(BASE, c)``, the value
    ``l`` solving ``f(BASE, l) == s - 1`` is ``BASE * log(s)``; it is rounded
    to the nearest integer. When a, b and c already reach or overshoot pi
    (``s <= 1``) the result is 0, matching the helper used inside ``fitness``
    so the reported element agrees with the one that was scored.
    """
    s = math.pi - f(eu461.BASE, a) - f(eu461.BASE, b) - f(eu461.BASE, c)
    s += 1

    # s > 1 (not s > 0): for 0 < s < 1 the logarithm is negative and would
    # yield a negative element.
    if s > 1:
        return round(math.log(s) * eu461.BASE)
    return 0

def eu461(p_count=10000, generations=150):
    """Run the genetic search and report the best individual found.

    p_count: the number of individuals in the population
    generations: how many times ``evolve`` is applied

    Returns a tuple ``(best, last, total)`` where ``best`` is the fittest
    individual of the final population, ``last`` is the fourth value derived
    for it by ``get_best_last_element`` and ``total`` is the sum of
    ``f(eu461.BASE, k)`` over the values of ``best`` plus ``last``.

    The scaling constant is stored as the function attribute ``eu461.BASE``.
    """
    target = math.pi
    i_length = 3
    i_min = 0
    # the k at which a single term f(BASE, k) is approximately pi
    i_max = round(eu461.BASE * math.log(math.pi + 1))

    p = population(p_count, i_length, i_min, i_max)

    for _ in range(generations):
        p = evolve(p, target)

    # evolve() returns parents (ranked before mutation) followed by unranked
    # children, so p[0] is not guaranteed to be the fittest: pick it explicitly.
    best = min(p, key=lambda candidate: fitness(candidate, target))
    last = get_best_last_element(best[0], best[1], best[2])
    total = sum([f(eu461.BASE, k) for k in best]) + f(eu461.BASE, last)

    return best, last, total
eu461.BASE = 200

if __name__ == "__main__":
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    startTime = time.perf_counter()
    print (eu461())
    elapsedTime = time.perf_counter() - startTime
    print ("Time spent in (", __name__, ") is: ", elapsedTime, " sec")

Answer 2

# Alternative answer using the splitstackshape package.
# NOTE(review): `input` is not defined in the snippet shown here; presumably it is the
# question's data frame before color_id is added -- confirm. getanID is expected to add
# a within-group sequence id keyed on 'label'; verify against the package documentation.
library(splitstackshape)
getanID(input, 'label')

索引数据框中的子组

2 个答案: