Orange 数据表 - 合并两个表

时间:2015-03-29 22:19:26

标签: python data-mining orange

我一直在尝试把一个数据表去掉决策(类别)列后复制到第二个表中,然后打乱(shuffle)第二个表的数据,再把这两个表合并为一个表。但我没能做到,因为合并时出现了错误。我觉得 Orange 的文档不够详细,不知道该如何实现这一点。下面是我的代码和得到的错误信息。如果有人知道怎么做,请帮帮我。

# Question script (Python 2 syntax: `print` statements, `except X, name`).
# It loads a table, renames the variables of that table with a "_1"
# suffix, tries to build a second table from them, shuffles it and
# merges it with a freshly loaded copy of the original table.
db_name = "titanic"
data = Orange.data.Table(db_name)

# NOTE: despite its name, `new_domain` is bound to the table itself,
# not to a domain object.
new_domain = data

input_domain = new_domain.domain

# Flat list of the domain's variables, class_vars and meta variables.
all_vars = (list(input_domain.variables) +
            list(input_domain.class_vars) +
            input_domain.getmetas().values())

edited_vars = []

# NOTE(review): `domain_change_hints` is not defined anywhere in this
# snippet; the original comment below looks carried over from other code.
# Apply any saved transformations as listed in
# `domain_change_hints`
import OWEditDomain

# `range(len(all_vars)-1)` visits every entry except the last one
# (presumably to leave out the class column -- confirm).
for var in range(len(all_vars)-1):
    # Append "_1" to every value label. This edits the variable object
    # taken from the loaded table's domain in place; it does not work on
    # a copy. Assumes each visited variable has a `values` sequence.
    for x in range(len(all_vars[var].values)):
        all_vars[var].values[x] +='_1'

    # The rename is also applied in place to the same variable object.
    all_vars[var].name += '_1'

    desc = OWEditDomain.variable_description(all_vars[var])

    # Try to build a new variable from the description; on ValueError
    # fall back to the (already modified) original. `ex` is unused.
    try:
        new = OWEditDomain.variable_from_description(desc)
    except ValueError, ex:
        new = None

    if new is not None:
        # Make sure orange's domain transformations will work.
        new.source_variable = all_vars[var]
        new.get_value_from =               Orange.core.ClassifierFromVar(whichVar=all_vars[var])
        all_vars[var] = new

    edited_vars.append(all_vars[var])

# NOTE(review): the first argument here is the table (see `new_domain`
# above) and the second is a list of variables, not a domain plus rows --
# confirm this is a supported way to construct a table.
tabela = Orange.data.Table(new_domain, edited_vars)

tabela.shuffle()

# Reload the original data under the name `data`.
data = Orange.data.Table(db_name)

# This is the call that raises the KernelException reported with the
# question ("mismatching value of attribute 'survived' in example #0").
merged = Orange.data.Table([data, tabela])
#
print "Domain 1: ", data.domain
print "Domain 2: ", tabela.domain
print "Merged:   ", merged.domain

错误:

Traceback (most recent call last):
  File "C:/Python27/Lib/site-packages/Orange/OrangeWidgets/Classify/OWCN2.py", line 412, in <module>
    merged = Orange.data.Table([data, tabela])
orange.KernelException: 'orange.ExampleTable': mismatching value of attribute 'survived' in example #0

1 个答案:

答案 0 :(得分:1)

试试这个

def clone_var(var, namefmt="%s"):
    """Return a newly constructed copy of an Orange feature descriptor.

    The clone is of the same kind as `var` (discrete, continuous or
    string) and is named ``namefmt % var.name``.

    :param var: feature to copy; an instance of Orange.feature.Discrete,
        Orange.feature.Continuous or Orange.feature.String.
    :param namefmt: %-style format string with one ``%s`` slot that
        receives the original name. The default keeps the name unchanged.
    :return: a new feature object of the same kind as `var`.
    :raises TypeError: if `var` is not one of the three supported kinds.
    """
    if isinstance(var, Orange.feature.Discrete):
        # Pass a fresh list so the clone cannot end up sharing the source
        # variable's value sequence.
        newvar = Orange.feature.Discrete(namefmt % var.name,
                                         values=list(var.values))
    elif isinstance(var, Orange.feature.Continuous):
        newvar = Orange.feature.Continuous(namefmt % var.name)
        # Carry over the display settings of the source variable.
        newvar.number_of_decimals = var.number_of_decimals
        newvar.scientific_format = var.scientific_format
    elif isinstance(var, Orange.feature.String):
        newvar = Orange.feature.String(namefmt % var.name)
    else:
        # Name the offending type so the failure is diagnosable.
        raise TypeError("cannot clone variable of unsupported type %r"
                        % type(var).__name__)
    # Shallow copy: the clone gets its own attributes dict.
    newvar.attributes = dict(var.attributes)
    return newvar


def clone_table(table):
    """Return a copy of `table` built on freshly cloned variables.

    Every feature, the class variable (when there is one) and every entry
    of ``class_vars`` is cloned with `clone_var` and renamed with a
    ``_1`` suffix. The new table is filled from ``table.native(2)``.
    Only these three groups of variables go into the new domain.

    :param table: the source table.
    :return: a new table over the cloned domain.
    """
    source = table.domain
    renamed = "%s_1"

    features = []
    for var in source.features:
        features.append(clone_var(var, renamed))

    # Stay class-less unless the source domain actually has a class.
    class_var = None
    if source.class_var is not None:
        class_var = clone_var(source.class_var, renamed)

    class_vars = []
    for var in source.class_vars:
        class_vars.append(clone_var(var, renamed))

    cloned_domain = Orange.data.Domain(features, class_var,
                                       class_vars=class_vars)
    rows = table.native(2)
    return Orange.data.Table(cloned_domain, rows)

# NOTE: `table` is not defined in this snippet; it is expected to be an
# already loaded Orange.data.Table.

# Copy of `table` over a domain made of its features only, with None
# passed as the class variable (i.e. without the class column).
table_nocls = Orange.data.Table(Orange.data.Domain(table.domain.features, None), table)
# Clone it onto newly created, "_1"-suffixed variables, then shuffle the
# clone in place.
table_clone = clone_table(table_nocls)
table_clone.shuffle()

# Combine the original table with the shuffled clone. Because the clone
# uses new variables, none of them is shared with `table`.
merged = Orange.data.Table([table, table_clone])