关系数据库随机数据生成器的拓扑排序

时间:2017-06-19 20:02:43

标签: python sql foreign-keys relational-database topological-sort

我正在为关系数据库开发一个随机数据生成器。对于没有关联关系的表,生成器可以正常运行;但如果表带有外键,生成器就会崩溃。

我研究了一下,发现拓扑排序算法可以解决问题,但我无法将这个想法与我的算法联系起来。

拓扑排序算法(http://blog.jupo.org/2012/04/06/topological-sorting-acyclic-directed-graphs/)

def topolgical_sort(graph_unsorted):
    """
    Order the nodes of a dependency graph so that every node appears
    after all of the nodes its edges point to.

    ``graph_unsorted`` is anything ``dict()`` accepts that maps a node
    to an iterable of the nodes it depends on (for example a list of
    ``(node, edges)`` pairs). The result is a list of ``(node, edges)``
    pairs in dependency order. Edges that name a node which is not a
    key of the graph are treated as already satisfied.

    Raises ``RuntimeError`` when the graph contains a cycle.
    """
    # Work on a private dict: membership tests and removals are O(1),
    # and the caller's object is left untouched.
    pending = dict(graph_unsorted)
    ordered = []

    while pending:
        # Set as soon as one node is emitted during this sweep. A full
        # sweep that emits nothing means the remaining nodes all wait
        # on each other, i.e. there is a cycle.
        progressed = False

        # Iterate over a snapshot so entries can be deleted from
        # ``pending`` while sweeping. Deletions take effect at once, so
        # a node may be emitted in the same sweep as its dependencies.
        for name, deps in list(pending.items()):
            if any(dep in pending for dep in deps):
                # At least one dependency is still waiting; try again
                # on a later sweep.
                continue

            del pending[name]
            ordered.append((name, deps))
            progressed = True

        if not progressed:
            raise RuntimeError("A cyclic dependency occurred")

    return ordered

算法开发

def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier.

    Embedded double quotes are doubled so the name survives verbatim.
    Percent signs are doubled as well, because the result is spliced
    into a statement that is later executed with ``%s`` placeholders.
    """
    return '"%s"' % name.replace('"', '""').replace('%', '%%')


def main():
    """Fill every user table of the configured database with random rows.

    Reads the connection settings and ``number_inserts`` from
    ``config.json``, lists all base tables outside ``pg_catalog`` and
    ``information_schema``, and inserts ``number_inserts`` generated
    rows into each of them. All inserts are committed together at the
    end; any error is printed and logged, and the process exits with
    status 1 without committing.

    NOTE(review): tables are processed in whatever order
    ``information_schema.tables`` returns them; foreign-key
    dependencies between tables are not taken into account here.
    """
    try:
        # Close the config file deterministically instead of relying
        # on garbage collection.
        with open('config.json') as config_file:
            config = json.load(config_file)

        # Keyword arguments avoid hand-building a DSN string, which
        # breaks when a value (e.g. the password) contains spaces.
        conn = psycopg2.connect(
            host=config['host'],
            dbname=config['dbname'],
            user=config['user'],
            password=config['password'],
            port=config['port'],
        )

        try:
            cur = conn.cursor()

            # Getting all tables from database
            cur.execute(
                "SELECT table_schema, table_name "
                "FROM information_schema.tables "
                "WHERE table_type = 'BASE TABLE' "
                "AND table_schema NOT IN ('pg_catalog', 'information_schema')"
            )

            for table_schema, table_name in cur.fetchall():
                # Filter on the schema too, so that same-named tables
                # in different schemas do not get their columns mixed.
                cur.execute(
                    "SELECT column_name, data_type, "
                    "is_nullable, character_maximum_length "
                    "FROM information_schema.columns "
                    "WHERE table_schema = %s AND table_name = %s "
                    "ORDER BY ordinal_position",
                    (table_schema, table_name),
                )
                columns = cur.fetchall()

                target = "%s.%s" % (_quote_identifier(table_schema),
                                    _quote_identifier(table_name))

                for _ in xrange(config['number_inserts']):
                    names = []
                    values = []

                    # One pass per row: decide whether the column is
                    # supported and, if so, generate its value.
                    for column in columns:
                        rdgrd = Rdgrd(Attr(column[0], column[1],
                                           column[2], column[3]))

                        if rdgrd.datatype_is_supported() == 'jump':
                            continue

                        names.append(_quote_identifier(column[0]))
                        # Sent as text, exactly like the former quoted
                        # literal, but escaped by the driver.
                        values.append("%s" % (rdgrd.generate_data(),))

                    if not names:
                        # Nothing insertable; an INSERT with an empty
                        # column list would be malformed SQL.
                        logging.warning(
                            "No supported columns in %s; skipping table",
                            target)
                        break

                    statement = "INSERT INTO %s(%s) VALUES(%s)" % (
                        target,
                        ",".join(names),
                        ",".join(["%s"] * len(values)),
                    )

                    # Show the statement with its bound values.
                    print(cur.mogrify(statement, values))

                    cur.execute(statement, values)

            conn.commit()
        finally:
            # Release the connection on success and on failure alike;
            # closing without a commit discards pending inserts.
            conn.close()

    except Exception as e:
        # psycopg2.OperationalError is an Exception subclass and was
        # handled identically, so a single handler covers both.
        print(e)
        logging.error(e)
        sys.exit(1)

欢迎任何建议。

0 个答案:

没有答案