我正在为关系数据库开发一个随机数据生成器。对于没有关联关系的表,生成器可以正常运行;但如果表带有外键,生成器就会崩溃。
我研究了一下,发现拓扑排序算法可以解决问题,但我无法将这个想法与我的算法联系起来。
拓扑排序算法(http://blog.jupo.org/2012/04/06/topological-sorting-acyclic-directed-graphs/)
def topolgical_sort(graph_unsorted):
    """Return the graph's ``(node, edges)`` pairs in topological order.

    The graph is swept repeatedly. On each sweep, every node whose edges
    no longer point at a node still waiting in the unsorted graph is moved
    onto the result, so a node is always emitted after all the nodes it
    depends on.

    Args:
        graph_unsorted: A mapping, or an iterable of ``(node, edges)``
            pairs, where ``edges`` is an iterable of the nodes that
            ``node`` depends on. Nodes must be hashable.

    Returns:
        A list of ``(node, edges)`` tuples, dependencies first.

    Raises:
        RuntimeError: If a full sweep resolves nothing, which means the
            remaining nodes depend on each other cyclically (a node that
            lists itself as an edge counts as a cycle).
    """
    graph_sorted = []

    # Work on a private dict: it gives constant-time membership tests and
    # removals, and leaves the caller's object untouched.
    graph_unsorted = dict(graph_unsorted)

    while graph_unsorted:
        # Becomes True as soon as this sweep manages to resolve a node.
        acyclic = False

        # Iterate over a snapshot so entries can be deleted from the live
        # dict while sweeping. Membership is tested against the live dict,
        # so a node resolved earlier in this sweep already unblocks the
        # nodes visited after it.
        for node, edges in list(graph_unsorted.items()):
            if any(edge in graph_unsorted for edge in edges):
                # Still waiting on at least one unresolved dependency.
                continue
            # Edges naming nodes that are not part of the graph at all are
            # treated as already resolved.
            acyclic = True
            del graph_unsorted[node]
            graph_sorted.append((node, edges))

        if not acyclic:
            # Nothing could be resolved in a whole sweep, so what is left
            # can never be resolved. Name the leftovers to make the
            # offending dependency chain easy to find.
            raise RuntimeError(
                "A cyclic dependency occurred; unresolved nodes: %s"
                % ", ".join(repr(node) for node in graph_unsorted)
            )

    return graph_sorted
我开发的算法:
def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier.

    Embedded double quotes are doubled so the name cannot terminate the
    identifier early. Percent signs are doubled as well, because the result
    is embedded in a statement that is executed with bound parameters,
    where a lone ``%`` would be read as a placeholder marker.
    """
    return '"%s"' % name.replace('"', '""').replace('%', '%%')


def main():
    """Insert randomly generated rows into every user table of a database.

    Connection settings are read from ``config.json`` (keys ``host``,
    ``dbname``, ``user``, ``password``, ``port``) together with
    ``number_inserts``, the number of rows to insert into each table.

    For each base table outside ``pg_catalog`` and ``information_schema``
    the column metadata is fetched, and for every row one ``Rdgrd`` per
    column decides whether the column is skipped (``'jump'``) or receives
    a generated value.

    Tables are processed in the order the catalog query returns them;
    foreign-key dependencies between tables are not taken into account.

    Any error is printed, logged, and terminates the process with exit
    status 1. The connection is closed on every exit path.
    """
    conn = None
    try:
        # Close the config file deterministically instead of leaking it.
        with open('config.json') as config_file:
            config = json.load(config_file)

        # Keyword arguments avoid hand-building a DSN string, which breaks
        # as soon as a value (typically the password) contains a space or
        # a quote.
        conn = psycopg2.connect(
            host=config['host'],
            dbname=config['dbname'],
            user=config['user'],
            password=config['password'],
            port=config['port']
        )
        cur = conn.cursor()

        # Getting all tables from database
        cur.execute(
            "SELECT table_name "
            "FROM information_schema.tables "
            "WHERE "
            "table_type = 'BASE TABLE' "
            "AND "
            "table_schema NOT IN ('pg_catalog', 'information_schema')"
        )

        # fetchall() materialises the table list, so the cursor can be
        # reused for further statements inside the loop.
        for (table_name,) in cur.fetchall():
            # The table name is bound as a parameter, not spliced into the
            # SQL text.
            cur.execute(
                "SELECT "
                "COLUMN_NAME, DATA_TYPE, "
                "IS_NULLABLE, CHARACTER_MAXIMUM_LENGTH "
                "FROM INFORMATION_SCHEMA.COLUMNS "
                "WHERE TABLE_NAME = %s",
                (table_name,)
            )
            columns = cur.fetchall()

            for _ in range(config['number_inserts']):
                names = []
                values = []
                for name, data_type, nullable, max_length in columns:
                    # One generator per column per row serves both the
                    # support check and the value generation.
                    # NOTE(review): assumes datatype_is_supported() gives
                    # the same answer for equal Attr values - confirm.
                    rdgrd = Rdgrd(Attr(name, data_type, nullable, max_length))
                    if rdgrd.datatype_is_supported() == 'jump':
                        continue
                    names.append(_quote_identifier(name))
                    # Send the value's text form, as the original quoted
                    # literal did, but let the driver do the escaping so a
                    # quote inside generated data cannot break the
                    # statement.
                    values.append("%s" % rdgrd.generate_data())

                if not names:
                    # No insertable column: the statement would be
                    # malformed, and it would be for every row of this
                    # table, so move on to the next table.
                    logging.warning(
                        "Skipping table %s: no supported columns", table_name)
                    break

                insert_sql = "INSERT INTO %s(%s) VALUES(%s)" % (
                    _quote_identifier(table_name),
                    ",".join(names),
                    ",".join(["%s"] * len(values))
                )
                # mogrify() renders the statement exactly as it will be
                # sent, keeping the debug output of the full INSERT.
                print(cur.mogrify(insert_sql, values))
                cur.execute(insert_sql, values)
                # NOTE(review): the original indentation was lost; a commit
                # after every insert is assumed - confirm.
                conn.commit()
    except Exception as e:
        # psycopg2.OperationalError is an Exception subclass and was
        # handled identically, so a single handler covers both cases.
        print(e)
        logging.error(e)
        sys.exit(1)
    finally:
        # Runs on success and also while sys.exit() unwinds.
        if conn is not None:
            conn.close()
欢迎任何建议。