我承认这基本上是Use freebase data on local server?的重复问题,但我需要比已经提供的更详细的答案
我绝对爱上了Freebase。我现在想要的是基本上创建一个非常简单的Freebase克隆,用于存储可能不属于Freebase本身的内容,但可以使用Freebase架构进行描述。基本上我想要的是一种简单而优雅的方式来存储像Freebase本身那样的数据,并且能够在Python(CherryPy)Web应用程序中轻松使用这些数据。
MQL参考指南的第2章规定:
作为Metaweb基础的数据库与您可能熟悉的关系数据库根本不同。关系数据库以表的形式存储数据,但 Metaweb数据库将数据存储为节点图和这些节点之间的关系。
我想这意味着我应该使用三重存储或Neo4j等图形数据库?这里有人有使用Python环境中任何一种的经验吗?
(到目前为止我实际尝试的是创建一个能够轻松存储Freebase主题的关系数据库模式,但是我遇到了在SQLAlchemy中配置映射的问题。)
我正在研究的事情
更新[28/12/2011]:
我在Freebase博客上发现了一篇文章,描述了Freebase自己使用的专有元组存储/数据库(图形):http://blog.freebase.com/2008/04/09/a-brief-tour-of-graphd/
答案 0 :(得分:10)
这对我有用。它允许您在少于100GB的磁盘上加载标准MySQL安装中的所有Freebase转储。关键是要了解转储中的数据布局,然后对其进行转换(针对空间和速度进行优化)。
在您尝试使用之前,您应该了解Freebase概念(全部来自文档):
其他一些重要的Freebase细节:
[{'id':'/','mid':null}]
'/m/0cwtm'
是一个人);
类型为'/film/performance'的实体'/m/03lmb2f'不是主题(我选择将这些视为RDF中的空白节点(blank nodes),虽然这可能在哲学上不准确),但类型为'/film/director'(以及其他类型)的实体'/m/04y78wb'是主题;
(参见底部的Python代码)
TRANSFORM 1 (来自shell,将链接与命名空间数据拆分开,忽略notable_for以及非/lang/en的文本):
python parse.py freebase.tsv # produces freebase_links.tsv and freebase_ns.tsv next to the input file
TRANSFORM 2 (来自Python控制台,将freebase_ns.tsv拆分为freebase_ns_types.tsv、freebase_ns_props.tsv以及我们现在忽略的其他15个文件)
import e
# Splits freebase_ns.tsv into freebase_ns_types.tsv, freebase_ns_props.tsv and
# 15 further freebase_ns_*.tsv files written next to the input.
e.split_external_keys( 'freebase_ns.tsv' )
TRANSFORM 3 (从Python控制台,将属性和目标转换为mids)
import e
# Build the (namespace, key) -> mid lookup table used by both rewrites below.
ns = e.get_namespaced_data( 'freebase_ns_types.tsv' )
# Rewrite property and destination (columns 1 and 2) to mids; unresolved rows are skipped.
e.replace_property_and_destination_with_mid( 'freebase_links.tsv', ns ) #produces freebase_links_pdmids.tsv
# Rewrite only the property (column 1) to a mid; unresolved rows are dropped.
e.replace_property_with_mid( 'freebase_ns_props.tsv', ns ) #produces freebase_ns_props_pmids.tsv
TRANSFORM 4 (来自MySQL控制台,在数据库中加载freebase_links_pdmids.tsv、freebase_ns_props_pmids.tsv和freebase_ns_types.tsv):
-- links: one row per link line (loaded from freebase_links_pdmids.tsv).
-- parse.py only routes lines with an empty 4th column here, hence VARCHAR(1) for value.
CREATE TABLE links(
source VARCHAR(20),
property VARCHAR(20),
destination VARCHAR(40),
value VARCHAR(1)
) ENGINE=MyISAM CHARACTER SET utf8;
-- ns: namespaced property rows (loaded from freebase_ns_props_pmids.tsv).
CREATE TABLE ns(
source VARCHAR(20),
property VARCHAR(20),
destination VARCHAR(40),
value VARCHAR(255)
) ENGINE=MyISAM CHARACTER SET utf8;
-- types: key rows; edb.py resolves a key to a mid by matching (destination, value) -> source.
CREATE TABLE types(
source VARCHAR(20),
property VARCHAR(40),
destination VARCHAR(40),
value VARCHAR(40)
) ENGINE=MyISAM CHARACTER SET utf8;
-- Bulk-load the tab-separated files produced by the earlier transforms.
LOAD DATA LOCAL INFILE "/data/freebase_links_pdmids.tsv" INTO TABLE links FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE "/data/freebase_ns_props_pmids.tsv" INTO TABLE ns FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
-- NOTE(review): the transforms above produce freebase_ns_types.tsv; confirm which file name is intended here.
LOAD DATA LOCAL INFILE "/data/freebase_ns_base_plus_types.tsv" INTO TABLE types FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';
-- Indexes are created after loading; they cover the columns edb.py filters on.
CREATE INDEX links_source ON links (source) USING BTREE;
CREATE INDEX ns_source ON ns (source) USING BTREE;
CREATE INDEX ns_value ON ns (value) USING BTREE;
CREATE INDEX types_source ON types (source) USING BTREE;
CREATE INDEX types_destination_value ON types (destination, value) USING BTREE;
将此保存为e.py:
import sys
#returns a dict to be used by mid(...), replace_property_and_destination_with_mid(...) bellow
def get_namespaced_data(file_name):
    """Build the ``(namespace, key) -> mid`` lookup table from a TSV file.

    Every usable line has at least four tab-separated columns; column 0 is
    stored under the key ``(column 2, column 3)``.  Shorter lines are reported
    with ``Skip...`` and ignored.  The returned dict is the ``ns`` argument
    expected by ``mid`` and the ``replace_*_with_mid`` functions.

    :param file_name: path of the TSV file to read.
    :returns: dict mapping ``(elements[2], elements[3])`` to ``elements[0]``.
    """
    result = {}
    # The context manager closes the file even if a line fails to parse.
    with open(file_name) as f:
        for line in f:
            # Strip only the newline: slicing with [:-1] would eat a real
            # character when the last line has no trailing newline.
            elements = line.rstrip('\n').split('\t')
            if len(elements) < 4:
                print('Skip...')
                continue
            result[(elements[2], elements[3])] = elements[0]
    return result
#runs out of memory
def load_links(file_name):
    """Load a links TSV file into ``{source: {property: [destination, ...]}}``.

    The whole file is held in memory, which is why the original author notes
    that this runs out of memory on the full dump.  The number of distinct
    sources is printed each time it reaches a new multiple of one million.

    :param file_name: path of a TSV file whose first three columns are
        source, property and destination.
    :returns: nested dict of destination lists, in file order.
    """
    result = {}
    reported = None  # last size printed, so the same milestone is not repeated
    with open(file_name) as f:
        for line in f:
            size = len(result)
            if size % 1000000 == 0 and size != reported:
                print(size)
                reported = size
            # Strip only the newline; [:-1] would truncate an unterminated last line.
            elements = line.rstrip('\n').split('\t')
            src, prop, dest = elements[0], elements[1], elements[2]
            result.setdefault(src, {}).setdefault(prop, []).append(dest)
    return result
#same as load_links but for the namespaced data
def load_ns(file_name):
    """Load a namespaced TSV file into ``{source: {property: [value, ...]}}``.

    Same shape as ``load_links`` but collects column 3 (the value) instead of
    column 2.  The number of distinct sources is printed each time it reaches
    a new multiple of one million.

    :param file_name: path of a TSV file with at least four columns per line.
    :returns: nested dict of value lists, in file order.
    """
    result = {}
    reported = None  # last size printed, so the same milestone is not repeated
    with open(file_name) as f:
        for line in f:
            size = len(result)
            if size % 1000000 == 0 and size != reported:
                print(size)
                reported = size
            # Strip only the newline; [:-1] would truncate an unterminated last line.
            elements = line.rstrip('\n').split('\t')
            src, prop, value = elements[0], elements[1], elements[3]
            result.setdefault(src, {}).setdefault(prop, []).append(value)
    return result
def links_in_set(file_name):
    """Return the set of distinct first-column values of a TSV file.

    :param file_name: path of the TSV file to scan.
    :returns: set of the first tab-separated field of every line.
    """
    # The file is closed on exit; only the newline is stripped so an
    # unterminated last line keeps its final character.
    with open(file_name) as f:
        return set(line.rstrip('\n').split('\t')[0] for line in f)
def mid(key, ns):
    """Resolve a Freebase key such as ``/type/object`` to its mid using *ns*.

    :param key: key path; ``'/'`` is treated as ``/boot/root_namespace``.
    :param ns: dict from ``get_namespaced_data`` keyed by ``(namespace, key)``.
    :returns: *key* itself when its second path segment is ``m`` (already a
        mid), the mapped mid when found, otherwise ``False`` (also for the
        empty string and for keys without any ``/``, which are printed).
    """
    if key == '':
        return False
    if key == '/':
        key = '/boot/root_namespace'
    segments = key.split('/')
    if len(segments) < 2:
        # Not a path at all: report it and give up.
        print(key)
        return False
    if segments[1] == 'm':
        return key
    lookup = ('/'.join(segments[:-1]), segments[-1])
    return ns.get(lookup, False)
def replace_property_and_destination_with_mid(file_name, ns):
    """Rewrite columns 1 and 2 of a links TSV file to mids.

    Writes ``<prefix>_pdmids.tsv`` next to the input, where ``<prefix>`` is
    the input file name up to its first dot.  A row is written only when both
    its property and its destination resolve through ``mid``; other rows are
    counted and the total is printed as ``Skipped: N``.

    :param file_name: path of the links TSV file.
    :param ns: lookup dict produced by ``get_namespaced_data``.
    """
    import os  # local import: the module header only imports sys

    # Cut at the first dot of the *file name* only, so dots in directory
    # names (e.g. './freebase_links.tsv') no longer corrupt the prefix.
    directory, base_name = os.path.split(file_name)
    fn = os.path.join(directory, base_name.split('.')[0])

    skipped = 0
    with open(file_name) as f:
        with open(fn + '_pdmids' + '.tsv', 'w') as f_out_mids:
            for line in f:
                elements = line.rstrip('\n').split('\t')
                # Both lookups always run, as before; falsy means "unresolved".
                prop_mid = mid(elements[1], ns)
                dest_mid = mid(elements[2], ns)
                if prop_mid and dest_mid:
                    elements[1] = prop_mid
                    elements[2] = dest_mid
                    f_out_mids.write('\t'.join(elements) + '\n')
                else:
                    skipped += 1
    print('Skipped: ' + str(skipped))
def replace_property_with_mid(file_name, ns):
    """Rewrite column 1 (the property) of a TSV file to its mid.

    Writes ``<prefix>_pmids.tsv`` next to the input, where ``<prefix>`` is the
    input file name up to its first dot.  Rows whose property does not resolve
    through ``mid`` are silently dropped.

    :param file_name: path of the TSV file.
    :param ns: lookup dict produced by ``get_namespaced_data``.
    """
    import os  # local import: the module header only imports sys

    # Cut at the first dot of the *file name* only, so dots in directory
    # names (e.g. './freebase_ns_props.tsv') no longer corrupt the prefix.
    directory, base_name = os.path.split(file_name)
    fn = os.path.join(directory, base_name.split('.')[0])

    with open(file_name) as f:
        with open(fn + '_pmids' + '.tsv', 'w') as f_out_mids:
            for line in f:
                elements = line.rstrip('\n').split('\t')
                prop_mid = mid(elements[1], ns)
                if prop_mid:
                    elements[1] = prop_mid
                    f_out_mids.write('\t'.join(elements) + '\n')
                # else: property could not be resolved; the row is skipped
#cPickle
#ns=e.get_namespaced_data('freebase_2.tsv')
#import cPickle
#cPickle.dump( ns, open('ttt.dump','wb'), protocol=2 )
#ns=cPickle.load( open('ttt.dump','rb') )
#fn='/m/0'
#n=fn.split('/')[2]
#dir = n[:-1]
def is_mid(value):
    """Return True when the second ``/``-separated segment of *value* is ``m``.

    Values without any ``/`` are never mids.
    """
    segments = value.split('/')
    return len(segments) > 1 and segments[1] == 'm'
def check_if_property_or_destination_are_mid(file_name):
    """Print every line of a TSV file whose property (column 1) is already a mid.

    Despite the name, only the property column is checked; the destination
    check was disabled by the original author.

    :param file_name: path of the TSV file to inspect.
    """
    # The context manager closes the file, which the original never did.
    with open(file_name) as f:
        for line in f:
            elements = line.rstrip('\n').split('\t')
            if is_mid(elements[1]):
                print(line)
#
# Suffixes of the output files; each becomes '<prefix>_<suffix>.tsv'.
_EXTERNAL_KEY_SUFFIXES = (
    'extkeys', 'intkeys', 'props', 'types', 'm', 'src', 'usr', 'base', 'blg',
    'bus', 'soft', 'uri', 'quot', 'frb', 'tag', 'guid', 'dtwrld',
)

# Top-level namespaces routed to a bucket whatever the depth of the destination.
_ANY_DEPTH_BUCKETS = {'m': 'm', 'en': 'intkeys', 'user': 'usr', 'uri': 'uri'}

# Top-level namespaces routed to a bucket only when the destination has more
# than one path segment below the root (e.g. '/wikipedia/en').
_NESTED_BUCKETS = {
    'wikipedia': 'extkeys', 'authority': 'extkeys', 'source': 'src',
    'biology': 'blg', 'business': 'bus', 'soft': 'soft',
    'quotationsbook': 'quot', 'freebase': 'frb', 'tag': 'tag',
    'guid': 'guid', 'dataworld': 'dtwrld',
}


def _external_key_bucket(prop, destination):
    """Return the output-file suffix for one (property, destination) pair."""
    parts = destination.split('/')
    is_key = prop == '/type/object/key'
    if len(parts) == 1:
        # Blank destinations and the root-domain ones (no '/' at all).
        return 'types' if is_key else 'props'
    if destination == '/lang/en':
        return 'props'
    domain = parts[1]
    if domain in _ANY_DEPTH_BUCKETS:
        return _ANY_DEPTH_BUCKETS[domain]
    if len(parts) > 2:
        if domain == 'base':
            return 'types' if is_key else 'base'
        if domain in _NESTED_BUCKETS:
            return _NESTED_BUCKETS[domain]
    # Everything else is treated as type/key information.
    return 'types'


def split_external_keys(file_name):
    """Split a namespaced TSV file into 17 files according to the destination.

    For an input named ``<prefix>.tsv`` (prefix = file name up to its first
    dot) the files ``<prefix>_<suffix>.tsv`` are created next to it for every
    suffix in ``_EXTERNAL_KEY_SUFFIXES``; all of them are created even when
    they stay empty.  Each input line is copied unchanged to exactly one
    file, chosen from its property (column 1) and destination (column 2).

    :param file_name: path of the namespaced TSV file.
    """
    import os  # local import: the module header only imports sys

    # Cut at the first dot of the *file name* only, so dots in directory
    # names no longer corrupt the prefix.
    directory, base_name = os.path.split(file_name)
    fn = os.path.join(directory, base_name.split('.')[0])

    outputs = {}
    try:
        for suffix in _EXTERNAL_KEY_SUFFIXES:
            outputs[suffix] = open(fn + '_' + suffix + '.tsv', 'w')
        with open(file_name) as f:
            for line in f:
                elements = line.rstrip('\n').split('\t')
                bucket = _external_key_bucket(elements[1], elements[2])
                outputs[bucket].write(line)
    finally:
        # Close (and thereby flush) every output, even after an error.
        for handle in outputs.values():
            handle.close()
将其保存为parse.py:
import sys
def parse_freebase_quadruple_tsv_file(file_name):
    """Split a Freebase quadruple dump into a links file and a namespace file.

    For an input named ``<prefix>.tsv`` (prefix = file name up to its first
    dot) this writes ``<prefix>_links.tsv`` and ``<prefix>_ns.tsv`` next to
    it.  Lines are copied unchanged:

    * fewer than four columns: reported with ``Skip...`` and dropped;
    * property ending in ``/notable_for``: dropped (it carries JSON);
    * destination set and value empty: written to the links file;
    * destination under ``/lang/`` other than ``/lang/en``: dropped;
    * anything else: written to the namespace file.

    The cases are described at http://wiki.freebase.com/wiki/Data_dumps

    :param file_name: path of the quadruple TSV dump.
    """
    import os  # local import: the module header only imports sys

    # Cut at the first dot of the *file name* only, so dots in directory
    # names no longer corrupt the prefix.
    directory, base_name = os.path.split(file_name)
    fn = os.path.join(directory, base_name.split('.')[0])

    with open(file_name) as f:
        with open(fn + '_links' + '.tsv', 'w') as f_out_links:
            with open(fn + '_ns' + '.tsv', 'w') as f_out_ns:
                for line in f:
                    # Strip only the newline; [:-1] would truncate an
                    # unterminated last line.
                    elements = line.rstrip('\n').split('\t')
                    if len(elements) < 4:
                        print('Skip...')
                        continue
                    prop, destination, value = elements[1], elements[2], elements[3]
                    if prop.endswith('/notable_for'):
                        continue
                    if destination and not value:  # case 1, linked
                        f_out_links.write(line)
                    elif not (destination.startswith('/lang/') and destination != '/lang/en'):
                        # keep English text and everything that is not a language string
                        f_out_ns.write(line)
# Script entry: every command-line argument is treated as a .tsv dump to split.
if len(sys.argv[1:]) == 0:
    print 'Pass a list of .tsv filenames'  # usage hint; the loop below then has nothing to process
for file_name in sys.argv[1:]:
    parse_freebase_quadruple_tsv_file( file_name )
e.get_namespaced_data( 'freebase_ns_types.tsv' )
此处为标准免责声明:我做这件事已经是几个月前的事了。我相信它大部分是正确的,但如果我的笔记遗漏了什么,我深表歉意。不幸的是,我需要它的那个项目已经夭折了,但希望这能对其他人有所帮助。如果有什么不清楚的地方,请在此处发表评论。
答案 1 :(得分:6)
我的2美分......
我使用一些Java代码将Freebase数据转储转换为RDF:https://github.com/castagna/freebase2rdf
我使用Apache Jena的TDB存储来加载RDF数据,使用Fuseki通过HTTP上的SPARQL协议来提供数据。
另见:
答案 2 :(得分:3)
SPARQL是查询RDF的查询语言,它允许编写类似SQL的查询。大多数RDF数据库都实现了SPARQL接口。此外,Freebase允许您在RDF中导出数据,因此您可以直接在RDF数据库中使用该数据并使用SPARQL进行查询。
我会看一下this tutorial以更好地了解SPARQL。
如果您要处理大数据集,例如freebase,我会将4store与任何Python clients一起使用。 4store通过HTTP公开SPARQL,您可以发出HTTP请求来断言,删除和查询数据。它还处理JSON中的结果集,这对Python非常方便。我在几个项目中使用过这个基础设施,不是使用CherryPy而是使用Django,但我想这种差异并不重要。
答案 3 :(得分:2)
对于freebase转储用户来说,好消息是Freebase现在提供RDF转储:http://wiki.freebase.com/wiki/Data_dumps。它采用Turtle格式,因此使用为RDF设计的任何图形数据库都非常方便。
我的建议也是4store:http://4store.org/。它简单易用。 您可以使用http请求执行SPARQL操作。
我的项目中有一个棘手的问题:Freebase转储中使用的"."(代表缩短的URL)无法被4store识别。所以我给所有包含"."的列加上尖括号"<>",并自己处理缩短的网址。
答案 4 :(得分:1)
看看https://cayley.io。我相信它出自同一位作者之手,并采用了与graphd(在Google关闭Freebase之前,Freebase所使用的后端)相同的原理。
关于数据,您可能希望运行类似this的内容来清理the Freebase DB dumps或使用datahub。
答案 5 :(得分:0)
这是我另一个答案的补充代码。核心内容在edb.py中。从Python控制台运行并按照示例操作,或者使用web2py控制器在浏览器中运行。
将其另存为edb.py:
import MySQLdb
import sys
# Module-wide MySQL connection shared by every query helper in this file.
# NOTE(review): host, user, password and database name are hard-coded
# placeholders — replace them (ideally from configuration) before use.
connection = MySQLdb.connect (host = "localhost",
                              user = "root",
                              passwd = "x",
                              db = "y")
# Module-level cursor; fetch_one/fetch_all open their own cursor per call.
cursor = connection.cursor()
query_counter = 0 # incremented by fetch_one/fetch_all after each executed query
print_queries = False # when true, fetch_one/fetch_all print each query before running it
limit = 1000 # appended as ' LIMIT <limit>' to every query by fetch_one/fetch_all
def fetch_one(query, params=None):
    """Run *query* and return the first column of its first row.

    ``' LIMIT <limit>'`` (module-level ``limit``) is appended to the query,
    the query is printed when ``print_queries`` is set, and
    ``query_counter`` is incremented after a successful execute.

    :param query: SQL text without a LIMIT clause.
    :param params: optional parameters bound by the driver
        (``cursor.execute(query, params)``).  Prefer this over formatting
        values into *query*; when it is used, literal ``%`` characters in
        *query* must be doubled.  ``None`` keeps the previous behaviour.
    :returns: first column of the first row, or ``None`` when there is no row.
    """
    global query_counter
    query = query + ' LIMIT ' + str(limit)
    if print_queries:
        print(query)
    cursor = connection.cursor()
    try:
        cursor.execute(query, params)
        query_counter += 1
        row = cursor.fetchone()
    finally:
        # Release the cursor; the original leaked one per call.
        cursor.close()
    if row:
        return row[0]
    return None
def fetch_all(query, params=None):
    """Run *query* and return all of its rows.

    ``' LIMIT <limit>'`` (module-level ``limit``) is appended to the query,
    the query is printed when ``print_queries`` is set, and
    ``query_counter`` is incremented after a successful execute.

    :param query: SQL text without a LIMIT clause.
    :param params: optional parameters bound by the driver
        (``cursor.execute(query, params)``).  Prefer this over formatting
        values into *query*; when it is used, literal ``%`` characters in
        *query* must be doubled.  ``None`` keeps the previous behaviour.
    :returns: whatever ``cursor.fetchall()`` returns for the query.
    """
    global query_counter
    query = query + ' LIMIT ' + str(limit)
    if print_queries:
        print(query)
    cursor = connection.cursor()
    try:
        cursor.execute(query, params)
        query_counter += 1
        return cursor.fetchall()
    finally:
        # Release the cursor; the original leaked one per call.
        cursor.close()
def _flatten(list_of_lists):
    """Concatenate the inner iterables of *list_of_lists* into one flat list."""
    from itertools import chain
    return list(chain.from_iterable(list_of_lists))
#Example: e._search_by_name('steve martin')
def _search_by_name(name, operator='='):
    """Find topics whose name or alias matches *name*.

    Looks in table ``ns`` for rows whose value matches *name* and whose
    property is ``/type/object/name`` or ``/common/topic/alias``, e.g.::

        select * from ns where value = 'the king'
            and (property = '/m/01gr' or property = '/m/06b');

    :param name: search text; surrounding whitespace is stripped.  ``None``
        or blank text yields an empty result without touching the database.
    :param operator: SQL comparison operator.  With anything other than
        ``'='`` a trailing ``%`` wildcard is appended to *name* (prefix match
        for ``like``).  It is inserted into the SQL verbatim, so it must
        never come from user input.
    :returns: ``(typed, ranked)`` where *typed* maps each matching mid to its
        list of types and *ranked* lists the mids ordered by number of types,
        most first.
    """
    typed, ranked = {}, []
    if name:
        name = name.strip()
    if not name:
        return (typed, ranked)
    filler = '' if operator == '=' else '%'
    name_mid = _mid('/type/object/name')
    alias_mid = _mid('/common/topic/alias')
    # The search term comes straight from the user (see the web2py
    # controller), so it is escaped before being placed inside the literal.
    safe_name = connection.escape_string(name)
    query = "select ns.source from ns where ns.value %s '%s%s' and ns.property in ('%s', '%s')" % (
        operator, safe_name, filler, name_mid, alias_mid)
    for row in fetch_all(query):
        typed[row[0]] = _types(row[0])
    # Rank by number of types to surface the more meaningful topics first;
    # sorted() is stable, so ties keep the dictionary's iteration order.
    ranked = sorted(typed, key=lambda m: len(typed[m]), reverse=True)
    return (typed, ranked)
#Example: e._children('') <---will get the top level domains
# e._children('/film') <---get all types from the domain
# e._children('/film/film') <---get all properties for the type
def _children(parent, expand=False, raw=False):
    """Return the entries of table ``types`` whose destination is *parent*.

    :param parent: namespace key, e.g. ``''``, ``'/film'`` or ``'/film/film'``.
    :param expand: when true, prefix each child name with *parent* so full
        keys (``'/film/film'``) are returned instead of ``'/film'``-style
        relative ones.
    :param raw: when true, return the children's ``source`` column instead of
        their names; *expand* is then ignored.
    :returns: list of strings.
    """
    query = "select t.source, t.value from types t where t.destination = '%s'" % (
        connection.escape_string(str(parent)),)
    # Fetch once and reuse the rows; the original ran the same query twice.
    rows = fetch_all(query)
    if raw:
        return [row[0] for row in rows]
    prefix = parent if expand else ''
    return [prefix + '/' + row[1] for row in rows]
#Example: e._parent('/film/film/songs')
def _parent(child):
    """Return *child* without its last ``/``-separated segment.

    ``'/people/marriage/to'`` becomes ``'/people/marriage'``; a value without
    any ``/`` yields ``''``.
    """
    head, _, _ = child.rpartition('/')
    return head
#Example: e._domains()
def _domains():
    """Return ``_children('')``, i.e. the children of the empty parent (the top-level domains)."""
    return _children('')
#Example: e._top_level_types()
def _top_level_types():
    """Return ``_children('/type')``, i.e. the children of the ``/type`` namespace."""
    return _children('/type')
#TODO get all primitive types
#Example: e._mid('/type/object')
# e._mid('/authority/imdb/name/nm0000188')
def _mid(key):
    """Resolve a Freebase key such as ``/type/object`` to its mid.

    :param key: key path; ``'/'`` is treated as ``/boot/root_namespace``.
    :returns: *key* itself when its second path segment is ``m`` (already a
        mid); otherwise the ``source`` found in table ``types`` for the
        (namespace, last segment) pair, or ``None`` when nothing matches.
        Empty/``None`` keys and keys without any ``/`` also give ``None``.
    """
    if not key:
        return None
    if key == '/':
        key = '/boot/root_namespace'
    parts = key.split('/')
    if len(parts) == 1:
        # No '/' at all: the original raised IndexError on parts[1] here.
        return None
    if parts[1] == 'm':  # already a mid
        return key
    namespace = '/'.join(parts[:-1])
    leaf = parts[-1]
    # Both pieces may originate from request data, so escape them.
    query = "select source from types t where t.destination = '%s' and t.value = '%s'" % (
        connection.escape_string(namespace), connection.escape_string(leaf))
    return fetch_one(query)
#Example: e._key('/type')
def _key(mid):
    """Return a readable key for a mid, or a list of keys for several mids.

    :param mid: a ``str`` mid, or a list/tuple of mids.
    :returns: for a ``str``: the first key from ``_keys`` starting with
        ``/type``, else the first key, else ``None`` when there are no keys.
        For a list/tuple: the keys of its members with ``None`` results
        dropped.  Any other argument type gives ``None``.
    """
    if isinstance(mid, (list, tuple)):
        resolved = [_key(member) for member in mid]
        return [found for found in resolved if found is not None]
    if not isinstance(mid, str):
        return None
    candidates = _keys(mid)
    if not candidates:
        return None
    # Prefer a key from the /type namespace when one exists.
    for candidate in candidates:
        if candidate.startswith('/type'):
            return candidate
    return candidates[0]
def _keys(mid):
    """Return every key of *mid* as ``destination + '/' + value`` strings.

    Reads all rows of table ``types`` whose ``source`` is *mid*.
    """
    # check for '/type/object/key' as well?
    # The mid can come from the request URL, so escape it before interpolating.
    query = "select t.destination, t.value from types t where t.source = '%s'" % (
        connection.escape_string(str(mid)),)
    return [row[0] + '/' + row[1] for row in fetch_all(query)]
#Example: e._types('/m/0p_47')
def _types(mid):
    """Return the mids of all types of *mid*.

    These are the destinations of its ``/type/object/type`` links in table
    ``links``.
    """
    tm = _mid('/type/object/type')
    # The mid can come from the request URL, so escape it before interpolating.
    query = "select l.destination from links l where l.source = '%s' and l.property = '%s'" % (
        connection.escape_string(str(mid)), tm)
    return [row[0] for row in fetch_all(query)]
#Example: e._props_n('/m/0p_47') <---Named immediate properties (like name, etc.)
def _props_n(mid):
    """Return the distinct properties stored for *mid* in table ``ns``.

    The same property can be set more than once per topic, hence the
    de-duplication; the order of the result is unspecified.
    """
    # The mid can come from the request URL, so escape it before interpolating.
    query = "select ns.property from ns where ns.source = '%s'" % (
        connection.escape_string(str(mid)),)
    return list(set(row[0] for row in fetch_all(query)))
#Example: e._props_l('/m/0p_47') <---All remote properties, some are named, some are anonymous
def _props_l(mid):
    """Return ``{property: [destination, ...]}`` for the links leaving *mid*.

    ``/type/object/type`` links are excluded because types have tons of
    instance links.  The same property can be set more than once per topic,
    so every property maps to a list.
    """
    tm = _mid('/type/object/type')
    # The mid can come from the request URL, so escape it before interpolating.
    query = "select l.property, l.destination from links l where l.source = '%s' and property <> '%s'" % (
        connection.escape_string(str(mid)), tm)
    output = {}
    for prop, destination in fetch_all(query):
        output.setdefault(prop, []).append(destination)
    return output
#Example: e._props_ln('/m/0p_47') <---All remote named properties
def _props_ln(mid):
    """Return the link properties of *mid* that point at named topics.

    A property qualifies when the first destination it links to has
    ``/common/topic`` among its types.
    """
    links = _props_l(mid)
    common_topic = _mid('/common/topic')
    return [prop for prop in links
            if common_topic in _types(links[prop][0])]
#Example: e._props_la('/m/0p_47') <---All remote anonymous properties, these actually belong to the children!
#instead of has type /common/topic we used to check if it has name
def _props_la(mid, raw=True):
    """Return the properties reachable through anonymous destinations of *mid*.

    A link is "anonymous" when its first destination is not a
    ``/common/topic`` (blank nodes in RDF?).  For each such link the children
    of the destination's first type key are collected, so the returned
    properties actually belong to the linked nodes, not to *mid* itself.

    :param mid: mid of the topic to inspect.
    :param raw: passed to ``_children``; true returns mids, false full keys.
    :returns: flat list of properties.
    """
    result = []
    ps = _props_l(mid)
    common_topic = _mid('/common/topic')
    for p in ps:
        ts = _types(ps[p][0])
        if common_topic in ts:  # it is a common topic, i.e. not anonymous
            continue
        # Reuse ts instead of querying the same types a second time.
        t = _key(ts)
        if t and '/type/type' not in t:  # FIXME: hack not to go into types, could be done better
            result.append(_children(t[0], expand=True, raw=raw))  # get the first, is this correct?
    return _flatten(result)  # it is a list of lists
#FIXME: try to get '/film/actor/film' -> '/type/property/expected_type' -> '/film/performance' -> properties/children
#instead of trying is something has name
#Example: e._get_n('/m/0p_47', e._props_n('/m/0p_47')[0])['/lang/en'] <---These come with a namespace
def _get_n(mid, prop):
    """Return every value stored in table ``ns`` for *prop* of *mid*.

    :param mid: mid of the topic.
    :param prop: property as a key or a mid; it is resolved with ``_mid``.
    :returns: list of values (the same property can be set more than once).
    """
    p = _mid(prop)
    # Both values can come from the request URL (a '/m/...' property is
    # returned unchanged by _mid), so escape them before interpolating.
    query = "select ns.value from ns where ns.source = '%s' and ns.property = '%s'" % (
        connection.escape_string(str(mid)), connection.escape_string(str(p)))
    return [r[0] for r in fetch_all(query)]
#Example: e._get_l('/m/0p_47', e._props_l('/m/0p_47')[0]) <---returns a list of mids coresponding to that prop.
# e._name(e._get_l('/m/0p_47', '/film/writer/film'))
def _get_l(mid, prop):
    """Return the mids that *mid* links to through *prop*.

    :param mid: mid of the source topic.
    :param prop: property as a key or a mid; it is resolved with ``_mid``.
    :returns: list of destination mids (a property can be set more than once).
    """
    p = _mid(prop)
    # Both values can come from the request URL (a '/m/...' property is
    # returned unchanged by _mid), so escape them before interpolating.
    query = "select l.destination from links l where l.source = '%s' and l.property = '%s'" % (
        connection.escape_string(str(mid)), connection.escape_string(str(p)))
    return [row[0] for row in fetch_all(query)]
#Example: e._name(e._get_ln('/m/0p_47', e._props_ln('/m/0p_47')[0]))
def _get_ln( mid, p ): #just alias for _get_l, keeping for consistency
    """Return ``_get_l(mid, p)``: the destinations linked from *mid* through property *p*."""
    return _get_l( mid, p )
#Example: e._name(e._get_la('/m/0p_47', '/film/performance/film'))
def _get_la(mid, prop):
    """Follow *prop* from the anonymous nodes linked to *mid*.

    For every link property of *mid* whose first destination has the type
    that owns *prop* (the parent of *prop*'s key, e.g. ``/film/performance``
    for ``/film/performance/film``), the *prop* links of each destination are
    collected.

    :param mid: mid of the starting topic.
    :param prop: property (key or mid) defined on the intermediate nodes.
    :returns: flat list of the mids reached, over all matching properties.
    """
    result = []
    owner_type = None
    owner_type_known = False
    for p in _props_l(mid):
        es = _get_l(mid, p)  # get the destinations
        if not es:
            continue
        if not owner_type_known:
            # Loop-invariant and costs several queries: resolve it once, and
            # only when it is needed, exactly as the first iteration used to.
            owner_type = _mid(_parent(_key(_mid(prop))))
            owner_type_known = True
        if owner_type in set(_types(es[0])):
            for e in es:
                result.append(_get_l(e, prop))
    return _flatten(result)
#How do we determine properties with multiple values vs those with singular (i.e. place of birth)?
#is this in the ontology?
#Ans: yes, /type/property/unique
#Example: e._all_names_ln('/m/0p_47') <---gets all of object's remote named properties
def _all_names_ln( mid ):
    """Map every remote named property of *mid* to the names of its destinations.

    Keys are ``_key(p)`` for each property from ``_props_ln(mid)``; values are
    ``_name(...)`` applied to the list returned by ``_get_ln(mid, p)``.
    """
    result = {}
    for p in _props_ln( mid ):
        result[ _key(p) ] = _name( _get_ln( mid, p ) )
    return result
#Example: e._all_names_la('/m/0p_47') <---gets all of object's remote anonymous properties
def _all_names_la( mid ): #TODO: prevent loops, run e.all_names_la('/m/0p_47')
    """Map every remote anonymous property of *mid* to the names it leads to.

    Keys are ``_key(p)`` for each property from ``_props_la(mid)``; values are
    ``_name(...)`` applied to the list returned by ``_get_la(mid, p)``.
    """
    result = {}
    for p in _props_la( mid ):
        result[ _key( p ) ] = _name ( _get_la( mid, p ) )
    return result
#FIXME: _all_names_la is going into destinations which are types and have a ton of instance links...
#Example: e._name('/m/0p_47') <---the name of a topic
#
def _name(mid):
    """Return the name values of a topic, or of each topic in a collection.

    :param mid: a ``str`` mid, or a list/tuple/set of mids (nesting allowed).
    :returns: for a ``str``: the list of ``/type/object/name`` values from
        ``_get_n``; for a collection: a list with one result per member;
        ``None`` for any other argument type.
    """
    if isinstance(mid, (list, tuple, set)):
        return [_name(member) for member in mid]
    if isinstance(mid, str):
        return _get_n(mid, _mid('/type/object/name'))
    return None
#for internal use only
def _get_linked(mid):
    """Return the set of mids directly linked from *mid*.

    ``/type/object/type`` links are excluded because types have tons of
    instance links.  For internal use by the connection search.
    """
    tm = _mid('/type/object/type')
    # The mid can come from the request URL, so escape it before interpolating.
    query = "select destination from links where source = '%s' and property <> '%s' " % (
        connection.escape_string(str(mid)), tm)
    return set(r[0] for r in fetch_all(query))
#for internal use only
def _get_connections_internal(entity1, target, path, all_paths, depth, max_depth):
    """Depth-first search for link paths from *entity1* to *target*.

    :param entity1: mid of the node currently visited.
    :param target: mid being searched for.
    :param path: mids visited so far on this branch; it is extended in place.
    :param all_paths: output list; every path that reaches *target* is
        appended to it.
    :param depth: current recursion depth.
    :param max_depth: branches deeper than this are abandoned.
    :returns: ``None``; results are delivered through *all_paths*.
    """
    import copy
    if depth > max_depth:
        return
    if True:  # debug trace, unconditionally enabled in the original
        print('')
        print(str(entity1) + ', ' + str(target))
        print(str(path))
        print(str(all_paths))
        print(depth)
    path.append(entity1)
    if entity1 == target:
        # Already there: record the path once (the original appended the
        # target a second time) and skip the now pointless links query.
        all_paths.append(path)
        return
    linked1 = _get_linked(entity1)
    if target in linked1:
        path.append(target)
        all_paths.append(path)
        return
    for l1 in linked1:
        if l1 in path:  # do not walk in circles
            continue
        # Each branch gets its own copy so siblings do not share a path.
        _get_connections_internal(l1,
                                  target,
                                  copy.copy(path),
                                  all_paths,
                                  depth + 1,
                                  max_depth)
#Example: e._name(e._get_connections('/m/0p_47', '/m/0cwtm')) <---find path in the graph between the two entities
def _get_connections( entity1, target ):
    """Return the link paths from *entity1* to *target* found by ``_get_connections_internal``.

    The search starts at depth 0 with a maximum depth of 2; each result is a
    list of mids starting at *entity1*.
    """
    result = []
    _get_connections_internal( entity1, target, [], result, 0, 2 )
    return result
#for internal use only
def _get_connections_internal2( entity1, entity2, path1, path2, all_paths, depth, max_depth, level ):
    """Search from both entities at once for paths that meet.

    path1 and path2 hold the mids visited from each side and are extended in
    place; every meeting pair ``(path1, path2)`` is appended to all_paths.
    Matches are only recorded once ``level`` has reached 0; it is decremented
    on each recursion and clamped at 0, so a positive ``level`` forces the
    search that many steps deeper before accepting a meeting point.
    Recursion stops when ``depth`` exceeds ``max_depth``.
    """
    import copy
    if depth > max_depth:
        return
    if level < 0: level = 0
    path1.append( entity1 )
    path2.append( entity2 )
    # Case 1: both sides are on the same node.
    if entity1 == entity2 and level == 0:
        all_paths.append( ( path1, path2 ) ) #no need to append entity1 or entity2 to the paths
        return
    # Case 2: entity2 is directly linked from entity1.
    linked1 = _get_linked( entity1 )
    if entity2 in linked1 and entity2 not in path1 and level == 0:
        path1.append( entity2 )
        all_paths.append( ( path1, path2 ) )
        return
    # Case 3: entity1 is directly linked from entity2.
    linked2 = _get_linked( entity2 )
    if entity1 in linked2 and entity1 not in path2 and level == 0:
        path2.append( entity1 )
        all_paths.append( ( path1, path2 ) )
        return
    # Case 4: the two sides share neighbours that are not yet on either path.
    inters = linked1.intersection( linked2 )
    inters = inters.difference( set( path1 ) )
    inters = inters.difference( set( path2 ) )
    if inters and level == 0:
        for e in inters: #these are many paths, have to clone
            p1 = copy.copy( path1 )
            p1.append( e )
            p2 = copy.copy( path2 )
            p2.append( e )
            all_paths.append( ( p1,p2 ) )
        return
    # No meeting point yet: advance both sides by one link, trying every
    # pair of unvisited neighbours with fresh copies of the paths.
    for l1 in linked1:
        if l1 in path1 or l1 in path2:
            continue
        for l2 in linked2:
            if l2 in path1 or l2 in path2:
                continue
            _get_connections_internal2( l1, l2,
                                        copy.copy( path1 ), copy.copy( path2 ),
                                        all_paths,
                                        depth+1,
                                        max_depth,
                                        level - 1 )
#Example: e._name(e._get_connections2('/m/0p_47', '/m/0cwtm')) <---returns two meeting paths starting from both entities
# e._name(e._get_connections2('/m/0p_47', '/m/0cwtm', level=1)) <---search deeper
# e._name(e._get_connections2('/m/0p_47', '/m/0cwtm', level=2)) <---even deeper
def _get_connections2( entity1, entity2, level = 0 ):
    """Return the ``(path1, path2)`` pairs found by ``_get_connections_internal2``.

    The search starts at depth 0 with a maximum depth of 15; ``level`` is
    passed through unchanged.
    """
    result = []
    _get_connections_internal2( entity1, entity2, [], [], result, 0, 15, level )
    return result
这是一个示例web2py控制器(只需复制web2py models目录中的edb.py):
# -*- coding: utf-8 -*-
def mid_to_url( mid ):
    """Return the third ``/``-separated segment of *mid*, e.g. ``'/m/0p_47'`` -> ``'0p_47'``."""
    return mid.split('/')[2]
def index():
    """Search page: a one-field form plus a table of the topics matching ``request.vars.term``.

    Each result row links the topic name to the ``result`` action and lists
    the keys of the topic's types.  Returns the dict ``{'form', 'result'}``.
    """
    # GET form, so the search term ends up in request.vars.term.
    form = FORM( TABLE( TR( INPUT(_name='term', _value=request.vars.term ) ),
                        TR(INPUT(_type='submit', _value='Search') ) ),
                 _method='get')
    typed, ranked = _search_by_name( request.vars.term )
    rows = []
    for r in ranked:
        # Collect a key for every type of this topic that has one.
        keys = []
        for t in typed[r]:
            k = _key( t )
            if k:
                keys.append( k )
        rows.append( TR( TD( A(_name( r ),
                               _href = URL('result', args = [mid_to_url(r)]))),
                         TD( XML( '<br/>'.join( keys ) ) ) ) )
    result = TABLE( *rows )
    return {
        'form': form,
        'result' : result
    }
def result():
    """Detail page for the path of mids given in ``request.args``.

    Each argument is a mid without its ``/m/`` prefix.  Arguments at even
    positions are rendered as topics and those at odd positions as
    properties.  Returns the dict ``{'path', 'data'}``: a breadcrumb table
    and the data for the last element of the path.
    """
    path, data = '', ''
    if not request.args:
        return { 'path':path, 'data':data}
    # Breadcrumb: one linked cell plus an arrow image per path element.
    path_rows = []
    for ra in range(len(request.args)):
        if ra%2:
            arrow_url = URL( 'static', 'images/blue_arr.png' )
            display_name = _key('/m/'+request.args[ra]) #it's a property
        else:
            arrow_url = URL( 'static', 'images/red_arr.png' )
            display_name = _name('/m/'+request.args[ra]) #it's a topic
        path_rows.append( TD( A( display_name, _href=URL( args = request.args[0:ra+1] ) ) ) )
        path_rows.append( TD( IMG( _src = arrow_url ) ) )
    path = TABLE( *path_rows )
    elems = [ '/m/'+a for a in request.args ]
    if _mid( '/type/property' ) in _types( elems[-1] ): #we are rendering a property
        # Destinations of the property on the preceding topic.
        objects = _get_ln( elems[-2], elems[-1] )
        if not objects: #there should be a better way to see if this is anonymous
            objects = _get_la( elems[-2], elems[-1] )
        data = TABLE( *[ TR( TD( A(_name(o), _href = URL( args = request.args+[mid_to_url(o)])))) for o in objects ] )
    else: #we are rendering a topic
        # Three columns: linked named properties, linked anonymous properties, direct values.
        direct_props = TABLE(*[TR(TD(_key(p)), TD(', '.join(_get_n( elems[-1], p)))) for p in _props_n( elems[-1] )])
        linked_named_props = TABLE(*[TR(TD(A(_key(p),
                                            _href = URL(args = request.args+[mid_to_url(p)])))) for p in _props_ln( elems[-1] ) ] )
        linked_anon_props = TABLE(*[TR(TD(A(_key(p),
                                           _href = URL(args = request.args+[mid_to_url(p)])))) for p in _props_la( elems[-1] ) ] )
        data = TABLE( TR( TH( 'Linked named data:'), TH( 'Linked anonymous data:' ), TH( 'Direct data:' ) ),
                      TR( TD( linked_named_props ), TD( linked_anon_props ), TD( direct_props ) ) )
    return { 'path': path, 'data':data }