我想获取查询中“FROM”和“JOIN”之后的表名。下面是一个查询示例:
SELECT * FROM film JOIN language ON film.language_id = language.language_id WHERE language_name = "English"
我在 Python 中使用 sqlparse 库。这是我尝试过的代码:
import sqlparse

# The statement is kept as one logical string; adjacent literals inside the
# parentheses are concatenated, so no raw line break ends up inside the
# string literal.
line = (
    "FROM film INNER JOIN language ON film.language_id = "
    "language.language_id WHERE language.name = 'English' GROUP BY language_id"
)
# Top-level tokens of the first (and only) parsed statement.
a = sqlparse.parse(line)[0].tokens
print(a)
Out[91]:
[<Keyword 'FROM' at 0x1EEA18CDAC8>,
<Whitespace ' ' at 0x1EEA18CDCA8>,
<Identifier 'film' at 0x1EEA18CE6D8>,
<Whitespace ' ' at 0x1EEA18CDB28>,
<Keyword 'INNER ...' at 0x1EEA18CDBE8>,
<Whitespace ' ' at 0x1EEA18CDC48>,
<Keyword 'langua...' at 0x1EEA18CDA68>,
<Whitespace ' ' at 0x1EEA18CDD08>,
<Keyword 'ON' at 0x1EEA18CDD68>,
<Whitespace ' ' at 0x1EEA18CDDC8>,
<Comparison 'film.l...' at 0x1EEA18CE7C8>,
<Whitespace ' ' at 0x1EEA18B61C8>,
<Where 'WHERE ...' at 0x1EEA18CE4F8>,
<Keyword 'GROUP ...' at 0x1EEA18B65E8>,
<Whitespace ' ' at 0x1EEA18B6648>,
<Identifier 'langua...' at 0x1EEA18CE750>]
我期望的输出:
table = [film,language]
有人可以帮助我吗?谢谢
答案 0 :(得分:0)
我改编了 michaelshobbs 在这里建议的解决方案。它似乎可行,但我还没有对其进行彻底的测试。
我使用的代码是这样的:
import itertools
import sqlparse
from sqlparse.sql import IdentifierList, Identifier
from sqlparse.tokens import Keyword, DML
def is_subselect(parsed):
    """Return True if *parsed* is a token group that directly contains SELECT.

    Only the immediate child tokens of *parsed* are inspected; nested groups
    are not searched.

    Args:
        parsed: A token object exposing ``is_group`` and, when it is a
            group, ``tokens`` (each child exposing ``ttype`` and ``value``).

    Returns:
        bool: False for non-group tokens and for groups without a DML token
        whose upper-cased value is ``'SELECT'``; True otherwise.
    """
    if not parsed.is_group:
        return False
    return any(
        item.ttype is DML and item.value.upper() == 'SELECT'
        for item in parsed.tokens
    )
def extract_from_part(parsed):
    """Yield the tokens that follow a FROM keyword in *parsed*.

    The token list is scanned left to right.  Every group token is searched
    recursively first, so FROM clauses nested in sub-selects or other groups
    are found as well.  Once a FROM keyword has been seen at the current
    level, each following token is yielded until a clause keyword from the
    stop list (ORDER/GROUP/BY/HAVING and their combined forms) is reached.
    Scanning then continues, so a later FROM at the same level re-enables
    the output.

    Args:
        parsed: A token group exposing ``tokens``; each token exposes
            ``is_group``, ``ttype`` and ``value``.

    Yields:
        Tokens located in a FROM part.  Whitespace, JOIN/ON keywords and
        similar tokens are yielded too; filtering is left to the caller.
        Sub-select groups themselves are not yielded, only what is found
        inside them.
    """
    # Clause keywords that terminate the table-reference part.  'GROUP BY'
    # can arrive as a single keyword token; 'ORDER BY' is listed on the
    # assumption that the lexer treats it the same way (the original list
    # only had 'ORDER', which would then never match).
    stop_keywords = ('ORDER', 'ORDER BY', 'GROUP', 'GROUP BY', 'BY', 'HAVING')

    from_seen = False
    for item in parsed.tokens:
        if item.is_group:
            # Descend into every group exactly once.  A sub-select is always
            # a group, so it is covered here and must not be traversed a
            # second time below (that would emit its tokens twice).
            for nested in extract_from_part(item):
                yield nested

        keyword = None
        if item.ttype is Keyword:
            # Collapse internal whitespace so "GROUP   BY" or a keyword
            # split across lines still matches the stop list.
            keyword = ' '.join(item.value.upper().split())

        if from_seen:
            if keyword in stop_keywords:
                # Leave the FROM part but keep scanning the statement.
                from_seen = False
            elif not is_subselect(item):
                yield item

        if keyword == 'FROM':
            from_seen = True
def extract_table_identifiers(token_stream):
    """Yield normalised identifier strings found in *token_stream*.

    ``Identifier`` tokens contribute one value each, ``IdentifierList``
    tokens contribute one value per member returned by
    ``get_identifiers()``.  All other tokens are ignored.

    Normalisation removes every double-quote character and lower-cases the
    text.

    NOTE(review): the text is taken from ``.value``, which presumably is the
    token's full source text, so an alias ("film f") would be part of the
    yielded string -- confirm against real input.

    Args:
        token_stream: Iterable of tokens, e.g. the output of
            ``extract_from_part``.

    Yields:
        str: The normalised text of each identifier.
    """
    for item in token_stream:
        if isinstance(item, IdentifierList):
            identifiers = item.get_identifiers()
        elif isinstance(item, Identifier):
            identifiers = (item,)
        else:
            continue
        for identifier in identifiers:
            yield identifier.value.replace('"', '').lower()
def extract_tables(sql):
    """Return the table identifiers referenced in the FROM parts of *sql*.

    *sql* may contain several statements.  Each one is processed on its own:
    identifiers are de-duplicated within a statement and reported in the
    order they first appear, and the per-statement results are concatenated.
    A name used by two different statements therefore appears twice.

    Statements for which ``get_type()`` returns ``'UNKNOWN'`` are skipped.
    NOTE(review): a fragment that begins with FROM instead of a statement
    keyword presumably falls into that category and yields nothing --
    confirm.

    Args:
        sql: SQL text containing zero or more statements.

    Returns:
        list: Normalised identifier strings as produced by
        ``extract_table_identifiers``.
    """
    extracted_tables = []
    for statement in sqlparse.parse(sql):
        if statement.get_type() == 'UNKNOWN':
            continue
        # De-duplicate per statement while keeping first-seen order, so the
        # result is stable between runs (a plain set() is not).
        seen = set()
        for name in extract_table_identifiers(extract_from_part(statement)):
            if name not in seen:
                seen.add(name)
                extracted_tables.append(name)
    return extracted_tables