我想在 PostgreSQL 中运行下面的查询。查询本身能返回我想要的结果,但我还想按 THISHERE 这一列的值来筛选结果,而 where 子句却无法引用该列:
-- Counts events per (event, userid, campaign_id) group and ranks those counts
-- within each campaign, highest count first.
-- NOTE: this statement fails as written. THISHERE is a select-list alias for a
-- window function, and the WHERE clause cannot reference it; the filter has
-- to be applied in an outer query that wraps this one.
select count(event), event, u.userid, u.campaign_id,
rank() over (partition by u.campaign_id order by count(event) desc) as THISHERE
from events e join users u on u.userid = e.userid
where THISHERE=1
group by event, 3 , 4 -- 3 and 4 are select-list positions: u.userid, u.campaign_id
order by 1 desc -- 1 is the first select-list column: count(event)
limit 20;
错误:列 "thishere" 不存在
答案 0 :(得分:1)
您需要使用子查询:
import numpy
import pandas as pd
import pyodbc
import time
class MyDfInsert:
    """Insert the rows of a pandas DataFrame with multi-row ``INSERT ... VALUES``.

    All work happens in the constructor: rows are read from ``data_frame`` in
    order, grouped into batches, and every batch is executed on ``cnxn`` as a
    single parameterized statement of the form
    ``<sql_stub> VALUES (?,?),(?,?),...``.  No commit is issued here; that is
    left to the connection's settings or to the caller.

    Args:
        cnxn: Open DB-API style connection.  Only ``cnxn.cursor()`` is used,
            and each cursor is closed after its batch has been executed.
        sql_stub: Statement text up to, but not including, ``VALUES`` --
            e.g. ``"INSERT INTO #tmp (id, txt)"``.
        data_frame: Rows to insert.  Column values are sent positionally; the
            DataFrame index is not sent.
        rows_per_batch: Requested number of rows per statement.  The value is
            clamped to the range ``1 .. 1000`` and further reduced so that a
            single statement never carries more parameters than
            ``_MAX_PARAMS_PER_STATEMENT``.
    """

    # Hard limit for a SQL Server table value constructor.
    _MAX_ROWS_PER_STATEMENT = 1000
    # SQL Server documents a maximum of 2100 parameters per request; stay a
    # little below it to leave headroom for parameters the driver may add.
    # NOTE(review): 2097 is a conservative margin -- confirm for your driver.
    _MAX_PARAMS_PER_STATEMENT = 2097

    def __init__(self, cnxn, sql_stub, data_frame, rows_per_batch=1000):
        self._cnxn = cnxn
        self._sql_stub = sql_stub
        # The attributes below cache pieces of the SQL text so they are only
        # rebuilt when the column count or the batch row count changes.
        self._num_columns = None
        self._row_placeholders = None
        self._num_rows_previous = None
        self._all_placeholders = None
        self._sql = None

        # One '?' is sent per cell, so wide frames need smaller batches to
        # stay under the per-statement parameter limit.
        column_count = max(1, len(data_frame.columns))
        self._rows_per_batch = max(
            1,  # zero or negative requests degrade to one row per statement
            min(
                rows_per_batch,
                self._MAX_ROWS_PER_STATEMENT,
                self._MAX_PARAMS_PER_STATEMENT // column_count,
            ),
        )

        batch = []
        # index=False / name=None yields plain tuples of the column values.
        for row in data_frame.itertuples(index=False, name=None):
            batch.append(row)
            if len(batch) >= self._rows_per_batch:
                self._send_insert(batch)  # send a full batch
                batch = []
        self._send_insert(batch)  # send any remaining rows (no-op if empty)

    @staticmethod
    def _to_native(value):
        """Return NumPy bool/integer/float scalars as plain Python values.

        Any other value is returned unchanged.
        """
        if isinstance(value, numpy.bool_):
            return bool(value)
        if isinstance(value, numpy.integer):
            return int(value)
        if isinstance(value, numpy.floating):
            return float(value)
        return value

    def _send_insert(self, param_list):
        """Execute one multi-row INSERT for ``param_list``.

        Args:
            param_list: List of equally sized row tuples.  An empty list is
                ignored.
        """
        if not param_list:
            return

        if self._num_columns is None:
            # Depends only on the column count, so this happens once.
            self._num_columns = len(param_list[0])
            self._row_placeholders = ','.join('?' * self._num_columns)
            # e.g. '?,?'

        num_rows = len(param_list)
        if num_rows != self._num_rows_previous:
            # Rebuilt only when the batch size changes (typically just for
            # the final, shorter batch).
            self._all_placeholders = '({})'.format(
                '),('.join([self._row_placeholders] * num_rows)
            )
            # e.g. '(?,?),(?,?),(?,?)'
            self._sql = f'{self._sql_stub} VALUES {self._all_placeholders}'
            self._num_rows_previous = num_rows

        # Flatten the rows into one positional parameter list.
        params = [
            self._to_native(element)
            for row_tup in param_list
            for element in row_tup
        ]

        crsr = self._cnxn.cursor()
        try:
            crsr.execute(self._sql, params)
        finally:
            crsr.close()
if __name__ == '__main__':
    # Demo / timing run: create a temporary table, load a semicolon-separated
    # file into a DataFrame, and time how long MyDfInsert takes to insert it.
    conn_str = (
        'DRIVER=ODBC Driver 11 for SQL Server;'
        'SERVER=192.168.1.134,49242;'
        'Trusted_Connection=yes;'
    )
    cnxn = pyodbc.connect(conn_str, autocommit=True)
    try:
        crsr = cnxn.cursor()
        crsr.execute("CREATE TABLE #tmp (id INT PRIMARY KEY, txt NVARCHAR(50))")
        df = pd.read_csv(r'C:\Users\Gord\Desktop\Query1.txt', sep=';', header=0)
        # perf_counter() is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments.
        t0 = time.perf_counter()
        MyDfInsert(cnxn, "INSERT INTO #tmp (id, txt)", df, rows_per_batch=1000)
        print()
        print(f'Inserts completed in {time.perf_counter() - t0:.2f} seconds.')
    finally:
        # Release the connection even when the file read or an insert fails.
        cnxn.close()
我认为您也可以使用:
select eu.*
from (select count(event), event, u.userid, u.campaign_id,
rank() over (partition by u.campaign_id order by count(event) desc) as THISHERE
from events e join
users u
on u.userid = e.userid
group by event, 3 , 4
) eu
where THISHERE = 1
order by 1 desc
limit 20;
也可以使用 distinct on 来执行您想要的操作: