使用数据浏览器(SEDE),我想查找在 Stack Overflow 上哪些用户的信誉超过200000,然后查找其拥有的任何帐户的详细信息在其他Stack Exchange网站上。
以下是为列表提供此阈值的查询:
Select id, reputation, accountid
From users
Where reputation > 200000
AccountId
是所有Stack Exchange网站的密钥。
我找到了this query for aggregating across SEDE databases,但是根据上一个/基准查询的动态结果怎么做呢?
这是我想要的输出类型
id_so, reputation_so, accounted, other_stackexchange_site_name, reputation_othersite, number_of_answers_other_site, number_of_questions_other_site
1, 250000, 23, serverfault, 500, 5, 1
1, 250000, 23, superuser, 120, 1, 0
2, 300000, 21, serverfault, 300, 3, 2
2, 300000, 21, webmasters, 230, 1, 1
3, 350000, 20, NA, NA, NA, NA
#the case with id 3 has an SO profile with reputation but it has no other profile in other Stack Exchange site
答案 0 :(得分:1)
要基于初始查询跨数据库运行非平凡查询:
AccountId
(这是用户在Stack-Exchange范围内的ID)。创建您的初始查询,以将该键输入到临时表中。在这种情况下:
CREATE TABLE #UsersOfInterest (AccountId INT)
INSERT INTO #UsersOfInterest
SELECT u.AccountId
FROM Users u
Where u.Reputation > 200000
确定查询要在每个站点上运行,以获取所需的信息。 EG:
SELECT u.AccountId, u.DisplayName, u.Reputation, u.Id
, numQst = (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 1)
, numAns = (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 2)
FROM Users u
WHERE u.AccountId = ##seAccntId##
使用系统查询来获取适当的数据库。对于数据浏览器(SEDE),此类型的查询:
SELECT name
FROM sys.databases
WHERE CASE WHEN state_desc = 'ONLINE'
THEN OBJECT_ID (QUOTENAME (name) + '.[dbo].[PostNotices]', 'U')
END
IS NOT NULL
在上述查询上创建光标,并使用它逐步浏览数据库。
对于每个数据库:
sp_executesql
运行查询字符串。完成光标后,对步骤3中的临时表执行最终查询。
有关查询所有Stack Exchange网站的工作模板,请参见this other answer。
将所有内容放在一起,将产生以下查询,您可以运行live on SEDE :
-- MinMasterSiteRep: User's must have this much rep on whichever site this query is run against
-- MinRep: User's must have this much rep on all other sites
CREATE TABLE #UsersOfInterest (
AccountId INT NOT NULL
, Reputation INT
, UserId INT
, PRIMARY KEY (AccountId)
)
INSERT INTO #UsersOfInterest
SELECT u.AccountId, u.Reputation, u.Id
FROM Users u
Where u.Reputation > ##MinMasterSiteRep:INT?200000##
CREATE TABLE #AllSiteResults (
[Master Rep] INT
, [Mstr UsrId] NVARCHAR(777)
, AccountId NVARCHAR(777)
, [Site name] NVARCHAR(777)
, [Username on site] NVARCHAR(777)
, [Rep] INT
, [# Ans] INT
, [# Qst] INT
)
DECLARE @seDbName AS NVARCHAR(777)
DECLARE @seSiteURL AS NVARCHAR(777)
DECLARE @sitePrettyName AS NVARCHAR(777)
DECLARE @seSiteQuery AS NVARCHAR(max)
DECLARE seSites_crsr CURSOR FOR
WITH dbsAndDomainNames AS (
SELECT dbL.dbName
, STRING_AGG (dbL.domainPieces, '.') AS siteDomain
FROM (
SELECT TOP 50000 -- Never be that many sites and TOP is needed for order by, below
name AS dbName
, value AS domainPieces
, row_number () OVER (ORDER BY (SELECT 0)) AS [rowN]
FROM sys.databases
CROSS APPLY STRING_SPLIT (name, '.')
WHERE CASE WHEN state_desc = 'ONLINE'
THEN OBJECT_ID (QUOTENAME (name) + '.[dbo].[PostNotices]', 'U') -- Pick a table unique to SE data
END
IS NOT NULL
ORDER BY dbName, [rowN] DESC
) AS dbL
GROUP BY dbL.dbName
)
SELECT REPLACE (REPLACE (dadn.dbName, 'StackExchange.', ''), '.', ' ' ) AS [Site Name]
, dadn.dbName
, CASE -- See https://meta.stackexchange.com/q/215071
WHEN dadn.dbName = 'StackExchange.Mathoverflow.Meta'
THEN 'https://meta.mathoverflow.net/'
-- Some AVP/Audio/Video/Sound kerfuffle?
WHEN dadn.dbName = 'StackExchange.Audio'
THEN 'https://video.stackexchange.com/'
-- Ditto
WHEN dadn.dbName = 'StackExchange.Audio.Meta'
THEN 'https://video.meta.stackexchange.com/'
-- Normal site
ELSE 'https://' + LOWER (siteDomain) + '.com/'
END AS siteURL
FROM dbsAndDomainNames dadn
WHERE (dadn.dbName = 'StackExchange.Meta' OR dadn.dbName NOT LIKE '%Meta%')
-- Step through cursor
OPEN seSites_crsr
FETCH NEXT FROM seSites_crsr INTO @sitePrettyName, @seDbName, @seSiteURL
WHILE @@FETCH_STATUS = 0
BEGIN
SET @seSiteQuery = '
USE [' + @seDbName + ']
INSERT INTO #AllSiteResults
SELECT
uoi.Reputation AS [Master Rep]
, ''site://u/'' + CAST(uoi.UserId AS NVARCHAR(88)) + ''|'' + CAST(uoi.UserId AS NVARCHAR(88)) AS [Mstr UsrId]
, [AccountId] = ''https://stackexchange.com/users/'' + CAST(u.AccountId AS NVARCHAR(88)) + ''?tab=accounts|'' + CAST(u.AccountId AS NVARCHAR(88))
, ''' + @sitePrettyName + ''' AS [Site name]
, ''' + @seSiteURL + ''' + ''u/'' + CAST(u.Id AS NVARCHAR(88)) + ''|'' + u.DisplayName AS [Username on site]
, u.Reputation AS [Rep]
, (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 2) AS [# Ans]
, (SELECT COUNT(q.Id) FROM Posts q WHERE q.OwnerUserId = u.Id AND q.PostTypeId = 1) AS [# Qst]
FROM #UsersOfInterest uoi
INNER JOIN Users u ON uoi.AccountId = u.AccountId
WHERE u.Reputation > ##MinRep:INT?200##
'
EXEC sp_executesql @seSiteQuery
FETCH NEXT FROM seSites_crsr INTO @sitePrettyName, @seDbName, @seSiteURL
END
CLOSE seSites_crsr
DEALLOCATE seSites_crsr
SELECT *
FROM #AllSiteResults
ORDER BY [Master Rep] DESC, AccountId, [Rep] DESC
其结果如下:
-蓝色值超链接。
请注意,用户在网站上必须拥有200个代表才能使其“重要”。这也是将网站包含在Stack Exchange风格中所需要的代表。