使用多表连接创建BigQuery语句

时间:2014-01-04 00:58:38

标签: sql join google-bigquery

我在使用BigQuery时遇到了问题,我想我在查询的某个地方漏掉了什么:

-- Query as posted in the question (BigQuery, bracketed table names).
-- It does not parse as written: the second JOIN below is followed by a bare
-- SELECT rather than a parenthesized subquery.
SELECT 
    timestamp, 
    REGEXP_EXTRACT(Analyte.id,r'^Patient, (\d+)') as patientId, 
    Analyte.contextId as contextId, 
    Analyte.analyteServiceCode as analyteServiceCode, 
    Analyte.resultRefRangeLow as analyteLowValue, 
    Analyte.resultRefRangeHigh as analyteHighValue, 
    Analyte.resultValue as resultValue 
FROM [lustgarten_sandbox.Analyte] as Analyte 
-- First join: a derived table pairing AnalyteMapping with AnalyteName on id.
join (
    SELECT 
        AnalyteMapping.CONTEXTID as contextId, 
        AnalyteMapping.CODE as analyteServiceCode, 
        AnalyteName.NAME
    FROM [lustgarten_sandbox.AnalyteMapping] as AnalyteMapping 
    join [lustgarten_sandbox.AnalyteName] as AnalyteName
    on AnalyteName.id = AnalyteMapping.id
    -- NOTE(review): an upper-cased value can never equal the mixed-case
    -- literal 'Analyte'; presumably a placeholder -- confirm the intended value.
    where upper(AnalyteName.NAME) = 'Analyte' ) as SpecificAnalyte 
on  SpecificAnalyte.contextId = Analyte.contextId 
    and SpecificAnalyte.analyteServiceCode = Analyte.analyteServiceCode
-- Second join: the SELECT that follows is not wrapped in parentheses, which is
-- where the statement stops being valid.
join 
SELECT 
    ldePatientId as ptID, 
    origBreedCode, 
    speciesId, 
    origBreedName, 
    origSpeciesCode, 
    breedId, 
    birthDate.string, 
    birthDate.date_time, 
    -- NOTE(review): the Analyte.* items below use aliases defined by the outer
    -- SELECT list; this looks like the intended final projection pasted into
    -- the Patient side of the join -- confirm.
    Analyte.timestamp, 
    Analyte.patientId, 
    Analyte.contextId, 
    Analyte.analyteServiceCode as analyteServiceCode, 
    Analyte.analyteLowValue, Analyte.analyteHighValue, 
    Analyte.resultValue
FROM [lustgarten_sandbox.Patient] as Patient
    on Analyte.patientId = Patient.ptID 
where 
    Patient.ptID IS NOT NULL 
    AND Patient.speciesId IS NOT NULL Limit 12000;

有人能帮帮我吗?我似乎找不到错误所在。请告诉我哪里出错了!

2 个答案:

答案 0 :(得分:3)

让我美化查询:

SELECT -- Auto-formatted copy of the question's query; it still fails to parse at the second JOIN.
  timestamp,
  REGEXP_EXTRACT(Analyte.id,
    r'^Patient,
    (\d+)') AS patientId, -- NOTE(review): the formatter split this regex literal across lines; the question has r'^Patient, (\d+)' on one line
  Analyte.contextId AS contextId,
  Analyte.analyteServiceCode AS analyteServiceCode,
  Analyte.resultRefRangeLow AS analyteLowValue,
  Analyte.resultRefRangeHigh AS analyteHighValue,
  Analyte.resultValue AS resultValue
FROM
  [lustgarten_sandbox.Analyte] AS Analyte
JOIN ( -- first join: derived table built from AnalyteMapping and AnalyteName
  SELECT
    AnalyteMapping.CONTEXTID AS contextId,
    AnalyteMapping.CODE AS analyteServiceCode,
    AnalyteName.NAME
  FROM
    [lustgarten_sandbox.AnalyteMapping] AS AnalyteMapping
  JOIN
    [lustgarten_sandbox.AnalyteName] AS AnalyteName
  ON
    AnalyteName.id = AnalyteMapping.id
  WHERE
    UPPER(AnalyteName.NAME) = 'Analyte' -- NOTE(review): an upper-cased value cannot equal this mixed-case literal; presumably a placeholder
    ) AS SpecificAnalyte
ON
  SpecificAnalyte.contextId = Analyte.contextId
  AND SpecificAnalyte.analyteServiceCode = Analyte.analyteServiceCode
JOIN -- second join: the SELECT that follows is not wrapped in parentheses
SELECT
  ldePatientId AS ptID,
  origBreedCode,
  speciesId,
  origBreedName,
  origSpeciesCode,
  breedId,
  birthDate.string,
  birthDate.date_time,
  Analyte.timestamp,
  Analyte.patientId,
  Analyte.contextId,
  Analyte.analyteServiceCode AS analyteServiceCode,
  Analyte.analyteLowValue,
  Analyte.analyteHighValue,
  Analyte.resultValue
FROM
  [lustgarten_sandbox.Patient] AS Patient
ON
  Analyte.patientId = Patient.ptID
WHERE
  Patient.ptID IS NOT NULL
  AND Patient.speciesId IS NOT NULL
LIMIT
  12000;

运行它会得到:“错误:在第30行第1列遇到了 "JOIN"。期望的是:…”

现在更容易发现问题了:与 Patient 连接的那个子查询需要用括号括起来:

像这样:

SELECT -- Same query with the Patient subquery parenthesized; it now has two JOIN clauses on one level.
  timestamp,
  REGEXP_EXTRACT(Analyte.id,
    r'^Patient,
    (\d+)') AS patientId, -- NOTE(review): regex literal is split across lines; the question has r'^Patient, (\d+)' on one line
  Analyte.contextId AS contextId,
  Analyte.analyteServiceCode AS analyteServiceCode,
  Analyte.resultRefRangeLow AS analyteLowValue,
  Analyte.resultRefRangeHigh AS analyteHighValue,
  Analyte.resultValue AS resultValue
FROM
  [lustgarten_sandbox.Analyte] AS Analyte
JOIN ( -- first JOIN clause of this SELECT
  SELECT
    AnalyteMapping.CONTEXTID AS contextId,
    AnalyteMapping.CODE AS analyteServiceCode,
    AnalyteName.NAME
  FROM
    [lustgarten_sandbox.AnalyteMapping] AS AnalyteMapping
  JOIN
    [lustgarten_sandbox.AnalyteName] AS AnalyteName
  ON
    AnalyteName.id = AnalyteMapping.id
  WHERE
    UPPER(AnalyteName.NAME) = 'Analyte' -- NOTE(review): an upper-cased value cannot equal this mixed-case literal; presumably a placeholder
    ) AS SpecificAnalyte
ON
  SpecificAnalyte.contextId = Analyte.contextId
  AND SpecificAnalyte.analyteServiceCode = Analyte.analyteServiceCode
JOIN ( -- second JOIN clause of the same SELECT; the subquery is now wrapped in parentheses
  SELECT
    ldePatientId AS ptID,
    origBreedCode,
    speciesId,
    origBreedName,
    origSpeciesCode,
    breedId,
    birthDate.string,
    birthDate.date_time,
    Analyte.timestamp, -- NOTE(review): this subquery reads only from Patient, so the Analyte.* items presumably cannot resolve here -- confirm
    Analyte.patientId,
    Analyte.contextId,
    Analyte.analyteServiceCode AS analyteServiceCode,
    Analyte.analyteLowValue,
    Analyte.analyteHighValue,
    Analyte.resultValue
  FROM
    [lustgarten_sandbox.Patient]) AS Patient
ON
  Analyte.patientId = Patient.ptID
WHERE
  Patient.ptID IS NOT NULL
  AND Patient.speciesId IS NOT NULL
LIMIT
  12000;

现在我们有了“错误:28.1 - 0.0:查询不能有多个JOIN子句”。

剩下的唯一步骤,是把其中一个JOIN移到子查询中(就像您对“AnalyteName.id = AnalyteMapping.id”那个连接已经做的那样),这样查询就能运行了。

例如(我手头没有实际的数据或表结构,因此无法验证):

-- Patient attributes joined to the analyte results of interest.
-- Each SELECT level contains exactly one JOIN clause: the Analyte-to-mapping
-- join lives in the "Analyte" derived table, and the outer level joins that
-- derived table to Patient.
SELECT
  ptID,
  origBreedCode,
  speciesId,
  origBreedName,
  origSpeciesCode,
  breedId,
  birthDate.string,
  birthDate.date_time,
  Analyte.timestamp,
  Analyte.patientId,
  Analyte.contextId,
  Analyte.analyteServiceCode AS analyteServiceCode,
  Analyte.analyteLowValue,
  Analyte.analyteHighValue,
  Analyte.resultValue
FROM (
  -- Analyte rows whose (contextId, analyteServiceCode) pair maps to the
  -- analyte name being searched for.
  SELECT
    timestamp,
    -- The pattern must stay on one line: a line break inside the literal
    -- would become part of the regular expression.
    REGEXP_EXTRACT(Analyte.id, r'^Patient, (\d+)') AS patientId,
    Analyte.contextId AS contextId,
    Analyte.analyteServiceCode AS analyteServiceCode,
    Analyte.resultRefRangeLow AS analyteLowValue,
    Analyte.resultRefRangeHigh AS analyteHighValue,
    Analyte.resultValue AS resultValue
  FROM
    [lustgarten_sandbox.Analyte] AS Analyte
  JOIN (
    SELECT
      AnalyteMapping.CONTEXTID AS contextId,
      AnalyteMapping.CODE AS analyteServiceCode,
      AnalyteName.NAME
    FROM
      [lustgarten_sandbox.AnalyteMapping] AS AnalyteMapping
    JOIN
      [lustgarten_sandbox.AnalyteName] AS AnalyteName
    ON
      AnalyteName.id = AnalyteMapping.id
    WHERE
      -- The literal is upper-cased to match UPPER(); a mixed-case literal
      -- could never compare equal. Replace with the real analyte name.
      UPPER(AnalyteName.NAME) = 'ANALYTE'
      ) AS SpecificAnalyte
  ON
    SpecificAnalyte.contextId = Analyte.contextId
    AND SpecificAnalyte.analyteServiceCode = Analyte.analyteServiceCode
    ) AS Analyte
JOIN (
  -- Project every Patient column that the outer SELECT list reads.
  -- NOTE(review): assumes the nested birthDate.* fields stay addressable by
  -- their dotted names through a subquery -- confirm against the table schema.
  SELECT
    ldePatientId AS ptID,
    origBreedCode,
    speciesId,
    origBreedName,
    origSpeciesCode,
    breedId,
    birthDate.string,
    birthDate.date_time
  FROM
    [lustgarten_sandbox.Patient]) AS Patient
ON
  Analyte.patientId = Patient.ptID
WHERE
  Patient.ptID IS NOT NULL
  AND Patient.speciesId IS NOT NULL
LIMIT
  12000;

答案 1 :(得分:1)

你漏掉了几个括号。请查看代码中我添加的注释:

-- The question's query with parentheses added around the Patient subquery
-- (see the "Added This" markers below).
-- NOTE(review): this statement still has two JOIN clauses on the same SELECT
-- level, and the second JOIN has no ON clause after its closing parenthesis
-- -- confirm it runs before relying on it.
SELECT
    timestamp,
    REGEXP_EXTRACT(Analyte.id,r'^Patient, (\d+)') as patientId,
    Analyte.contextId as contextId, 
    Analyte.analyteServiceCode as analyteServiceCode,
    Analyte.resultRefRangeLow as analyteLowValue,
    Analyte.resultRefRangeHigh as analyteHighValue, 
    Analyte.resultValue as resultValue
FROM
    [lustgarten_sandbox.Analyte] as Analyte 
JOIN
    (
    SELECT
        AnalyteMapping.CONTEXTID as contextId,
        AnalyteMapping.CODE as analyteServiceCode,
        AnalyteName.NAME
    FROM
        [lustgarten_sandbox.AnalyteMapping] as AnalyteMapping
    JOIN
        [lustgarten_sandbox.AnalyteName] as AnalyteName
        ON AnalyteName.id = AnalyteMapping.id
    WHERE
        -- NOTE(review): an upper-cased value can never equal the mixed-case
        -- literal 'Analyte'; presumably a placeholder -- confirm.
        upper(AnalyteName.NAME) = 'Analyte'
    ) as SpecificAnalyte 
    ON SpecificAnalyte.contextId = Analyte.contextId
    AND SpecificAnalyte.analyteServiceCode = Analyte.analyteServiceCode
JOIN
    ( -- Added This
    SELECT
        ldePatientId as ptID,
        origBreedCode,
        speciesId,
        origBreedName,
        origSpeciesCode,
        breedId,
        birthDate.string,
        birthDate.date_time,
        -- NOTE(review): this subquery reads only from Patient, so the
        -- Analyte.* items below presumably cannot resolve here -- confirm.
        Analyte.timestamp, 
        Analyte.patientId,
        Analyte.contextId,
        Analyte.analyteServiceCode as analyteServiceCode,
        Analyte.analyteLowValue,
        Analyte.analyteHighValue,
        Analyte.resultValue
    FROM
        [lustgarten_sandbox.Patient] as Patient
        -- NOTE(review): this ON sits inside the subquery, which has no JOIN
        -- of its own; it presumably belongs after the closing ") Patient".
        ON Analyte.patientId = Patient.ptID
    WHERE
        Patient.ptID IS NOT NULL AND
        Patient.speciesId IS NOT NULL
    ) Patient -- Added This
LIMIT 12000;