使用查找矩阵中最接近的值更新表

时间:2015-06-03 09:45:20

标签: sql sql-server matrix sql-update lookup-tables

表1

第一个表是包含参考值的矩阵,如下所示:

-- Lookup matrix: one row per (x, z) grid point with its reference value.
create table dm_matrix
(x float,
z float,
avgValue float)


-- Sample data: rand() supplies a placeholder value for each grid point.
-- The target columns are named explicitly so the inserts do not depend on
-- the physical column order of the table.
insert into dm_matrix (x, z, avgValue) values (1,1, rand())
insert into dm_matrix (x, z, avgValue) values (1,2, rand())
...
insert into dm_matrix (x, z, avgValue) values (4,3, rand())
insert into dm_matrix (x, z, avgValue) values (4,4, rand())

创建像这样的矩阵

z\x |  1  |  2  |  3  |  4  |
-----------------------------
  1 |  .1 |  .7 |  .3 |  .2 |
  2 |  .5 |  .1 |  .8 |  .6 |
  3 |  .6 |  .2 |  .3 |  .9 |
  4 |  .4 |  .3 |  .3 |  .5 |

表2

第二个表是具有坐标的事件列表:

-- Event list: vx / vz are the event coordinates; v is not set by the
-- inserts below and is meant to be filled from the lookup matrix later.
CREATE TABLE dm_values (
    vx FLOAT,
    vz FLOAT,
    v  FLOAT
)

-- Sample events with coordinates computed as 1 + rand() * 3.
INSERT INTO dm_values (vx, vz)
VALUES (1 + RAND() * 3, 1 + RAND() * 3)
...
INSERT INTO dm_values (vx, vz)
VALUES (1 + RAND() * 3, 1 + RAND() * 3)

因此条目具有坐标,但没有事件的值

   vx  |  vz  |   v   |
-----------------------
  1.3  |  2.7 |  null |
  2.6  |  2.7 |  null |
  1.3  |  3.3 |  null |
  1.9  |  1.1 |  null |
  3.0  |  2.9 |  null |
  ...  |  ... |   ... |

任务

我想用第一个表中最接近的值更新我的第二个表。因此,对于第一个条目(x = 1.3,z = 2.7),我希望将值更新为查找矩阵中的值,其中x = 1且z = 3(.6)。更新后的表格如下所示:

   vx  |  vz  |   v   |
-----------------------
  1.3  |  2.7 |   0.6 |
  2.6  |  2.7 |   0.3 |
  1.3  |  3.3 |   0.6 |
  1.9  |  1.1 |   0.7 |
  3.0  |  2.9 |   0.3 |
  ...  |  ... |   ... |

问题

理论上,我可以使用游标逐行处理。但是我的表1大约有2000行(~50x40),而我的表2有数百万行。游标方法需要数天、数周甚至数月才能完成。我需要一个性能更好的解决方案,并感谢任何提示。

2 个答案:

答案 0 :(得分:2)

我认为你应该使用 `cross apply` 或相关子查询:

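update v
    set v = m.avgValue
    from dm_values v cross apply
         (select top 1 m.avgValue
          from dm_matrix m
          order by power(m.x - v.vx, 2) + power(m.z - v.vz, 2)
         ) m;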

我不确定你使用的距离是什么函数,但欧几里德距离似乎是一种合理的解释。

答案 1 :(得分:1)

从我所看到的情况来看,您应该能够使用带 JOIN 的 UPDATE,并在 JOIN 条件中对 vx 和 vz 的值使用 ROUND 取整。至于性能,您必须在自己的数据集上进行测试。

以下是JOIN数据的基本方法,请注意我已填写INSERT脚本以获得完整的矩阵:

-- Lookup matrix (temp table): one reference value per (x, z) grid point.
-- avgValue is DECIMAL(2, 1) for this demo only; adjust it to the real data.
CREATE TABLE #dm_matrix
    (
      x FLOAT ,
      z FLOAT ,
      avgValue DECIMAL(2, 1)
    );

-- Fill the complete 4 x 4 grid (x = 1..4, z = 1..4) with RAND() placeholder
-- values. Every INSERT names its target columns so it does not depend on the
-- column order of the table.
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 1, 1, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 1, 2, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 1, 3, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 1, 4, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 2, 1, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 2, 2, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 2, 3, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 2, 4, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 3, 1, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 3, 2, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 3, 3, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 3, 4, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 4, 1, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 4, 2, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 4, 3, RAND() );
INSERT  INTO #dm_matrix ( x, z, avgValue )
VALUES  ( 4, 4, RAND() );

-- Show the generated matrix.
SELECT  x ,
        z ,
        avgValue
FROM    #dm_matrix;

-- Event list (temp table): vx / vz are the event coordinates, v is the value
-- to be looked up. DECIMAL(2, 1) is a demo-only choice for the coordinates.
CREATE TABLE #dm_values
    (
      vx DECIMAL(2, 1) ,
      vz DECIMAL(2, 1) ,
      v FLOAT
    );

-- Two sample events; v is not supplied, so it stays NULL until the update.
INSERT  INTO #dm_values
        ( vx, vz )
VALUES  ( 1 + RAND() * 3, 1 + RAND() * 3 );
INSERT  INTO #dm_values
        ( vx, vz )
VALUES  ( 1 + RAND() * 3, 1 + RAND() * 3 );

-- Show the events before the lookup, with columns named explicitly.
SELECT  vx ,
        vz ,
        v
FROM    #dm_values;

-- replace this SELECT with the UPDATE commands below to update values
-- Each event is paired with the matrix row whose x / z equal the event's
-- coordinates rounded to 0 decimal places.
SELECT
    v.vx,
    v.vz,
    m.avgValue
FROM #dm_values AS v
INNER JOIN #dm_matrix AS m
    ON  m.x = ROUND(v.vx, 0)
    AND m.z = ROUND(v.vz, 0)

-- Remove both temp tables once the demo is done.
DROP TABLE #dm_matrix
DROP TABLE #dm_values

对于UPDATE你会做这样的事情:

-- Write the matched matrix value into every event row. An event matches the
-- matrix row whose x / z equal its coordinates rounded to 0 decimal places;
-- events without a matching matrix row are not touched (inner join).
UPDATE v
SET    v.v = m.avgValue
FROM   #dm_values AS v
       INNER JOIN #dm_matrix AS m
           ON  m.x = ROUND(v.vx, 0)
           AND m.z = ROUND(v.vz, 0)

-- Show the events after the update.
SELECT * FROM #dm_values

产生的结果:

矩阵:

x   z   avgValue
1   1   0.6
1   2   0.9  -- row 2 below
1   3   0.4
1   4   0.5
2   1   0.7
2   2   0.4
2   3   0.5  -- row 1 below
2   4   0.5
3   1   0.4
3   2   0.1
3   3   0.3
3   4   0.8
4   1   0.1
4   2   1.0
4   3   0.5
4   4   0.5  

值:

vx  vz  v
1.8 2.8 NULL  -- x = 2, z = 3
1.3 1.5 NULL  -- x = 1, z = 2

更新后:

vx  vz  v
1.8 2.8 0.5
1.3 1.5 0.9

注意:

出于本文的目的,我已将数据类型更改为DECIMAL(2, 1),因此您可能需要根据实际数据集对其进行修改。