Question

我正在过滤一些数据，例如table.x具有以下结构

column1 | c.2 |column3

0.0.0.0 | 20 | 2019-04-29 14:55:52
0.0.0.0 | 10 | 2019-04-29 14:45:52
0.0.0.0 | 50 | 2019-04-29 14:35:52
0.0.0.0 | 50 | 2019-04-29 14:25:52
0.0.0.0 | 90 | 2019-04-29 14:15:52
0.0.0.1 | 40 | 2019-04-29 14:05:52
0.0.0.1 | 40 | 2019-04-29 13:45:52
0.0.0.1 | 70 | 2019-04-29 13:30:52
0.0.0.4 | 20 | 2019-04-29 13:25:52

我希望结果集返回为

0.0.0.0 | 20 | 2019-04-29 14:55:52
0.0.0.1 | 40 | 2019-04-29 14:05:52
0.0.0.4 | 20 | 2019-04-29 13:25:52

Answer 1

可以对单列使用 DISTINCT ON：

-- Keep exactly one row per column1.
-- DISTINCT ON returns the first row of each column1 group in ORDER BY order,
-- so the sort must start with column1 and then place the newest column3 first;
-- without the ORDER BY the row chosen for each group is unpredictable.
SELECT DISTINCT ON (column1) column1, column2, column3
FROM table_name
ORDER BY column1, column3 DESC

注意。c.2不是有效的列名。

这里是在线演示https://rextester.com/FBIS74019

Answer 2

您需要先按column1分组，取得每个column1对应的column3最大值，然后再与原表联接：

-- Return the full row holding the largest column3 for every column1.
-- The derived table computes the per-column1 maximum; joining back on both
-- columns selects the matching original rows.
SELECT t.*
FROM tablename AS t
INNER JOIN (
  SELECT column1, MAX(column3) AS column3
  FROM tablename
  GROUP BY column1
) AS latest
  ON latest.column1 = t.column1
 AND latest.column3 = t.column3

Answer 3

您可以先按column1分组取column3的最大值，再通过内部联接（INNER JOIN）与原表关联

library(tidyverse)

# Example data: two subjects with twelve test periods each.
df <- tibble::tribble(
  ~Subject, ~Testperiod, ~Condition, ~Trigger,
  1,  1, 0, 0,
  1,  2, 0, 0,
  1,  3, 1, 0,
  1,  4, 2, 0,
  1,  5, 3, 0,
  1,  6, 3, 1,
  1,  7, 1, 1,
  1,  8, 1, 1,
  1,  9, 0, 1,
  1, 10, 0, 1,
  1, 11, 0, 1,
  1, 12, 0, 0,
  2,  1, 0, 0,
  2,  2, 2, 0,
  2,  3, 3, 0,
  2,  4, 3, 0,
  2,  5, 3, 2,
  2,  6, 2, 2,
  2,  7, 1, 1,
  2,  8, 2, 1,
  2,  9, 0, 1,
  2, 10, 0, 0,
  2, 11, 0, 0,
  2, 12, 0, 0
)

# Row positions where Trigger differs from the value on the previous row.
# The first row is compared against NA, so which() never selects it.
trigger_changed <- df$Trigger != dplyr::lag(df$Trigger)
colchanges <- which(trigger_changed)

# Pair each change position with the Trigger value found at that position.
ChangesDF <- data.frame(
  rownum = colchanges,
  Trigger = df$Trigger[colchanges]
)

# Keep the changes that land on Trigger 1, 2 or 3, then expand each of them
# into the change row itself plus the three rows directly before it.
# Overlapping windows are not de-duplicated, so a row can appear twice.
hit_rows <- ChangesDF$rownum[ChangesDF$Trigger %in% 1:3]
rows <- sort(as.vector(outer(hit_rows, c(0, 1, 2, 3), "-")))
rows <- rows[rows > 0]

FinalDF <- df[rows, ]
FinalDF
# A tibble: 12 x 4
   Subject Testperiod Condition Trigger
     <dbl>      <dbl>     <dbl>   <dbl>
 1       1          3         1       0
 2       1          4         2       0
 3       1          5         3       0
 4       1          6         3       1
 5       2          2         2       0
 6       2          3         3       0
 7       2          4         3       0
 8       2          4         3       0
 9       2          5         3       2
10       2          5         3       2
11       2          6         2       2
12       2          7         1       1

Answer 4

尝试以下SQL代码：


import keras
import numpy
import math
import sys
from keras.initializers import glorot_uniform
import keras.backend as K

# Usage: pass the number of training runs as the first command-line argument.
#
# Each run re-initialises a 2-2-1 sigmoid network, trains it on a four-row
# truth table, prints the learned parameters, and re-computes the forward pass
# by hand from those parameters.
from timeit import default_timer as timer


def _sigmoid(z):
    """Logistic function, written out to mirror the 'sigmoid' activation."""
    return 1.0 / (1.0 + math.exp(-z))


if len(sys.argv) < 2:
    print('tell me how many samples to generate')
    sys.exit(-1)

numSamples = int(sys.argv[1])

learningRate = 0.1

# NOTE(review): `lr` is the older Keras keyword for the learning rate; confirm
# it is still accepted by the installed Keras version.
model = keras.models.Sequential()
model.add(keras.layers.Dense(2, activation='sigmoid', input_dim=2))
model.add(keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss=keras.losses.binary_crossentropy, optimizer=keras.optimizers.Adam(lr=learningRate), metrics=['accuracy'])

# Only the shapes of these arrays are used later, to draw fresh values.
initial_weights = model.get_weights()

# Pick a way to turn a backend tensor into a concrete array.
backend_name = K.backend()
if backend_name == 'tensorflow':
    k_eval = lambda placeholder: placeholder.eval(session=K.get_session())
elif backend_name == 'theano':
    k_eval = lambda placeholder: placeholder.eval()
else:
    raise ValueError("Unsupported backend")

# Truth table: the target is 1 when both inputs are equal, otherwise 0.
X = [ [ 0, 0 ], [ 0, 1 ], [ 1, 0 ], [ 1, 1 ] ]
Y = [ [ 1 ], [ 0 ], [ 0 ], [ 1 ] ]

XX = numpy.array(X)
YY = numpy.array(Y)

# Flat parameter list of the previous run; None until the first run finishes.
lastModel = None

earlyStopping = keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience=10, verbose=1)

for _ in range(numSamples):
    # Start every run from newly drawn Glorot-uniform values.
    new_weights = [k_eval(glorot_uniform()(initial.shape)) for initial in initial_weights]
    model.set_weights(new_weights)

    start = timer()

    # The epoch limit is effectively unbounded; early stopping ends the run.
    model.fit(XX, YY, verbose=0, epochs=1000000, callbacks=[earlyStopping], validation_data=(XX, YY))

    end = timer()
    elapsed = end - start
    print("Elapsed seconds to fit new solution:", elapsed)

    score = model.evaluate(XX, YY, verbose=0)
    print()
    print('loss', score)

    weight0, bias0 = model.layers[0].get_weights()
    weight1, bias1 = model.layers[1].get_weights()

    # Hidden-layer kernel is indexed [input][unit].
    w1 = weight0[0][0]
    w2 = weight0[0][1]
    w3 = weight0[1][0]
    w4 = weight0[1][1]
    b1 = bias0[0]
    b2 = bias0[1]

    # Output-layer kernel is indexed [hidden unit][output].
    w5 = weight1[0][0]
    w6 = weight1[1][0]
    b3 = bias1[0]

    w = [w1, w2, w3, w4, w5, w6, b1, b2, b3]
    print(w)

    if lastModel is not None:
        # Root-mean-square difference: the square root of the MEAN squared
        # difference, i.e. divide by the count before taking the root.
        squared_error = sum((current - previous) ** 2 for current, previous in zip(w, lastModel))
        rms = math.sqrt(squared_error / len(w))
        print("rms from last solution = ", rms)
    lastModel = w

    # Re-compute the network output by hand from the extracted parameters.
    for inputs, target in zip(X, Y):
        x1, x2 = inputs
        a01 = _sigmoid(x1 * w1 + x2 * w3 + b1)
        a11 = _sigmoid(x1 * w2 + x2 * w4 + b2)

        yHat = _sigmoid(a01 * w5 + a11 * w6 + b3)
        print(inputs, target, yHat)

结果是：

-- Return, for every IP address (column1), the row with the latest time (column3).
-- Selecting column2 next to aggregates while grouping only by column1 is
-- rejected by standard SQL (or yields an arbitrary column2), so the newest row
-- is picked with a correlated subquery instead of GROUP BY.
SELECT t.column1, t.column2, t.column3
FROM yourTable t
WHERE t.column3 = (
    SELECT max(i.column3)          --latest time for this IP address
    FROM yourTable i
    WHERE i.column1 = t.column1
)

Answer 5

尝试使用窗口函数

-- Keep one row per column1 using a window function.
-- The ORDER BY inside the window makes row number 1 the row with the newest
-- column3; without it the numbering within each partition is arbitrary.
select
    column1, column2, column3
from (
    SELECT
        column1, column2, column3,
        row_number() over ( partition by column1 order by column3 desc ) pbg
    FROM tablename
) aaaa
where aaaa.pbg = 1

有没有一种方法可以将表格中的每列值限制为仅1行？

5 个答案: