I'm trying to build a kind of "personals" matching database in MySQL.
I need to compare a specific person in the Members table against all the other people in the same table.
Each row (person) in the table has a number of columns with that person's information (age, location, religion, etc.).
I need the query to reference a table that holds the "weight" of each of my columns. In other words, I want to be able to say "location matters with a weight of 75, age range matters a lot with a weight of 100, and religion barely matters with a weight of 10".
Members table
+----+-------+----------+----------+-----+----------+
| ID | Name  | Location | Religion | Age | AgeRange |
+----+-------+----------+----------+-----+----------+
| 1  | Joe   | LA       | Atheist  | 40  | 30-40    |
| 2  | Mary  | LA       | Agnostic | 35  | 35-45    |
| 3  | Karen | NYC      | Atheist  | 45  | 30-35    |
| 4  | Lisa  | LA       | Hindu    | 30  | 45-55    |
+----+-------+----------+----------+-----+----------+
Weights table (importance of each parameter)
+----+-----+----------+----------+
| ID | Age | Location | Religion |
+----+-----+----------+----------+
| 1 | 100 | 75 | 10 |
+----+-----+----------+----------+
I've tried a lot of things over the past two days; the latest query I attempted is the one below, which obviously doesn't do much. Among other things, it never specifies the "person" these records should be compared against.
SELECT a.first_name,
       g.name,
       a.age * g.age +
       a.location * g.location +
       a.religion * g.mwReligion AS metric
FROM members a, weight g
ORDER BY metric DESC;
My desired output would be something like this:
Joe's matches:
Mary - score = 285
(100 because she is in his AgeRange + 100 because he is in her AgeRange + 75 for the location + 10 for religion)
Lisa - score = 175 (100 because she is in his AgeRange + 75 for location)
Karen - score = 10 (religion match only)
*** Thanks so much in advance for any and all help on this!!!
Answer 0 (score: 2)
I'll assume that min_age and max_age are separate columns (instead of AgeRange), holding the values with an INT data type. The query you need should look like this:
select
  x.id,
  x.name,
  x.ma as match_age,
  x.ml as match_location,
  x.mr as match_religion,
  x.ma * w.age + x.ml * w.location + x.mr * w.religion as total_score
from (
  select
    o.id,
    o.name,
    case when o.age between p.min_age and p.max_age then 1 else 0 end as ma,
    case when o.location = p.location then 1 else 0 end as ml,
    case when o.religion = p.religion then 1 else 0 end as mr
  from (select * from members where id = 1) p        -- selects Joe
  cross join (select * from members where id <> 1) o -- selects the other members
) x
cross join weights w
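If the reverse age check from the expected output above (Joe's own age falling inside the other member's range) should also earn points, one possible extension (my addition, not something this answer states) is a second CASE expression plus an extra term in the score:

select
  x.id,
  x.name,
  (x.ma + x.ma2) * w.age + x.ml * w.location + x.mr * w.religion as total_score
from (
  select
    o.id,
    o.name,
    case when o.age between p.min_age and p.max_age then 1 else 0 end as ma,  -- other member's age in Joe's range
    case when p.age between o.min_age and o.max_age then 1 else 0 end as ma2, -- Joe's age in the other member's range
    case when o.location = p.location then 1 else 0 end as ml,
    case when o.religion = p.religion then 1 else 0 end as mr
  from (select * from members where id = 1) p        -- selects Joe
  cross join (select * from members where id <> 1) o -- selects the other members
) x
cross join weights w
order by total_score desc;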
Answer 1 (score: 1)
In MySQL, Boolean expressions evaluate to 0 or 1 in a numeric context, so you can use the comparisons themselves as factors.
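As a quick, throwaway illustration of that behavior (not tied to the schema above):

select ('LA' = 'LA')  as same_location,      -- evaluates to 1
       ('LA' = 'NYC') as different_location; -- evaluates to 0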
So self-join members, requiring one member's id to be lower than the other's (otherwise, i.e. if you only check for inequality, every pair of members shows up twice in the result). Then cross join the weights.
Now you can build the metric as a sum of products of comparisons and weights.
I assume religion and location are compared for equality. Each person's age is compared against the other person's age range and vice versa. Furthermore, I split the age range into a lower and an upper bound and assume the bounds are inclusive. Then it could look like this:
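The query below is only a sketch along those lines; it assumes the same min_age / max_age columns as in Answer 0 and a weights table with a single row (the table and column names are taken from the schemas shown earlier and may need adjusting):

select a.name as member_a,
       b.name as member_b,
       (a.location = b.location) * w.location
     + (a.religion = b.religion) * w.religion
     + (a.age between b.min_age and b.max_age) * w.age   -- a's age inside b's range
     + (b.age between a.min_age and a.max_age) * w.age   -- b's age inside a's range
       as metric
from members a
join members b on a.id < b.id   -- each pair of members scored only once
cross join weights w
order by metric desc;

The a.id < b.id condition is what keeps each pair from appearing twice; to score one specific person against everyone else instead, replace it with a filter such as a.id = 1 and b.id <> 1.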