set hivevar:ds=2018-12-01;

Question

我有一个按天分区的表。我尝试先设置变量：

set hivevar:ds=2018-12-01;

然后使用 **INSERT OVERWRITE table XTABLE partition(day='${hivevar:ds}')** 插入数据，效果很好。

但是当我像下面这样设置变量时：

set hivevar:pd=date_add('${hivevar:ds}',-1);

然后执行 **INSERT OVERWRITE table XTABLE partition(day='${hivevar:pd}')**，它会抛出错误。我认为问题出在多余的引号上，但找不到解决方法。

错误是：

cannot recognize input near ''date_add('' '2018' '-' in constant

MYCODE：

-- Reproduction script from the question: the first INSERT succeeds, the second fails.
-- ds holds a plain literal, so substituting it inside quotes yields a valid string constant.
set hivevar:ds=2018-12-01;
-- NOTE(review): presumably the right-hand side is stored as raw text and date_add() is
-- NOT evaluated here; the parse error quoted below is consistent with
-- day='date_add('2018-12-01',-1)' being produced by textual substitution -- confirm.
set hivevar:pd=date_add('${hivevar:ds}',-1);
set hive.exec.dynamic.partition.mode=nonstrict;

-- Target table: three data columns, partitioned by the string column `day`.
CREATE TABLE IF NOT EXISTS XTABLE (emp_id BIGINT, start_time STRING, end_time STRING)
PARTITIONED BY(day STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';


--THIS IS WORKING FINE
-- Static-partition insert: rewrites partition ${hivevar:ds} with its own de-duplicated rows.
INSERT OVERWRITE table XTABLE partition(day='${hivevar:ds}')
select distinct d.emp_id, d.start_time, d.end_time from
(
select emp_id, start_time, end_time from XTABLE where day='${hivevar:ds}'
) d;


--THIS IS THROWING AN ERROR cannot recognize input near ''date_add('' '2018' '-' in constant
--SEEMS PROBLEM IS WHILE SETTING THE VARIABLE
-- Same statement as above, but using ${hivevar:pd}, whose value contains quotes of its own.
INSERT OVERWRITE table XTABLE partition(day='${hivevar:pd}')
select distinct d.emp_id, d.start_time, d.end_time from
(
select emp_id, start_time, end_time from XTABLE where day='${hivevar:pd}'
) d;

如果成功，则应显示如下消息：

将数据加载到表xtable分区（day = 2018-12-01）

Answer 1

@saicharan 设置变量时不能使用函数。我曾经遇到过类似的问题。

set hivevar:ds='应始终为静态值'

要解决此问题，您需要创建一个简单的脚本，如下所示：

# Compute the partition date in the shell and hand it to Hive as a static value.
# NOTE(review): "+1 day" yields tomorrow's date; the question asks for the previous
# day, which would be "-1 day" -- confirm which offset is intended.
ds=$(date -d "+1 day" +"%Y-%m-%d")
echo "$ds"

# The backslash stops the shell from expanding ${hivevar:ds} itself inside the
# double-quoted string (it would otherwise expand to an empty string), so Hive
# receives the placeholder and substitutes the value passed via --hivevar.
hive --hivevar ds="${ds}" -e "INSERT OVERWRITE table XTABLE partition(day='\${hivevar:ds}') "

这应该可以解决您的问题。让我知道它是否有效。

Answer 2

当前，您正在尝试使用静态分区插入，并且在分区规范中使用了函数。您可以改用动态分区插入，在数据集中提供分区列：

import cv2
import os
import time
from scipy import spatial
import xml.etree.ElementTree as ET

def parse_annotation(ann_dir, labels=[]):
    """Parse one XML annotation source into image metadata and label counts.

    Args:
        ann_dir: Path (or file object) handed to ``ET.parse``.
        labels: Names to keep. When empty, every named object is kept.

    Returns:
        A ``(all_imgs, seen_labels)`` tuple. ``all_imgs`` is a list holding the
        single image dict (``'object'`` list plus ``'width'``/``'height'`` when
        present), or an empty list if no object was kept. ``seen_labels`` maps
        every encountered object name to its occurrence count, including names
        that were filtered out.
    """
    seen_labels = {}
    img = {'object': []}
    box_keys = ('xmin', 'ymin', 'xmax', 'ymax')

    for elem in ET.parse(ann_dir).iter():
        tag = elem.tag
        if 'width' in tag:
            img['width'] = int(elem.text)
        if 'height' in tag:
            img['height'] = int(elem.text)
        if not ('object' in tag or 'part' in tag):
            continue

        obj = {}
        for attr in elem:
            if 'name' in attr.tag:
                name = attr.text
                obj['name'] = name
                # Count the label before filtering, so rejected names are tallied too.
                seen_labels[name] = seen_labels.get(name, 0) + 1

                if len(labels) > 0 and name not in labels:
                    # Unwanted label: abandon the rest of this object's children.
                    break
                img['object'].append(obj)

            if 'bndbox' in attr.tag:
                for dim in attr:
                    for key in box_keys:
                        if key in dim.tag:
                            obj[key] = int(round(float(dim.text)))

    all_imgs = [img] if len(img['object']) > 0 else []
    return all_imgs, seen_labels

# Label filter passed to parse_annotation for every annotation file.
# NOTE(review): the prediction loop later in this file compares against
# 'Platelets' (plural) while this filter uses 'Platelet' -- confirm which
# spelling the annotation files actually use.
labels = ['RBC', 'WBC', 'Platelet']

annotations_dir = 'C:/Users/Neerajan/Desktop/Blood-Cell-Count/Testing/Annotations'
for ann_file in os.listdir(annotations_dir):
    ann_dir = annotations_dir + '/' + ann_file
    # Keep the per-file label counts in their own name: rebinding `labels` to the
    # returned dict would replace the filter used for every following file.
    ground_truths, seen_labels = parse_annotation(ann_dir, labels)
    print(ann_file, seen_labels)

def iou(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        boxA: Sequence ``(x_min, y_min, x_max, y_max)``.
        boxB: Sequence ``(x_min, y_min, x_max, y_max)``.

    Returns:
        The overlap ratio as a float; 0 when the boxes do not overlap.
        Extents use the inclusive-pixel convention (``max - min + 1``).
    """
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each extent separately: if the boxes are disjoint on both axes, the
    # two negative extents would otherwise multiply to a positive bogus area.
    inter_w = max(0, xB - xA + 1)
    inter_h = max(0, yB - yA + 1)
    interArea = inter_w * inter_h

    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    return interArea / float(boxAArea + boxBArea - interArea)

from darkflow.net.build import TFNet
import numpy as np

# Testing Dataset
# Options handed to darkflow's TFNet.
# NOTE(review): the model path was split across two lines inside the string
# literal (a syntax error); it is rejoined here as 'Blood-Cells-Project' --
# confirm the real directory name.
options = {
    'model': 'C:/Users/Neerajan/Desktop/Blood-Cells-Project/cfg/tiny-yolo-voc-3c.cfg',
    'load': 3750,      # presumably the checkpoint step to restore -- confirm
    'threshold': 0.1,  # presumably the minimum detection confidence -- confirm
    # 'gpu': 0.7
}

tfnet = TFNet(options)

# Run the detector over every test image, draw and save the detections, and
# collect per-image boxes, class ids and confidences.
avg_time = 0
num_images = 0  # images processed; divisor for the average time below
pred_bb = []
pred_cls = []
pred_conf = []

for file_name in os.listdir('C:/Users/Neerajan/Desktop/Blood-Cell-Count/Testing/Images/'):
    tic = time.time()
    image = cv2.imread('C:/Users/Neerajan/Desktop/Blood-Cell-Count/Testing/Images/' + file_name)
    output = tfnet.return_predict(image)


    rbc = 0
    wbc = 0
    platelets = 0

    cell = []
    cls = []
    conf = []

    # Accepted platelet boxes for this image, used to reject near-duplicates.
    record = []
    tl_ = []
    br_ = []
    iou_ = []
    iou_value = 0

    for prediction in output:
        label = prediction['label']
        confidence = prediction['confidence']
        tl = (prediction['topleft']['x'], prediction['topleft']['y'])
        br = (prediction['bottomright']['x'], prediction['bottomright']['y'])
        # Per-class confidence cut-offs.
        if label == 'RBC' and confidence < .5:
            continue
        if label == 'WBC' and confidence < .25:
            continue
        if label == 'Platelets' and confidence < .25:
            continue

        # clearing up spurious platelets
        if label == 'Platelets':
            if record != []:
                # Compare against the already-accepted platelet whose top-left
                # corner is nearest to this one.
                tree = spatial.cKDTree(record)
                index = tree.query(tl)[1]
                iou_value = iou(tl + br, tl_[index] + br_[index])
                iou_.append(iou_value)
            if iou_value > 0.1:
                continue
            record.append(tl)
            tl_.append(tl)
            br_.append(br)

        # image = cv2.rectangle(image, tl, br,color, 2)
        center_x = int((tl[0] + br[0]) / 2)
        center_y = int((tl[1] + br[1]) / 2)
        center = (center_x, center_y)
        # Random fallback colour; overridden below for the three known labels.
        color = tuple(255 * np.random.rand(3))
        if label == 'RBC':
            color = (255, 0, 0)
            rbc = rbc + 1
        if label == 'WBC':
            color = (0, 255, 0)
            wbc = wbc + 1
        if label == 'Platelets':
            color = (0, 0, 255)
            platelets = platelets + 1
        radius = int((br[0] - tl[0]) / 2)
        image = cv2.circle(image, center, radius, color, 2)
        font = cv2.FONT_HERSHEY_COMPLEX
        image = cv2.putText(image, label, (center_x - 15, center_y + 5), font, .5, color, 1)
        cell.append([tl[0], tl[1], br[0], br[1]])
        if label == 'RBC': cls.append(0)
        if label == 'WBC': cls.append(1)
        if label == 'Platelets': cls.append(2)
        conf.append(confidence)
    pred_bb.append(cell)
    pred_cls.append(cls)
    pred_conf.append(conf)
    #cv2.putText(image, 'Total RBC: ' + str(rbc) + ', WBC: ' + str(wbc) + ', Platelets: ' + str(platelets), (0,image.shape[0] -10), cv2.FONT_HERSHEY_TRIPLEX, 0.5,  (0,0,0), 1)
    cv2.imshow('RBC: ' + str(rbc) + ', WBC: ' + str(wbc) + ', Platelets: ' + str(platelets), image)
    cv2.imwrite('C:/Users/Neerajan/Desktop/ReportDump/' + file_name, image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # NOTE(review): the timed span includes imshow/imwrite and the blocking
    # waitKey(0), so it measures more than inference -- confirm this is intended.
    toc = time.time()
    avg_time = avg_time + (toc - tic) * 1000
    num_images += 1

# Mean time per image in milliseconds. Divide by the number of images actually
# processed rather than a hard-coded 60, which was only right for 60 files.
avg_time = avg_time / num_images if num_images else 0.0

这将起作用，但是由于分区裁剪不适用于函数，因此可能导致全表扫描。因此，最好的解决方案是在 shell 中计算前一天的日期，并将其作为参数传递给 HQL 脚本：

set hivevar:ds=2018-12-01;

set hive.exec.dynamic.partition.mode=nonstrict;

-- Dynamic-partition insert: the partition value comes from the last selected
-- column, so `day` must be part of the outer select list.
INSERT OVERWRITE table XTABLE partition(day)
select distinct d.emp_id, d.start_time, d.end_time, d.day from
(
select emp_id, start_time, end_time, day --partition present in dataset, also it can be date_sub('${hivevar:ds}',1) as day
from XTABLE where day=date_sub('${hivevar:ds}',1)
) d;

在脚本中先计算日期，再将其传给 Hive（两条命令需分开执行，否则 `$ds` 会在赋值生效之前被展开）：`ds=$(date +"%Y-%m-%d" --date " -1 day"); hive --hiveconf ds="$ds" -f your_script.hql`

我无法将数据（使用 set 声明的变量）插入按天分区的表

set hivevar:ds=2018-12-01;

set hivevar:pd=date_add('${hivevar:ds}',-1);

cannot recognize input near ''date_add('' '2018' '-' in constant

2 个答案: