create external table H_LINEITEM_EXT
(
L_ORDERKEY string,
L_PARTKEY string,
L_SUPPKEY string,
L_LINENUMBER string,
L_QUANTITY string,
L_EXTENDEDPRICE string,
L_DISCOUNT string,
L_TAX string,
L_RETURNFLAG string,
L_LINESTATUS string,
L_SHIPDATE date,
L_COMMITDATE date,
L_RECEIPTDATE date,
L_SHIPINSTRUCT string,
L_SHIPMODE string,
L_COMMENT string
)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS
INPUTFORMAT "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
OUTPUTFORMAT "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"
LOCATION '/hdata/H_LINEITEM';
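For context: Hive itself reads this table fine, dates included (the all-string workaround at the end is built from it with a CTAS), e.g. a query like this returns rows:

-- quick sanity check that Hive can read the Parquet files, including the DATE columns
SELECT L_ORDERKEY, L_SHIPDATE, L_COMMITDATE, L_RECEIPTDATE
FROM H_LINEITEM_EXT
LIMIT 5;

The Sqoop export command: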
sqoop export \
-Dsqoop.export.records.per.statement=1000000 \
-Dsqoop.export.statements.per.transaction=1000000 \
--connect "jdbc:oracle:thin:@192.168.1.32:1522:orcl" --password "system" --username "sys as sysdba" \
--table "TPCH.HIVE_LINEITEM" --hcatalog-table "H_LINEITEM_EXT" --hcatalog-database "default" --hcatalog-home /home/hadoop/hive \
-m 3 \
--batch \
--verbose

The map tasks fail with a ClassCastException; here is the relevant part of the task log:
2019-09-03 18:23:05,359 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
2019-09-03 18:23:05,606 INFO [main] org.apache.hadoop.mapred.Task: Using ResourceCalculatorProcessTree : [ ]
2019-09-03 18:23:05,841 INFO [main] org.apache.hadoop.mapred.MapTask: Processing split: org.apache.sqoop.mapreduce.hcat.SqoopHCatInputSplit@6c2d4cc6
2019-09-03 18:23:06,584 INFO [main] org.apache.hadoop.hive.conf.HiveConf: Found configuration file null
2019-09-03 18:23:06,884 INFO [main] org.apache.hadoop.conf.Configuration.deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
2019-09-03 18:23:06,924 INFO [main] org.apache.parquet.hadoop.InternalParquetRecordReader: RecordReader initialized will read a total of 730184 records.
2019-09-03 18:23:06,924 INFO [main] org.apache.parquet.hadoop.InternalParquetRecordReader: at row 0. reading next block
2019-09-03 18:23:07,146 INFO [main] org.apache.hadoop.io.compress.CodecPool: Got brand-new decompressor [.snappy]
2019-09-03 18:23:07,154 INFO [main] org.apache.parquet.hadoop.InternalParquetRecordReader: block read in memory in 230 ms. row count = 730184
2019-09-03 18:23:07,576 INFO [main] org.apache.hive.hcatalog.mapreduce.InternalUtil: Initializing org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe with properties {name=default.h_lineitem_ext, numFiles=1, columns.types=string,string,string,string,string,string,string,string,string,string,date,date,date,string,string,string, serialization.format=1, columns=l_orderkey,l_partkey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment, columns.comments=null^@null^@null^@null^@null^@null^@null^@null^@null^@null^@null^@null^@null^@null^@null^@null, bucketing_version=2, EXTERNAL=TRUE, serialization.lib=org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe, totalSize=283419200, column.name.delimiter=,, serialization.null.format=\N, transient_lastDdlTime=1567428097}
2019-09-03 18:23:07,638 DEBUG [main] org.apache.sqoop.mapreduce.AutoProgressMapper: Instructing auto-progress thread to quit.
2019-09-03 18:23:07,638 DEBUG [main] org.apache.sqoop.mapreduce.AutoProgressMapper: Waiting for progress thread shutdown...
2019-09-03 18:23:07,638 INFO [Thread-15] org.apache.sqoop.mapreduce.AutoProgressMapper: Auto-progress thread is finished. keepGoing=false
2019-09-03 18:23:07,638 DEBUG [main] org.apache.sqoop.mapreduce.AutoProgressMapper: Progress thread shutdown detected.
2019-09-03 18:23:07,639 DEBUG [Thread-14] org.apache.sqoop.mapreduce.AsyncSqlOutputFormat: Committing transaction of 0 statements
2019-09-03 18:23:07,643 WARN [main] org.apache.hadoop.mapred.YarnChild: Exception running child :
java.lang.ClassCastException: org.apache.hadoop.hive.common.type.Date cannot be cast to java.sql.Date
at org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.convertToSqoop(SqoopHCatExportHelper.java:193)
at org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.convertToSqoopRecord(SqoopHCatExportHelper.java:138)
at org.apache.sqoop.mapreduce.hcat.SqoopHCatExportMapper.map(SqoopHCatExportMapper.java:56)
at org.apache.sqoop.mapreduce.hcat.SqoopHCatExportMapper.map(SqoopHCatExportMapper.java:35)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146)
at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:799)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:347)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:174)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1729)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:168)
Versions: Hadoop 3.1.1, Sqoop 1.4.7.
I've tried:
--map-column-java "l_shipdate=String,l_commitdate=String,l_receiptdate=String"
or
--map-column-java "L_SHIPDATE=java.sql.Date,L_COMMITDATE=java.sql.Date,L_RECEIPTDATE=java.sql.Date"
Neither helped.
If I create a table in Hive with all the columns of the external table H_LINEITEM_EXT declared as string, then exporting that table succeeds. But that is not a good solution, because it duplicates the data, costing both time and space.
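That workaround looks roughly like this (a sketch; the staging table name H_LINEITEM_STR is mine):

-- staging copy with the DATE columns cast to string, so the export never sees a Hive date
CREATE TABLE H_LINEITEM_STR STORED AS PARQUET AS
SELECT
  L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER, L_QUANTITY,
  L_EXTENDEDPRICE, L_DISCOUNT, L_TAX, L_RETURNFLAG, L_LINESTATUS,
  CAST(L_SHIPDATE AS string) AS L_SHIPDATE,
  CAST(L_COMMITDATE AS string) AS L_COMMITDATE,
  CAST(L_RECEIPTDATE AS string) AS L_RECEIPTDATE,
  L_SHIPINSTRUCT, L_SHIPMODE, L_COMMENT
FROM H_LINEITEM_EXT;

Pointing --hcatalog-table at that copy exports cleanly. Is there a way to export the DATE columns directly, without the intermediate copy?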