作者:Mdh-103_230 | 来源:互联网 | 2023-02-01 18:04
如果表是ORC格式,先执行show create table,
再执行其输出的create table
语句时,就会出现问题.
使用show create table
,你得到这个:
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
但是如果使用这些子句创建表,则在执行select查询时会出现类型转换错误.错误信息类似于:
Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.hive.ql.io.orc.OrcStruct cannot be cast to org.apache.hadoop.io.BinaryComparable
要解决这个问题,只需将create table
语句更改为STORED AS ORC
但是,即使看了下面这个类似问题的答案:
"在Hive中'InputFormat,OutputFormat'和'Stored as'之间有什么区别?"
我仍然无法弄清楚原因.
1> David דודו M..:
STORED AS
意味着三件事:
SERDE
INPUTFORMAT
OUTPUTFORMAT
您只定义了后两个(INPUTFORMAT和OUTPUTFORMAT),而SERDE则由hive.default.serde决定
hive.default.serde
默认值:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
添加于:Hive 0.14 with HIVE-5976
对于未指定SerDe的存储格式,Hive将使用的默认SerDe.
当前未指定SerDe的存储格式包括"TextFile,RcFile".
演示
hive.default.serde
set hive.default.serde;
hive.default.serde=org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
STORED AS ORC
create table mytable (i int)
stored as orc;
show create table mytable;
注意SERDE是 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
CREATE TABLE `mytable`(
`i` int)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'file:/home/cloudera/local_db/mytable'
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',
'numFiles'='0',
'numRows'='0',
'rawDataSize'='0',
'totalSize'='0',
'transient_lastDdlTime'='1496982059')
STORED AS INPUTFORMAT ... OUTPUTFORMAT ...
create table mytable2 (i int)
STORED AS
INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
;
show create table mytable2
;
注意SERDE是 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
CREATE TABLE `mytable2`(
`i` int)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'file:/home/cloudera/local_db/mytable2'
TBLPROPERTIES (
'COLUMN_STATS_ACCURATE'='{\"BASIC_STATS\":\"true\"}',
'numFiles'='0',
'numRows'='0',
'rawDataSize'='0',
'totalSize'='0',
'transient_lastDdlTime'='1496982426')