Details
Description
hive returns incorrect number of rows when BETWEEN and NOT BETWEEN operators are used in WHERE clause while querying a table that uses ORC as a storage format.
script to replicate the issue on HDP 2.6:
SET hive.exec.compress.output=false; SET hive.vectorized.execution.enabled=false; SET hive.optimize.ppd=true; SET hive.optimize.ppd.storage=true; SET N=100000; SET TTT=default.tmp_tbl_text; SET TTO=default.tmp_tbl_orc; DROP TABLE IF EXISTS ${hiveconf:TTT}; DROP TABLE IF EXISTS ${hiveconf:TTO}; create table ${hiveconf:TTT} stored as textfile as select pos as c from ( select posexplode(split(repeat(',', ${hiveconf:N}), ',')) ) as t; create table ${hiveconf:TTO} stored as orc as select c from ${hiveconf:TTT}; SELECT count(c) as cnt FROM ${hiveconf:TTT} WHERE c between 0 and ${hiveconf:N} and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2 ; SELECT count(c) as cnt FROM ${hiveconf:TTO} WHERE c between 0 and ${hiveconf:N} and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2 ; DROP TABLE IF EXISTS ${hiveconf:TTT}; DROP TABLE IF EXISTS ${hiveconf:TTO};