Details
-
Bug
-
Status: Closed
-
Major
-
Resolution: Invalid
-
None
-
None
-
None
Description
This appears to be a variant of ARROW-9790, but specifically for list columns. It affects the latest released version (17.0.0) of the Rust crates `arrow` and `parquet`.
use arrow::array::{Int32Builder, ListBuilder};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use parquet::arrow::{ArrowReader, ArrowWriter, ParquetFileArrowReader};
use parquet::file::properties::WriterProperties;
use parquet::file::reader::SerializedFileReader;
use std::error::Error;
use std::sync::Arc;
use tempfile::NamedTempFile;

// Minimal reproduction: write two 10-row record batches containing an Int32
// column and a list-of-Int32 column to a temporary Parquet file with
// `set_max_row_group_size(8)`, then read the file back through a record reader
// requested with `8` and assert that the batches come back as 8, 8 and 4 rows
// (20 rows in total).
//
// Any error from the arrow/parquet/tempfile calls is propagated to the caller
// via `?`; the `unwrap()` calls panic if the reader yields fewer than three
// batches.
fn main() -> Result<(), Box<dyn Error>> {
    // Two columns: "int" (Int32) and "list" (List whose element field is
    // "item": Int32).
    // NOTE(review): the trailing boolean of `Field::new` is presumably the
    // nullable flag — confirm against the arrow docs for this version.
    let schema = Arc::new(Schema::new(vec![
        Field::new("int", DataType::Int32, false),
        Field::new(
            "list",
            DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
            false,
        ),
    ]));
    let temp_file = NamedTempFile::new()?;
    // The writer gets its own handle to the temp file via `reopen()`; the
    // row-group limit (8) is deliberately smaller than the batch size written
    // below (10).
    let mut writer = ArrowWriter::try_new(
        temp_file.reopen()?,
        schema.clone(),
        Some(
            WriterProperties::builder()
                .set_max_row_group_size(8)
                .build(),
        ),
    )?;
    // Write two identical batches of 10 rows each.
    for _ in 0..2 {
        let mut int_builder = Int32Builder::new(10);
        let mut list_builder = ListBuilder::new(Int32Builder::new(10));
        for i in 0..10 {
            int_builder.append_value(i)?;
            // No child values are pushed before this call, so each list slot
            // is closed without any items.
            // NOTE(review): presumably this yields a valid, empty list per
            // row — confirm `ListBuilder::append` semantics.
            list_builder.append(true)?;
        }
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(int_builder.finish()),
                Arc::new(list_builder.finish()),
            ],
        )?;
        writer.write(&batch)?;
    }
    writer.close()?;
    // Read the file back through a second handle to the same temp file.
    let file_reader = Arc::new(SerializedFileReader::new(temp_file.reopen()?)?);
    let mut file_reader = ParquetFileArrowReader::new(file_reader);
    // NOTE(review): the argument `8` is presumably the reader batch size —
    // confirm against `ArrowReader::get_record_reader`.
    let mut record_reader = file_reader.get_record_reader(8)?;
    // Expected batch sizes for the 20 written rows: 8 + 8 + 4.
    assert_eq!(8, record_reader.next().unwrap()?.num_rows());
    assert_eq!(8, record_reader.next().unwrap()?.num_rows());
    assert_eq!(4, record_reader.next().unwrap()?.num_rows());
    Ok(())
}
Running this program fails with `Error: ParquetError("Parquet error: Not all children array length are the same!")`