Uploaded image for project: 'ORC'
  1. ORC
  2. ORC-342

Error reading timestamp in range [1969-12-31 23:59:59.000,1969-12-31 23:59:59.999] when time zone shifts

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Open
    • Minor
    • Resolution: Unresolved
    • None
    • None
    • None
    • None

    Description

      Timestamps in the range [1969-12-31 23:59:59.000,1969-12-31 23:59:59.999] are read back one second later, as [1970-01-01 00:00:00.000,1970-01-01 00:00:00.999], when the reader's time zone differs from the writer's.

      Below is a test, based on TestOrcTimezone3, that has been modified to reproduce the issue.

      /**
       * Licensed to the Apache Software Foundation (ASF) under one
       * or more contributor license agreements.  See the NOTICE file
       * distributed with this work for additional information
       * regarding copyright ownership.  The ASF licenses this file
       * to you under the Apache License, Version 2.0 (the
       * "License"); you may not use this file except in compliance
       * with the License.  You may obtain a copy of the License at
       *
       *     http://www.apache.org/licenses/LICENSE-2.0
       *
       * Unless required by applicable law or agreed to in writing, software
       * distributed under the License is distributed on an "AS IS" BASIS,
       * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       * See the License for the specific language governing permissions and
       * limitations under the License.
       */
      package org.apache.orc;
      
      import static junit.framework.Assert.assertEquals;
      
      import java.io.File;
      import java.sql.Timestamp;
      import java.util.Arrays;
      import java.util.Collection;
      import java.util.List;
      import java.util.TimeZone;
      
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.FileSystem;
      import org.apache.hadoop.fs.Path;
      import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
      import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
      import org.junit.After;
      import org.junit.Before;
      import org.junit.Rule;
      import org.junit.Test;
      import org.junit.rules.TestName;
      import org.junit.runner.RunWith;
      import org.junit.runners.Parameterized;
      
      import com.google.common.collect.Lists;
      
      import junit.framework.Assert;
      
      /**
       * Round-trips a timestamp through an ORC file while the JVM default time
       * zone differs between writing and reading, and checks that the value
       * reads back with the same textual representation.
       *
       * <p>The single parameter set writes under UTC and reads under
       * America/Los_Angeles, using a value in the last second before the epoch
       * (1969-12-31 23:59:59.007).
       */
      @RunWith(Parameterized.class)
      public class TestOrcTimezone3 {
        Path workDir = new Path(System.getProperty("test.tmp.dir",
            "target" + File.separator + "test" + File.separator + "tmp"));
        Configuration conf;
        FileSystem fs;
        Path testFilePath;
        String writerTimeZone;
        String readerTimeZone;
        // JVM default zone captured at class load; restored after every test.
        static TimeZone defaultTimeZone = TimeZone.getDefault();

        /**
         * @param writerTZ ID of the default time zone to use while writing the file
         * @param readerTZ ID of the default time zone to use while reading the file
         */
        public TestOrcTimezone3(String writerTZ, String readerTZ) {
          this.writerTimeZone = writerTZ;
          this.readerTimeZone = readerTZ;
        }

        /**
         * @return the (writer zone, reader zone) pairs the test is run with
         */
        @Parameterized.Parameters
        public static Collection<Object[]> data() {
          return Arrays.asList(new Object[][]{
              {"UTC", "America/Los_Angeles"},
          });
        }

        @Rule
        public TestName testCaseName = new TestName();

        /**
         * Opens the local file system and removes any file left by a previous run
         * of the same test method.
         */
        @Before
        public void openFileSystem() throws Exception {
          conf = new Configuration();
          fs = FileSystem.getLocal(conf);
          testFilePath = new Path(workDir, "TestOrcTimezone3." +
              testCaseName.getMethodName() + ".orc");
          fs.delete(testFilePath, false);
        }

        /** Undoes the default time zone changes made by the test. */
        @After
        public void restoreTimeZone() {
          TimeZone.setDefault(defaultTimeZone);
        }

        /**
         * Writes the timestamps under the writer zone, reads them back under the
         * reader zone, and expects identical string representations.
         */
        @Test
        public void testTimestampWriter() throws Exception {
          TypeDescription schema = TypeDescription.createTimestamp();
          List<String> ts = Lists.newArrayList();
          ts.add("1969-12-31 23:59:59.007");

          // Keep the original ordering: switch the default zone, then create the
          // writer. Timestamp.valueOf below also parses in the default zone.
          TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone));
          Writer writer = OrcFile.createWriter(testFilePath,
              OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
                  .bufferSize(10000));
          try {
            assertEquals(writerTimeZone, TimeZone.getDefault().getID());
            VectorizedRowBatch batch = schema.createRowBatch();
            TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
            for (String t : ts) {
              times.set(batch.size++, Timestamp.valueOf(t));
            }
            writer.addRowBatch(batch);
          } finally {
            // Close even when an assertion or write fails so the writer is not leaked.
            writer.close();
          }

          TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone));
          Reader reader = OrcFile.createReader(testFilePath,
              OrcFile.readerOptions(conf).filesystem(fs));
          assertEquals(readerTimeZone, TimeZone.getDefault().getID());
          RecordReader rows = reader.rows();
          int idx = 0;
          try {
            VectorizedRowBatch batch = reader.getSchema().createRowBatch();
            TimestampColumnVector times = (TimestampColumnVector) batch.cols[0];
            while (rows.nextBatch(batch)) {
              for (int r = 0; r < batch.size; ++r) {
                assertEquals(ts.get(idx++), times.asScratchTimestamp(r).toString());
              }
            }
          } finally {
            // Close even when a comparison fails so the record reader is not leaked.
            rows.close();
          }
          // Guard against a vacuous pass: every written row must have been read.
          assertEquals(ts.size(), idx);
        }
      }
      
      Expected :1969-12-31 23:59:59.007
      Actual   :1970-01-01 00:00:00.007
      

      Attachments

        Issue Links

          Activity

            People

              Unassigned Unassigned
              jcamacho Jesús Camacho Rodríguez
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated: