
Commit: Some updates
malhotrashivam committed Jul 20, 2023
1 parent d4bca80 · commit ccfc7f6
Showing 7 changed files with 8 additions and 24 deletions.
@@ -43,14 +43,13 @@ public int writeBulkVector(@NotNull final BUFFER_TYPE bulkValues,
 
     /**
      * This method is used to provide a specialized definition of NULL for PlainIntChunkedWriter. For other writers,
-     * this method does nothing
+     * this method does nothing.
      *
-     * @param nullDefinition The specialized value of NULL to consider while writing
+     * @param nullDefinition The value which will be interpreted as NULL for this writer
      */
     @Override
     public void setNull(int nullDefinition) {
-        // TODO Should I raise an error here since this should not be called, should only be called for the
-        // PlainIntChunkedWriter specializaiton.
+        throw new UnsupportedOperationException("This method should not be called");
     }
 
     /**
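A quick way to see the contract this hunk establishes: the base bulk writer now rejects setNull outright, and only the int specialization is expected to honor it. A minimal Python sketch of that shape (class and method names here are illustrative stand-ins, not the actual Deephaven API):

class BulkWriter:
    def set_null(self, null_definition: int) -> None:
        # Mirrors the Java change above: the generic writer refuses a custom NULL.
        raise NotImplementedError("set_null is only supported by the int specialization")


class PlainIntChunkedWriter(BulkWriter):
    def __init__(self) -> None:
        self.null_definition = -(2**31)  # stand-in default for QueryConstants.NULL_INT

    def set_null(self, null_definition: int) -> None:
        # The specialization records the sentinel to treat as NULL while writing.
        self.null_definition = null_definition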
@@ -217,6 +217,7 @@ public void addVectorPage(
         }
         initWriter();
         // noinspection unchecked
+        // TODO Should I also change this path? I don't fully understand this path. Talk to Ryan for this.
         int valueCount =
                 bulkWriter.writeBulkVector(pageData, repeatCount, rlEncoder, dlEncoder, nonNullValueCount);
         writePage(bulkWriter.getByteBufferView(), valueCount);
@@ -22,8 +22,9 @@
 public class PlainIntChunkedWriter extends AbstractBulkValuesWriter<IntBuffer> {
     private static final int MAXIMUM_TOTAL_CAPACITY = Integer.MAX_VALUE / Integer.BYTES;
 
-    // This value will be considered as a NULL while writing. By default, we use the definition for QueryConstants but
-    // can be overridden.
+    // This value will be considered as a NULL while writing. For example, if NULL_DEF is set as 65535, then 65535 will
+    // be written as NULL by this writer. By default, we use the definition from QueryConstants but can be overridden.
+    // TODO Can there be a better name for this?
     private int NULL_DEF = QueryConstants.NULL_INT;
 
     private final ByteBufferAllocator allocator;
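The 65535 example in the new comment can be made concrete. A self-contained sketch of the sentinel-to-null mapping the comment describes, with a plain Python list standing in for the IntBuffer pages the real writer consumes:

NULL_DEF = 65535  # the sentinel from the comment's example

def write_page(values):
    # Any value equal to the sentinel is emitted as NULL in the output page.
    return [None if v == NULL_DEF else v for v in values]

print(write_page([1, 65535, 3]))  # -> [1, None, 3]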
@@ -579,7 +579,6 @@ private static <DATA_TYPE> void encodePlain(@NotNull final ParquetInstructions w
                 maxValuesPerPage,
                 columnType,
                 writeInstructions)) {
-            final boolean supportNulls = supportNulls(columnType);
             final Object bufferToWrite = transferObject.getBuffer();
             try (final RowSequence.Iterator lengthIndexIt =
                     lengthSource != null ? originalRowSet.getRowSequenceIterator() : null;
@@ -600,10 +599,8 @@
                         repeatCount.limit(lenChunk.size());
                         columnWriter.addVectorPage(bufferToWrite, repeatCount, transferObject.rowCount());
                         repeatCount.clear();
-                    } else if (supportNulls) {
-                        columnWriter.addPage(bufferToWrite, transferObject.rowCount(), columnType);
                     } else {
-                        columnWriter.addPageNoNulls(bufferToWrite, transferObject.rowCount());
+                        columnWriter.addPage(bufferToWrite, transferObject.rowCount(), columnType);
                     }
                 }
             }
@@ -711,11 +708,6 @@ private static <DATA_TYPE> boolean tryEncodeDictionary(@NotNull final ParquetIns
         }
     }
 
-    private static boolean supportNulls(@NotNull final Class<?> columnType) {
-        return true;
-        // return !columnType.isPrimitive();
-    }
-
     /**
      * Get the number of rows that fit within the current targetPageSize for the specified type.
      *
4 changes: 0 additions & 4 deletions py/server/deephaven/dtypes.py
@@ -203,8 +203,6 @@ def array(dtype: DType, seq: Sequence, remap: Callable[[Any], Any] = None) -> jp
         DHError
     """
     try:
-        # print("+++++++++++++ Original Seq: " + str(seq))
-
         if isinstance(seq, str) and dtype == char:
             # ord is the Python builtin function that takes a unicode character and returns an integer code point value
             remap = ord
@@ -214,8 +212,6 @@
                 raise ValueError("Not a callable")
             seq = [remap(v) for v in seq]
 
-        # print("+++++++++++++ Remapped Seq: " + str(seq))
-
         if isinstance(seq, np.ndarray):
             if dtype == bool_:
                 bytes_ = seq.astype(dtype=np.int8)
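For context on the remap path visible in the hunk above, a usage sketch of deephaven.dtypes.array. This assumes a running Deephaven Python session; dtypes.char appears in the diff context, while dtypes.int32 is assumed to be one of the standard dtype singletons:

from deephaven import dtypes

# A str combined with the char dtype takes the remap = ord branch shown above:
# each character becomes its integer code point before the Java array is built.
j_chars = dtypes.array(dtypes.char, "abc")

# An explicit remap callable is applied element-wise to any other sequence.
j_ints = dtypes.array(dtypes.int32, ["1", "2", "3"], remap=int)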
1 change: 0 additions & 1 deletion py/server/deephaven/numpy.py
@@ -74,7 +74,6 @@ def _columns_to_2d_numpy_array(col_def: Column, j_arrays: List[jpy.JType]) -> np
 
 def _make_input_column(col: str, np_array: np.ndarray, dtype: DType) -> InputColumn:
     """ Creates a InputColumn with the given column name and the numpy array. """
-    # print("_make_input_column: NP Array received=" + str(np_array))
     return InputColumn(name=_to_column_name(col), data_type=dtype, input_data=np_array)
 
 
4 changes: 0 additions & 4 deletions py/server/deephaven/pandas.py
@@ -175,19 +175,16 @@ def to_pandas(table: Table, cols: List[str] = None, dtype_backend: str = None) -
 def _map_na(np_array: np.ndarray):
     """Replaces the pd.NA values in the array if it is of pandas ExtensionDtype(nullable)."""
     pd_dtype = np_array.dtype
-    # print("----- _map_na(): Processing array of data type " + str(pd_dtype) + ":" + str(np_array))
     if not isinstance(pd_dtype, pd.api.extensions.ExtensionDtype):
         return np_array
 
     dh_null = _EX_DTYPE_NULL_MAP.get(type(pd_dtype)) or _EX_DTYPE_NULL_MAP.get(pd_dtype)
-    # print("----- _map_na(): dh_null=" + str(dh_null))
     if isinstance(pd_dtype, pd.StringDtype) or isinstance(pd_dtype, pd.BooleanDtype) or pd_dtype == pd.ArrowDtype(
             pa.bool_()):
         np_array = np.array(list(map(lambda v: dh_null if v is pd.NA else v, np_array)))
     elif dh_null is not None:
         np_array = np_array.fillna(dh_null)
 
-    # print("----- _map_na(): Processed array=" + str(np_array))
     return np_array
 
 
@@ -231,7 +228,6 @@ def to_table(df: pd.DataFrame, cols: List[str] = None) -> Table:
             else:
                 dtype = np_array.dtype
             dh_dtype = dtypes.from_np_dtype(dtype)
-            # print("to_table:" + str(dtype) + " -> " + str(dh_dtype))
             np_array = _map_na(np_array)
             input_cols.append(_make_input_column(col, np_array, dh_dtype))
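The two branches of _map_na are easy to demonstrate with pandas alone. A sketch using -(2**31) as a stand-in for the Deephaven null sentinel that _EX_DTYPE_NULL_MAP would supply for an Int32 column:

import numpy as np
import pandas as pd

DH_NULL_INT = -(2**31)  # stand-in for the mapped Deephaven null sentinel

# The elif branch: a nullable integer array can swap pd.NA for the sentinel
# in one fillna call, then convert cleanly to a primitive numpy array.
arr = pd.array([1, 2, pd.NA], dtype="Int32")
filled = arr.fillna(DH_NULL_INT)
print(filled.to_numpy(dtype=np.int32))  # values: 1, 2, -2147483648

# The string/boolean branch has no fillna shortcut, so it maps element-wise.
s = pd.array(["a", pd.NA], dtype="string")
print(np.array(list(map(lambda v: None if v is pd.NA else v, s))))  # ['a' None]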
