
Commit: Some updates
malhotrashivam committed Jul 20, 2023
1 parent d4bca80 · commit ccfc7f6
Showing 7 changed files with 8 additions and 24 deletions.
@@ -43,14 +43,13 @@ public int writeBulkVector(@NotNull final BUFFER_TYPE bulkValues,
 
     /**
      * This method is used to provide a specialized definition of NULL for PlainIntChunkedWriter. For other writers,
-     * this method does nothing
+     * this method does nothing.
      *
-     * @param nullDefinition The specialized value of NULL to consider while writing
+     * @param nullDefinition The value which will be interpreted as NULL for this writer
      */
     @Override
     public void setNull(int nullDefinition) {
-        // TODO Should I raise an error here since this should not be called, should only be called for the
-        // PlainIntChunkedWriter specializaiton.
+        throw new UnsupportedOperationException("This method should not be called");
     }
 
     /**
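A quick way to see the contract this hunk establishes: the base bulk writer now rejects setNull outright, and only the int specialization is expected to honor it. A minimal Python sketch of that shape (class and method names here are illustrative stand-ins, not the actual Deephaven API):

class BulkWriter:
    def set_null(self, null_definition: int) -> None:
        # Mirrors the Java change above: the generic writer refuses a custom NULL.
        raise NotImplementedError("set_null is only supported by the int specialization")


class PlainIntChunkedWriter(BulkWriter):
    def __init__(self) -> None:
        self.null_definition = -(2**31)  # stand-in default for QueryConstants.NULL_INT

    def set_null(self, null_definition: int) -> None:
        # The specialization records the sentinel to treat as NULL while writing.
        self.null_definition = null_definition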
@@ -217,6 +217,7 @@ public void addVectorPage(
         }
         initWriter();
         // noinspection unchecked
+        // TODO Should I also change this path? I don't fully understand this path. Talk to Ryan for this.
         int valueCount =
                 bulkWriter.writeBulkVector(pageData, repeatCount, rlEncoder, dlEncoder, nonNullValueCount);
         writePage(bulkWriter.getByteBufferView(), valueCount);
@@ -22,8 +22,9 @@
 public class PlainIntChunkedWriter extends AbstractBulkValuesWriter<IntBuffer> {
     private static final int MAXIMUM_TOTAL_CAPACITY = Integer.MAX_VALUE / Integer.BYTES;
 
-    // This value will be considered as a NULL while writing. By default, we use the definition for QueryConstants but
-    // can be overridden.
+    // This value will be considered as a NULL while writing. For example, if NULL_DEF is set as 65535, then 65535 will
+    // be written as NULL by this writer. By default, we use the definition from QueryConstants but can be overridden.
+    // TODO Can there be a better name for this?
     private int NULL_DEF = QueryConstants.NULL_INT;
 
     private final ByteBufferAllocator allocator;
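The 65535 example in the new comment can be made concrete. A self-contained sketch of the sentinel-to-null mapping the comment describes, with a plain Python list standing in for the IntBuffer pages the real writer consumes:

NULL_DEF = 65535  # the sentinel from the comment's example

def write_page(values):
    # Any value equal to the sentinel is emitted as NULL in the output page.
    return [None if v == NULL_DEF else v for v in values]

print(write_page([1, 65535, 3]))  # -> [1, None, 3]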
@@ -579,7 +579,6 @@ private static <DATA_TYPE> void encodePlain(@NotNull final ParquetInstructions w
                 maxValuesPerPage,
                 columnType,
                 writeInstructions)) {
-            final boolean supportNulls = supportNulls(columnType);
             final Object bufferToWrite = transferObject.getBuffer();
             try (final RowSequence.Iterator lengthIndexIt =
                     lengthSource != null ? originalRowSet.getRowSequenceIterator() : null;
@@ -600,10 +599,8 @@
                         repeatCount.limit(lenChunk.size());
                         columnWriter.addVectorPage(bufferToWrite, repeatCount, transferObject.rowCount());
                         repeatCount.clear();
-                    } else if (supportNulls) {
-                        columnWriter.addPage(bufferToWrite, transferObject.rowCount(), columnType);
                     } else {
-                        columnWriter.addPageNoNulls(bufferToWrite, transferObject.rowCount());
+                        columnWriter.addPage(bufferToWrite, transferObject.rowCount(), columnType);
                     }
                 }
             }
@@ -711,11 +708,6 @@ private static <DATA_TYPE> boolean tryEncodeDictionary(@NotNull final ParquetIns
         }
     }
 
-    private static boolean supportNulls(@NotNull final Class<?> columnType) {
-        return true;
-        // return !columnType.isPrimitive();
-    }
-
     /**
      * Get the number of rows that fit within the current targetPageSize for the specified type.
      *
4 changes: 0 additions & 4 deletions py/server/deephaven/dtypes.py
@@ -203,8 +203,6 @@ def array(dtype: DType, seq: Sequence, remap: Callable[[Any], Any] = None) -> jp
         DHError
     """
     try:
-        # print("+++++++++++++ Original Seq: " + str(seq))
-
         if isinstance(seq, str) and dtype == char:
             # ord is the Python builtin function that takes a unicode character and returns an integer code point value
             remap = ord
@@ -214,8 +212,6 @@
                 raise ValueError("Not a callable")
             seq = [remap(v) for v in seq]
 
-        # print("+++++++++++++ Remapped Seq: " + str(seq))
-
         if isinstance(seq, np.ndarray):
             if dtype == bool_:
                 bytes_ = seq.astype(dtype=np.int8)
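For context on the remap path visible in the hunk above, a usage sketch of deephaven.dtypes.array. This assumes a running Deephaven Python session; dtypes.char appears in the diff context, while dtypes.int32 is assumed to be one of the standard dtype singletons:

from deephaven import dtypes

# A str combined with the char dtype takes the remap = ord branch shown above:
# each character becomes its integer code point before the Java array is built.
j_chars = dtypes.array(dtypes.char, "abc")

# An explicit remap callable is applied element-wise to any other sequence.
j_ints = dtypes.array(dtypes.int32, ["1", "2", "3"], remap=int)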
1 change: 0 additions & 1 deletion py/server/deephaven/numpy.py
@@ -74,7 +74,6 @@ def _columns_to_2d_numpy_array(col_def: Column, j_arrays: List[jpy.JType]) -> np
 
 def _make_input_column(col: str, np_array: np.ndarray, dtype: DType) -> InputColumn:
     """ Creates a InputColumn with the given column name and the numpy array. """
-    # print("_make_input_column: NP Array received=" + str(np_array))
     return InputColumn(name=_to_column_name(col), data_type=dtype, input_data=np_array)
 
 
4 changes: 0 additions & 4 deletions py/server/deephaven/pandas.py
@@ -175,19 +175,16 @@ def to_pandas(table: Table, cols: List[str] = None, dtype_backend: str = None) -
 def _map_na(np_array: np.ndarray):
     """Replaces the pd.NA values in the array if it is of pandas ExtensionDtype(nullable)."""
     pd_dtype = np_array.dtype
-    # print("----- _map_na(): Processing array of data type " + str(pd_dtype) + ":" + str(np_array))
     if not isinstance(pd_dtype, pd.api.extensions.ExtensionDtype):
         return np_array
 
     dh_null = _EX_DTYPE_NULL_MAP.get(type(pd_dtype)) or _EX_DTYPE_NULL_MAP.get(pd_dtype)
-    # print("----- _map_na(): dh_null=" + str(dh_null))
     if isinstance(pd_dtype, pd.StringDtype) or isinstance(pd_dtype, pd.BooleanDtype) or pd_dtype == pd.ArrowDtype(
             pa.bool_()):
         np_array = np.array(list(map(lambda v: dh_null if v is pd.NA else v, np_array)))
     elif dh_null is not None:
         np_array = np_array.fillna(dh_null)
 
-    # print("----- _map_na(): Processed array=" + str(np_array))
     return np_array
 
 
@@ -231,7 +228,6 @@ def to_table(df: pd.DataFrame, cols: List[str] = None) -> Table:
             else:
                 dtype = np_array.dtype
             dh_dtype = dtypes.from_np_dtype(dtype)
-            # print("to_table:" + str(dtype) + " -> " + str(dh_dtype))
             np_array = _map_na(np_array)
             input_cols.append(_make_input_column(col, np_array, dh_dtype))
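The two branches of _map_na are easy to demonstrate with pandas alone. A sketch using -(2**31) as a stand-in for the Deephaven null sentinel that _EX_DTYPE_NULL_MAP would supply for an Int32 column:

import numpy as np
import pandas as pd

DH_NULL_INT = -(2**31)  # stand-in for the mapped Deephaven null sentinel

# The elif branch: a nullable integer array can swap pd.NA for the sentinel
# in one fillna call, then convert cleanly to a primitive numpy array.
arr = pd.array([1, 2, pd.NA], dtype="Int32")
filled = arr.fillna(DH_NULL_INT)
print(filled.to_numpy(dtype=np.int32))  # values: 1, 2, -2147483648

# The string/boolean branch has no fillna shortcut, so it maps element-wise.
s = pd.array(["a", pd.NA], dtype="string")
print(np.array(list(map(lambda v: None if v is pd.NA else v, s))))  # ['a' None]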
