Skip to content

Commit

Permalink
Revert "using LazyField"
Browse files: browse the repository at this point in the history
This reverts commit 9444900.
  • Loading branch information
Tan-JiaLiang committed Jan 6, 2025
1 parent 7e423ca commit 25b18c8
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.reader.RecordReader;
import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import javax.annotation.Nullable;
Expand All @@ -32,17 +31,14 @@ public class FormatReaderContext implements FormatReaderFactory.Context {
private final FileIO fileIO;
private final Path file;
private final long fileSize;
@Nullable private final LazyField<RoaringBitmap32> selection;
@Nullable private final RoaringBitmap32 selection;

public FormatReaderContext(FileIO fileIO, Path file, long fileSize) {
this(fileIO, file, fileSize, null);
}

public FormatReaderContext(
FileIO fileIO,
Path file,
long fileSize,
@Nullable LazyField<RoaringBitmap32> selection) {
FileIO fileIO, Path file, long fileSize, @Nullable RoaringBitmap32 selection) {
this.fileIO = fileIO;
this.file = file;
this.fileSize = fileSize;
Expand All @@ -66,7 +62,7 @@ public long fileSize() {

@Nullable
@Override
public LazyField<RoaringBitmap32> selection() {
public RoaringBitmap32 selection() {
return selection;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.apache.paimon.fs.Path;
import org.apache.paimon.reader.FileRecordReader;
import org.apache.paimon.reader.RecordReader;
import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import javax.annotation.Nullable;
Expand All @@ -45,6 +44,6 @@ interface Context {
long fileSize();

@Nullable
LazyField<RoaringBitmap32> selection();
RoaringBitmap32 selection();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
import org.apache.paimon.utils.FormatReaderMapping;
import org.apache.paimon.utils.FormatReaderMapping.Builder;
import org.apache.paimon.utils.IOExceptionSupplier;
import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import org.slf4j.Logger;
Expand Down Expand Up @@ -211,23 +210,20 @@ private FileRecordReader<InternalRow> createFileReader(
}
}

BitmapIndexResult fileIndex =
fileIndexResult instanceof BitmapIndexResult
? ((BitmapIndexResult) fileIndexResult)
: null;
RoaringBitmap32 selection = null;
if (fileIndexResult instanceof BitmapIndexResult) {
selection = ((BitmapIndexResult) fileIndexResult).get();
}

RoaringBitmap32 deletion = null;
DeletionVector deletionVector = dvFactory == null ? null : dvFactory.get();
LazyField<RoaringBitmap32> deletion =
deletionVector instanceof BitmapDeletionVector
? new LazyField<>(() -> ((BitmapDeletionVector) deletionVector).get())
: null;
if (deletionVector instanceof BitmapDeletionVector) {
deletion = ((BitmapDeletionVector) deletionVector).get();
}

BitmapIndexResult selection = fileIndex;
if (fileIndex != null && deletion != null) {
selection =
new BitmapIndexResult(
() -> RoaringBitmap32.andNot(fileIndex.get(), deletion.get()));
if (!selection.remain()) {
if (deletion != null && selection != null) {
selection = RoaringBitmap32.andNot(selection, deletion);
if (selection.isEmpty()) {
return new EmptyFileRecordReader<>();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.apache.paimon.fs.PositionOutputStream;
import org.apache.paimon.fs.SeekableInputStream;
import org.apache.paimon.reader.FileRecordReader;
import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import java.io.EOFException;
Expand Down Expand Up @@ -84,7 +83,7 @@ public long fileSize() {
}

@Override
public LazyField<RoaringBitmap32> selection() {
public RoaringBitmap32 selection() {
return context.selection();
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

package org.apache.orc.impl;

import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import org.apache.commons.lang3.ArrayUtils;
Expand Down Expand Up @@ -128,7 +127,7 @@ public class RecordReaderImpl implements RecordReader {
private final boolean noSelectedVector;
// identifies whether the file has bad bloom filters that we should not use.
private final boolean skipBloomFilters;
@Nullable private final LazyField<RoaringBitmap32> selection;
@Nullable private final RoaringBitmap32 selection;

static final String[] BAD_CPP_BLOOM_FILTER_VERSIONS = {
"1.6.0", "1.6.1", "1.6.2", "1.6.3", "1.6.4", "1.6.5", "1.6.6", "1.6.7", "1.6.8", "1.6.9",
Expand Down Expand Up @@ -226,9 +225,7 @@ public static int[] mapSargColumnsToOrcInternalColIdx(
}

public RecordReaderImpl(
ReaderImpl fileReader,
Reader.Options options,
@Nullable LazyField<RoaringBitmap32> selection)
ReaderImpl fileReader, Reader.Options options, @Nullable RoaringBitmap32 selection)
throws IOException {
this.selection = selection;
OrcFile.WriterVersion writerVersion = fileReader.getWriterVersion();
Expand Down Expand Up @@ -1280,7 +1277,7 @@ public boolean[] pickRowGroups(
OrcProto.BloomFilterIndex[] bloomFilterIndices,
boolean returnNone,
long rowBaseInStripe,
@Nullable LazyField<RoaringBitmap32> selection)
@Nullable RoaringBitmap32 selection)
throws IOException {
long rowsInStripe = stripe.getNumberOfRows();
int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
Expand Down Expand Up @@ -1377,7 +1374,7 @@ public boolean[] pickRowGroups(
if (selection != null) {
long firstRow = rowBaseInStripe + rowIndexStride * rowGroup;
long lastRow = Math.min(firstRow + rowIndexStride, firstRow + rowsInStripe);
result[rowGroup] &= selection.get().intersects(firstRow, lastRow);
result[rowGroup] &= selection.intersects(firstRow, lastRow);
}
hasSelected = hasSelected || result[rowGroup];
hasSkipped = hasSkipped || (!result[rowGroup]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.IOUtils;
import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.Pair;
import org.apache.paimon.utils.Pool;
import org.apache.paimon.utils.RoaringBitmap32;
Expand Down Expand Up @@ -260,7 +259,7 @@ private static RecordReader createRecordReader(
org.apache.paimon.fs.Path path,
long splitStart,
long splitLength,
@Nullable LazyField<RoaringBitmap32> selection,
@Nullable RoaringBitmap32 selection,
boolean deletionVectorsEnabled)
throws IOException {
org.apache.orc.Reader orcReader = createReader(conf, fileIO, path, selection);
Expand Down Expand Up @@ -344,7 +343,7 @@ public static org.apache.orc.Reader createReader(
org.apache.hadoop.conf.Configuration conf,
FileIO fileIO,
org.apache.paimon.fs.Path path,
@Nullable LazyField<RoaringBitmap32> selection)
@Nullable RoaringBitmap32 selection)
throws IOException {
// open ORC file and create reader
org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(path.toUri());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.paimon.format.parquet.ParquetInputStream;
import org.apache.paimon.fs.FileRange;
import org.apache.paimon.fs.VectoredReadable;
import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -221,7 +220,7 @@ private static ParquetMetadata readFooter(
private final List<ColumnIndexStore> blockIndexStores;
private final List<RowRanges> blockRowRanges;
private final boolean blocksFiltered;
@Nullable private final LazyField<RoaringBitmap32> selection;
@Nullable private final RoaringBitmap32 selection;

// not final. in some cases, this may be lazily loaded for backward-compat.
private ParquetMetadata footer;
Expand All @@ -233,9 +232,7 @@ private static ParquetMetadata readFooter(
private InternalFileDecryptor fileDecryptor;

public ParquetFileReader(
InputFile file,
ParquetReadOptions options,
@Nullable LazyField<RoaringBitmap32> selection)
InputFile file, ParquetReadOptions options, @Nullable RoaringBitmap32 selection)
throws IOException {
this.converter = new ParquetMetadataConverter(options);
this.file = (ParquetInputFile) file;
Expand Down Expand Up @@ -368,12 +365,9 @@ private List<BlockMetaData> filterRowGroups(List<BlockMetaData> blocks) throws I
blocks.stream()
.filter(
it ->
selection
.get()
.intersects(
it.getRowIndexOffset(),
it.getRowIndexOffset()
+ it.getRowCount()))
selection.intersects(
it.getRowIndexOffset(),
it.getRowIndexOffset() + it.getRowCount()))
.collect(Collectors.toList());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

package org.apache.parquet.internal.filter2.columnindex;

import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import org.apache.parquet.filter2.compat.FilterCompat;
Expand Down Expand Up @@ -59,7 +58,7 @@
* therefore a {@link MissingOffsetIndexException} will be thrown from any {@code visit} methods if
* any of the required offset indexes is missing.
*
* <p>Note: The class was copied over to support using selected position to filter {@link
* <p>Note: The class was copied over to support using {@link RoaringBitmap32} to filter {@link
* RowRanges}.
*/
public class ColumnIndexFilter implements Visitor<RowRanges> {
Expand All @@ -69,7 +68,7 @@ public class ColumnIndexFilter implements Visitor<RowRanges> {
private final Set<ColumnPath> columns;
private final long rowCount;
private final long rowIndexOffset;
@Nullable private final LazyField<RoaringBitmap32> selection;
@Nullable private final RoaringBitmap32 selection;
private RowRanges allRows;

/**
Expand All @@ -93,7 +92,7 @@ public static RowRanges calculateRowRanges(
Set<ColumnPath> paths,
long rowCount,
long rowIndexOffset,
@Nullable LazyField<RoaringBitmap32> selection) {
@Nullable RoaringBitmap32 selection) {
return filter.accept(
new FilterCompat.Visitor<RowRanges>() {
@Override
Expand Down Expand Up @@ -131,7 +130,7 @@ private ColumnIndexFilter(
Set<ColumnPath> paths,
long rowCount,
long rowIndexOffset,
@Nullable LazyField<RoaringBitmap32> selection) {
@Nullable RoaringBitmap32 selection) {
this.columnIndexStore = columnIndexStore;
this.columns = paths;
this.rowCount = rowCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

package org.apache.parquet.internal.filter2.columnindex;

import org.apache.paimon.utils.LazyField;
import org.apache.paimon.utils.RoaringBitmap32;

import org.apache.parquet.filter2.compat.FilterCompat.Filter;
Expand All @@ -41,9 +40,10 @@
*
* <p>Note: The class was copied over to support using selected position to filter or narrow the
* {@link RowRanges}. Added a new method {@link RowRanges#create(long, long,
* PrimitiveIterator.OfInt, OffsetIndex, LazyField)}
* PrimitiveIterator.OfInt, OffsetIndex, RoaringBitmap32)}
*
* @see ColumnIndexFilter#calculateRowRanges(Filter, ColumnIndexStore, Set, long, long, LazyField)
* @see ColumnIndexFilter#calculateRowRanges(Filter, ColumnIndexStore, Set, long, long,
* RoaringBitmap32)
*/
public class RowRanges {

Expand Down Expand Up @@ -167,7 +167,7 @@ public static RowRanges create(
long rowIndexOffset,
PrimitiveIterator.OfInt pageIndexes,
OffsetIndex offsetIndex,
@Nullable LazyField<RoaringBitmap32> selection) {
@Nullable RoaringBitmap32 selection) {
RowRanges ranges = new RowRanges();
while (pageIndexes.hasNext()) {
int pageIndex = pageIndexes.nextInt();
Expand All @@ -178,12 +178,11 @@ public static RowRanges create(
if (selection != null) {
long first = rowIndexOffset + firstRowIndex;
long last = rowIndexOffset + lastRowIndex;
RoaringBitmap32 result = selection.get();
if (!result.intersects(first, last + 1)) {
if (!selection.intersects(first, last + 1)) {
continue;
}
firstRowIndex = result.nextValue((int) first) - rowIndexOffset;
lastRowIndex = result.previousValue((int) (last)) - rowIndexOffset;
firstRowIndex = selection.nextValue((int) first) - rowIndexOffset;
lastRowIndex = selection.previousValue((int) (last)) - rowIndexOffset;
}

ranges.add(new Range(firstRowIndex, lastRowIndex));
Expand Down

0 comments on commit 25b18c8

Please sign in to comment.