Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OAK-10803 - Fix memory consumption of uncompress properties #1619

Merged
merged 31 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
81bbfe5
OAK-10973 - Performance tune property compression/decompression
Aug 1, 2024
c9e2678
OAK-10973 - Performance tune property compression/decompression
Aug 1, 2024
a7e0def
OAK-10973 - Performance tune property compression/decompression revert
Aug 1, 2024
5567b28
OAK-10973 - Performance tune property compression/decompression set d…
Aug 1, 2024
fb07369
OAK-10973 - Performance tune property compression/decompression set d…
Aug 5, 2024
5c8cc87
OAK-10973 - Performance tune property compression/decompression separ…
Aug 5, 2024
537278b
OAK-10973
Aug 6, 2024
497715b
OAK-10973
Aug 6, 2024
d911237
OAK-10973
Aug 8, 2024
b41a7bc
OAK-10973
Aug 8, 2024
cfc5c3c
OAK-10973 -- added tests for DocumentPropertyStateFactoryTest
Aug 8, 2024
e77cbb7
OAK-10973 -- added tests for DocumentPropertyStateFactoryTest
Aug 8, 2024
6546b1d
OAK-10973 -- added another test method
Aug 12, 2024
8300cd3
OAK-10973 -- set default GZIP for compression
Aug 12, 2024
bcbc4bd
OAK-10973 -- refactor
Aug 12, 2024
3b9cd92
OAK-10973 -- manually added b04de7ea9dce0859de09f90964fa650a7bf5a978 …
Aug 12, 2024
dc2c58c
OAK-10973 -- replace guava collection with jdk one
Aug 12, 2024
8e959de
OAK-10973 -- refactor tests for default Compression GZIP
Aug 12, 2024
8f00576
OAK-10973 -- refactor guava collection
Aug 12, 2024
32745a9
OAK-10973 -- refactor guava collection and move test broken surrogate…
Aug 12, 2024
c85c79e
OAK-10973 -- added -1 condition into factory method
Aug 13, 2024
1980813
OAK-10973 -- added test for -1
Aug 13, 2024
43960b7
OAK-10973 -- remove guava from DocumentPropertyState
Aug 13, 2024
d3621a2
OAK-10973 -- remove duplicate static methods in CompressedDocumentPro…
Aug 13, 2024
102db74
OAK-10973 -- added required test methods CompressedDocumentPropertySt…
Aug 13, 2024
ff16bea
OAK-10973 -- refactor if statement
Aug 13, 2024
933a2fe
OAK-10973 -- refactor if statement
Aug 13, 2024
17ea660
OAK-10973 -- refactor test methods
Aug 13, 2024
d5711fd
OAK-10973 -- refactor test methods
Aug 13, 2024
f5484e2
OAK-10973 -- refactor test methods
Aug 13, 2024
8d14734
OAK-10973 -- remove unused imports
Aug 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,343 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.document;

import static java.util.Collections.emptyList;
import static org.apache.jackrabbit.oak.plugins.memory.PropertyStates.createProperty;
stefan-egli marked this conversation as resolved.
Show resolved Hide resolved

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import javax.jcr.PropertyType;

import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.Compression;
import org.apache.jackrabbit.oak.commons.LongUtils;
import org.apache.jackrabbit.oak.commons.json.JsopReader;
import org.apache.jackrabbit.oak.commons.json.JsopTokenizer;
import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier;
import org.apache.jackrabbit.oak.json.TypeCodes;
import org.apache.jackrabbit.oak.plugins.memory.AbstractPropertyState;
import org.apache.jackrabbit.oak.plugins.memory.BinaryPropertyState;
import org.apache.jackrabbit.oak.plugins.memory.BooleanPropertyState;
import org.apache.jackrabbit.oak.plugins.memory.DoublePropertyState;
import org.apache.jackrabbit.oak.plugins.memory.LongPropertyState;
import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState;
import org.apache.jackrabbit.oak.plugins.value.Conversions;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* {@link PropertyState} implementation that holds its JSOP encoded value in compressed form and parses it lazily on first access.
*/
public final class CompressedDocumentPropertyState implements PropertyState {
stefan-egli marked this conversation as resolved.
Show resolved Hide resolved

private static final Logger LOG = LoggerFactory.getLogger(CompressedDocumentPropertyState.class);

// Store used to resolve blob ids when a binary value is parsed.
private final DocumentNodeStore store;

// Name of this property.
private final String name;

// Parsed form of the value; stays null until parsed() is called for the first time.
private PropertyState parsed;
// The JSOP encoded value, UTF-8 encoded and then compressed with 'compression'.
private final byte[] compressedValue;
// Compression used both to compress the value and to decompress it again.
private final Compression compression;

// Read once from the system property when the class is initialized; defaults to -1.
// The property name must not contain a trailing space, otherwise a value set
// with -Doak.documentMK.stringCompressionThreshold=... is never picked up.
// NOTE(review): the meaning of -1 is defined by the callers of
// getCompressionThreshold(), which are not visible here - presumably "disabled".
private static int COMPRESSION_THRESHOLD = SystemPropertySupplier
        .create("oak.documentMK.stringCompressionThreshold", -1).loggingTo(LOG).get();

CompressedDocumentPropertyState(DocumentNodeStore store, String name, String value, Compression compression) {
this.store = store;
this.name = name;
this.compression = compression;
try {
this.compressedValue = compress(value.getBytes(StandardCharsets.UTF_8));
} catch (IOException e) {
LOG.warn("Failed to compress property {} value: ", name, e);
throw new IllegalArgumentException("Failed to compress value", e);
}
}

/**
 * Compresses the given bytes with the compression of this property state.
 *
 * @param value the bytes to compress
 * @return the compressed bytes
 * @throws IOException if writing to or closing the compression stream fails
 */
private byte[] compress(byte[] value) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // try-with-resources closes the compression stream even when write() fails;
    // the stream must be closed before the buffer is read so that all
    // compressed data has been flushed into 'out'.
    try (OutputStream compressionOutputStream = compression.getOutputStream(out)) {
        compressionOutputStream.write(value);
    }
    return out.toByteArray();
}

/**
 * @return the name of this property as passed to the constructor
 */
@NotNull
@Override
public String getName() {
    return this.name;
}

/**
 * Delegates to the lazily parsed property state.
 *
 * @return whether the parsed property is an array
 */
@Override
public boolean isArray() {
    final PropertyState delegate = parsed();
    return delegate.isArray();
}

/**
 * Delegates to the lazily parsed property state.
 *
 * @return the type of the parsed property
 */
@Override
public Type<?> getType() {
    final PropertyState delegate = parsed();
    return delegate.getType();
}

/**
 * Delegates to the lazily parsed property state.
 *
 * @param type the requested value type
 * @return the value of the parsed property converted to {@code type}
 */
@NotNull
@Override
public <T> T getValue(Type<T> type) {
    final PropertyState delegate = parsed();
    return delegate.getValue(type);
}

/**
 * Delegates to the lazily parsed property state.
 *
 * @param type the requested value type
 * @param index the index of the value
 * @return the value at {@code index} of the parsed property converted to {@code type}
 */
@NotNull
@Override
public <T> T getValue(Type<T> type, int index) {
    final PropertyState delegate = parsed();
    return delegate.getValue(type, index);
}

/**
 * Delegates to the lazily parsed property state.
 *
 * @return the size reported by the parsed property
 */
@Override
public long size() {
    final PropertyState delegate = parsed();
    return delegate.size();
}

/**
 * Returns the size of the value at the given index. For a property of type
 * {@code BINARIES} this is the length of the binary value at that index,
 * otherwise the size reported by the parsed property.
 *
 * @param index the index of the value
 * @return the size of the value at {@code index}
 */
@Override
public long size(int index) {
    final PropertyState state = parsed();
    if (state.getType() != Type.BINARIES) {
        return state.size(index);
    }
    return state.getValue(Type.BINARY, index).length();
}

/**
 * Delegates to the lazily parsed property state.
 *
 * @return the number of values of the parsed property
 */
@Override
public int count() {
    final PropertyState delegate = parsed();
    return delegate.count();
}

/**
 * Returns the raw un-parsed value of this property state. The value is
 * obtained by decompressing the stored bytes on every call.
 *
 * @return the decompressed, un-parsed value.
 */
@NotNull
String getValue() {
    final byte[] stored = this.compressedValue;
    return decompress(stored);
}

/**
 * Decompresses the given bytes and decodes them as a UTF-8 string.
 * <p>
 * A failure is logged and not propagated; in that case the fixed JSON string
 * literal {@code "{}"} (including the quotes) is returned instead.
 *
 * @param value the compressed bytes
 * @return the decompressed string, or the fallback value on failure
 */
private String decompress(byte[] value) {
    // try-with-resources makes sure the decompressing stream is always closed
    try (InputStream in = compression.getInputStream(new ByteArrayInputStream(value))) {
        return new String(in.readAllBytes(), StandardCharsets.UTF_8);
    } catch (IOException e) {
        LOG.error("Failed to decompress property {} value: ", getName(), e);
        return "\"{}\"";
    }
}

/**
 * Returns the compressed value. The internal array is returned as is, not a
 * copy.
 *
 * @return the compressed bytes held by this property state
 */
byte[] getCompressedValue() {
    return this.compressedValue;
}

//------------------------------------------------------------< Object >--

/**
 * Compares this property state with another object.
 * <p>
 * Two {@code CompressedDocumentPropertyState} instances are equal when their
 * names and their compressed bytes are equal; nothing is decompressed for
 * this comparison. Any other {@link PropertyState} is compared with the
 * parsed form of this property via {@link AbstractPropertyState#equal}.
 *
 * @param object the object to compare with
 * @return {@code true} if the objects are considered equal
 */
@Override
public boolean equals(Object object) {
    if (this == object) {
        return true;
    }
    if (object instanceof CompressedDocumentPropertyState) {
        CompressedDocumentPropertyState other = (CompressedDocumentPropertyState) object;
        // compressedValue is final and always assigned by the constructor, so
        // no null handling is needed; Arrays.equals already covers the length.
        // NOTE(review): only the compressed bytes are compared, so instances
        // holding the same value compressed differently are not equal here.
        return this.name.equals(other.name)
                && Arrays.equals(this.compressedValue, other.compressedValue);
    }
    // fall back to default equality check in AbstractPropertyState
    return object instanceof PropertyState
            && AbstractPropertyState.equal(parsed(), (PropertyState) object);
}

/**
 * @return the hash code computed by {@link AbstractPropertyState#hashCode(PropertyState)}
 */
@Override
public int hashCode() {
    final int hash = AbstractPropertyState.hashCode(this);
    return hash;
}

/**
 * @return the string representation produced by
 *         {@link AbstractPropertyState#toString(PropertyState)}
 */
@Override
public String toString() {
    final String text = AbstractPropertyState.toString(this);
    return text;
}

/**
 * @return the current value of the static compression threshold
 */
static int getCompressionThreshold() {
    return CompressedDocumentPropertyState.COMPRESSION_THRESHOLD;
}

/**
 * Overwrites the static compression threshold shared by all instances.
 *
 * @param compressionThreshold the new threshold
 */
static void setCompressionThreshold(int compressionThreshold) {
    CompressedDocumentPropertyState.COMPRESSION_THRESHOLD = compressionThreshold;
}

//----------------------------< internal >----------------------------------

/**
 * Returns the parsed form of this property, creating it on first use from
 * the decompressed value and keeping it in the {@code parsed} field.
 *
 * @return the parsed property state
 */
private PropertyState parsed() {
    if (parsed != null) {
        return parsed;
    }
    JsopReader tokenizer = new JsopTokenizer(getValue());
    // a leading '[' marks a multi valued property
    parsed = tokenizer.matches('[')
            ? readArrayProperty(name, tokenizer)
            : readProperty(name, tokenizer);
    return parsed;
}

/**
 * Reads a single valued {@code PropertyState} from a {@link JsopReader},
 * using the store of this property state.
 *
 * @param name the name of the property state
 * @param reader the reader
 * @return new property state
 */
PropertyState readProperty(String name, JsopReader reader) {
    return CompressedDocumentPropertyState.readProperty(name, this.store, reader);
}

/**
 * Reads a single valued {@code PropertyState} from a {@link JsopReader}.
 * <p>
 * Numbers become a long property when they parse as a long and a double
 * property otherwise; {@code true}/{@code false} become boolean properties.
 * Strings are checked for the empty-array marker and for an encoded type
 * prefix; strings without either become plain string properties.
 *
 * @param name the name of the property state
 * @param store the store, used to look up blobs for binary values
 * @param reader the reader
 * @return new property state
 * @throws IllegalArgumentException if the next token is not a number,
 *         boolean or string
 */
static PropertyState readProperty(String name, DocumentNodeStore store, JsopReader reader) {
    if (reader.matches(JsopReader.NUMBER)) {
        String token = reader.getToken();
        Long asLong = LongUtils.tryParse(token);
        if (asLong != null) {
            return new LongPropertyState(name, asLong);
        }
        return new DoublePropertyState(name, Double.parseDouble(token));
    }
    if (reader.matches(JsopReader.TRUE)) {
        return BooleanPropertyState.booleanProperty(name, true);
    }
    if (reader.matches(JsopReader.FALSE)) {
        return BooleanPropertyState.booleanProperty(name, false);
    }
    if (!reader.matches(JsopReader.STRING)) {
        throw new IllegalArgumentException("Unexpected token: " + reader.getToken());
    }

    String encoded = reader.getToken();
    if (encoded.startsWith(TypeCodes.EMPTY_ARRAY)) {
        // the type name follows the empty-array marker
        String typeName = encoded.substring(TypeCodes.EMPTY_ARRAY.length());
        int tag = PropertyType.valueFromName(typeName);
        return PropertyStates.createProperty(name, emptyList(), Type.fromTag(tag, true));
    }
    int splitAt = TypeCodes.split(encoded);
    if (splitAt == -1) {
        // no type prefix: plain string
        return StringPropertyState.stringProperty(name, StringCache.get(encoded));
    }
    int tag = TypeCodes.decodeType(splitAt, encoded);
    String decoded = TypeCodes.decodeName(splitAt, encoded);
    if (tag == PropertyType.BINARY) {
        return BinaryPropertyState.binaryProperty(name, store.getBlobFromBlobId(decoded));
    }
    return createProperty(name, StringCache.get(decoded), tag);
}

/**
 * Reads a multi valued {@code PropertyState} from a {@link JsopReader},
 * using the store of this property state.
 *
 * @param name the name of the property state
 * @param reader the reader
 * @return new property state
 */
PropertyState readArrayProperty(String name, JsopReader reader) {
    return CompressedDocumentPropertyState.readArrayProperty(name, this.store, reader);
}

/**
 * Reads a multi valued {@code PropertyState} from a {@link JsopReader}.
 * <p>
 * Elements are consumed until the closing {@code ']'} is matched. The type
 * of the resulting property is the type of the last element read, or
 * {@code STRING} when there are no elements.
 *
 * @param name the name of the property state
 * @param store the store, used to look up blobs for binary values
 * @param reader the reader
 * @return new property state
 * @throws IllegalArgumentException if an element is not a number, boolean
 *         or string
 */
static PropertyState readArrayProperty(String name, DocumentNodeStore store, JsopReader reader) {
    List<Object> elements = new ArrayList<>();
    int tag = PropertyType.STRING;
    while (!reader.matches(']')) {
        if (reader.matches(JsopReader.NUMBER)) {
            String token = reader.getToken();
            Long asLong = LongUtils.tryParse(token);
            if (asLong != null) {
                tag = PropertyType.LONG;
                elements.add(asLong);
            } else {
                tag = PropertyType.DOUBLE;
                elements.add(Double.parseDouble(token));
            }
        } else if (reader.matches(JsopReader.TRUE)) {
            tag = PropertyType.BOOLEAN;
            elements.add(true);
        } else if (reader.matches(JsopReader.FALSE)) {
            tag = PropertyType.BOOLEAN;
            elements.add(false);
        } else if (reader.matches(JsopReader.STRING)) {
            String encoded = reader.getToken();
            int splitAt = TypeCodes.split(encoded);
            if (splitAt == -1) {
                // no type prefix: plain string
                tag = PropertyType.STRING;
                elements.add(StringCache.get(encoded));
            } else {
                tag = TypeCodes.decodeType(splitAt, encoded);
                String decoded = TypeCodes.decodeName(splitAt, encoded);
                switch (tag) {
                    case PropertyType.BINARY:
                        elements.add(store.getBlobFromBlobId(decoded));
                        break;
                    case PropertyType.DOUBLE:
                        elements.add(Conversions.convert(decoded).toDouble());
                        break;
                    case PropertyType.DECIMAL:
                        elements.add(Conversions.convert(decoded).toDecimal());
                        break;
                    default:
                        elements.add(StringCache.get(decoded));
                        break;
                }
            }
        } else {
            throw new IllegalArgumentException("Unexpected token: " + reader.getToken());
        }
        // consume the separator between elements, if there is one
        reader.matches(',');
    }
    return createProperty(name, elements, Type.fromTag(tag, true));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ AbstractDocumentNodeState getSecondaryNodeState(@NotNull final Path path,

@NotNull
public PropertyState createPropertyState(String name, String value){
return new DocumentPropertyState(this, name, checkNotNull(value));
return DocumentPropertyStateFactory.createPropertyState(this, name, checkNotNull(value));
}

/**
Expand Down Expand Up @@ -3214,8 +3214,7 @@ private long getBinarySize(@Nullable String json) {
if (json == null) {
return -1;
}
PropertyState p = new DocumentPropertyState(
DocumentNodeStore.this, "p", json);
PropertyState p = DocumentPropertyStateFactory.createPropertyState(DocumentNodeStore.this, "p", json);
if (p.getType().tag() != PropertyType.BINARY) {
return -1;
}
Expand Down
Loading