Skip to content

Commit

Permalink
[PLAT-16283] Add support bundle size estimate API
Browse files Browse the repository at this point in the history
Summary:
This diff has the following changes:

- Added a handler to move excess logic out of the controller.
- We were converting back and forth between Paths and Strings in many places, which was inefficient, so the code was refactored to use Paths only where required.
- Added a new interface method that returns the file paths and sizes for applicable components.
 This method is used by both the support bundle creation and size estimate APIs.
- Adjusted UTs where required for new behaviour.
- Added a new size estimation API that returns a map of all component sizes and the total size.
```
Request:
GET    /customers/:cUUID/universes/:uniUUID/support_bundle/estimate_size
{
    "startDate": "2024-12-26T10:50:21Z",
    "endDate": "2024-12-27T10:50:21Z",
    "components": [
        "ApplicationLogs",
        "YbaMetadata",
        "UniverseLogs",
        "OutputFiles",
        "ErrorFiles",
        "GFlags",
        "Instance",
        "ConsensusMeta",
        "TabletMeta",
        "NodeAgent",
        "CoreFiles",
        "YbcLogs"
    ],
    "maxNumRecentCores": 1,
    "maxCoreFileSize": 26843545600
}

Response:
{
    "data": {
        "yb-admin-asharma-aws-test-3n-n7": {
            "GFlags": 10000,
            "Instance": 10000,
            "YbcLogs": 13840,
            "OutputFiles": 10000,
            "TabletMeta": 135390,
            "NodeAgent": 47640,
            "UniverseLogs": 2827537,
            "ErrorFiles": 10000,
            "ConsensusMeta": 8888,
            "CoreFiles": 0
        },
        "YBA": {
            "YbaMetadata": 180580,
            "PrometheusMetrics": 51910000,
            "ApplicationLogs": 4541851
        },...
    }
}
```

Test Plan:
Existing UTs and iTests.

Verified that size returned by the new API is equal to the size of the bundle after being unzipped.
For the Prometheus component, verified that the estimates are reasonable for time ranges between 15 minutes and 3 hours.
Beyond this range, the actual data collected runs into GBs and the estimates are off, but this should be acceptable since Prometheus data is usually only collected for a short period of time around a task failure.

Tested the new API on 1n, 3n and 10n universes. The API response time in all cases was under 2 seconds and should be similar even for larger universes, since all the data collection happens in parallel.

Reviewers: #yba-api-review!, skurapati

Reviewed By: skurapati

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D40910
  • Loading branch information
asharma-yb committed Jan 8, 2025
1 parent a8d9bbd commit d6cea59
Show file tree
Hide file tree
Showing 32 changed files with 1,080 additions and 321 deletions.
9 changes: 5 additions & 4 deletions managed/devops/bin/node_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ check_file_exists() {
fi
}

find_paths_in_dir() {
get_paths_and_sizes() {
remote_dir_path=$1
shift
max_depth=$1
Expand All @@ -70,12 +70,13 @@ find_paths_in_dir() {
shift
temp_file_path=$1

find "$remote_dir_path" -maxdepth "$max_depth" -type "$file_type" > "$temp_file_path"
find "$remote_dir_path" -maxdepth "$max_depth" -type "$file_type" \
-exec ls -ltp {} + | awk '{print $5, $9}' > "$temp_file_path"
}

# This function returns a list of file paths and their respective sizes, in a given directory.
# This function returns a list of file names and their respective sizes, in a given directory.
# Sorts the list by modification time, with newest first.
get_paths_and_sizes() {
get_paths_and_sizes_within_dates() {
remote_dir_path=$1
shift
temp_file_path=$1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.yugabyte.yw.common.operator.OperatorStatusUpdaterFactory;
import com.yugabyte.yw.common.supportbundle.SupportBundleComponent;
import com.yugabyte.yw.common.supportbundle.SupportBundleComponentFactory;
import com.yugabyte.yw.common.utils.Pair;
import com.yugabyte.yw.controllers.handlers.UniverseInfoHandler;
import com.yugabyte.yw.models.Customer;
import com.yugabyte.yw.models.SupportBundle;
Expand All @@ -43,14 +44,12 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.DateUtils;
import play.libs.Json;

@Slf4j
Expand All @@ -60,7 +59,7 @@ public class CreateSupportBundle extends AbstractTaskBase {
@Inject private UniverseInfoHandler universeInfoHandler;
@Inject private SupportBundleComponentFactory supportBundleComponentFactory;
@Inject private SupportBundleUtil supportBundleUtil;
@Inject private Config config;
@Inject private Config staticConfig;
@Inject private NodeUniverseManager nodeUniverseManager;
@Inject RuntimeConfGetter confGetter;

Expand Down Expand Up @@ -113,30 +112,10 @@ public Path generateBundle(SupportBundle supportBundle) throws Exception {
Files.createDirectories(bundlePath);
log.debug("Fetching Universe {} logs", universe.getName());

// Simplified the following 4 cases to extract appropriate start and end date
// 1. If both of the dates are given and valid
// 2. If only the start date is valid, filter from startDate till the end
// 3. If only the end date is valid, filter from the beginning till endDate
// 4. Default : If no dates are specified, download all the files from last n days
Date startDate, endDate;
boolean startDateIsValid = supportBundleUtil.isValidDate(supportBundle.getStartDate());
boolean endDateIsValid = supportBundleUtil.isValidDate(supportBundle.getEndDate());
if (!startDateIsValid && !endDateIsValid) {
int default_date_range = config.getInt("yb.support_bundle.default_date_range");
endDate = supportBundleUtil.getTodaysDate();
startDate =
DateUtils.truncate(
supportBundleUtil.getDateNDaysAgo(endDate, default_date_range),
Calendar.DAY_OF_MONTH);
} else {
// Strip the date object of the time and set only the date.
// This will ensure that we collect files inclusive of the start date.
startDate =
startDateIsValid
? DateUtils.truncate(supportBundle.getStartDate(), Calendar.DAY_OF_MONTH)
: new Date(Long.MIN_VALUE);
endDate = endDateIsValid ? supportBundle.getEndDate() : new Date(Long.MAX_VALUE);
}
Pair<Date, Date> datePair =
supportBundleUtil.getValidStartAndEndDates(
staticConfig, supportBundle.getStartDate(), supportBundle.getEndDate());
Date startDate = datePair.getFirst(), endDate = datePair.getSecond();

// Add the supportBundle metadata into the bundle
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ public void run() {
} catch (Exception e) {
// Log the error and continue with the rest of support bundle collection.
log.error(
"Error occurred in support bundle collection for component '{}' on {} node: {}",
"Error occurred in support bundle collection for component '{}' on {} node",
taskParams().supportBundleComponent.getClass().getSimpleName(),
(taskParams().node == null) ? "YBA" : taskParams().node.getNodeName(),
e.getMessage());
(taskParams().node == null) ? "YBA" : taskParams().node.getNodeName());
e.printStackTrace();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import com.yugabyte.yw.common.config.RuntimeConfGetter;
import com.yugabyte.yw.common.config.UniverseConfKeys;
import com.yugabyte.yw.common.gflags.GFlagsUtil;
import com.yugabyte.yw.common.utils.Pair;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.Cluster;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.UserIntent;
Expand All @@ -27,20 +26,19 @@
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.io.FileUtils;
Expand Down Expand Up @@ -730,16 +728,16 @@ public boolean isNodeReachable(NodeDetails node, Universe universe, long timeout
}

/**
* Gets a list of all the absolute file paths at a given remote directory
* Gets a map of all the absolute file paths to sizes at a given remote directory
*
* @param node
* @param universe
* @param remoteDirPath
* @param maxDepth
* @param fileType
* @return list of strings of all the absolute file paths
* @return map of absolute file paths to file sizes
*/
public List<Path> getNodeFilePaths(
public Map<String, Long> getNodeFilePathAndSizes(
NodeDetails node, Universe universe, String remoteDirPath, int maxDepth, String fileType) {
String localTempFilePath =
getLocalTmpDir() + "/" + UUID.randomUUID().toString() + "-source-files-unfiltered.txt";
Expand All @@ -750,7 +748,7 @@ public List<Path> getNodeFilePaths(
+ "-source-files-unfiltered.txt";

List<String> findCommandParams = new ArrayList<>();
findCommandParams.add("find_paths_in_dir");
findCommandParams.add("get_paths_and_sizes");
findCommandParams.add(remoteDirPath);
findCommandParams.add(String.valueOf(maxDepth));
findCommandParams.add(fileType);
Expand All @@ -768,28 +766,37 @@ public List<Path> getNodeFilePaths(

// Populate the text file into array.
List<String> nodeFilePathStrings = Arrays.asList();
// LinkedHashMap to maintain insertion order.
// Files are ordered most recent to least.
Map<String, Long> nodeFilePathSizeMap = new LinkedHashMap<>();
try {
nodeFilePathStrings = Files.readAllLines(Paths.get(localTempFilePath));
for (String outputLine : nodeFilePathStrings) {
String[] outputLineSplit = outputLine.split("\\s+", 2);
if (!StringUtils.isBlank(outputLine) && outputLineSplit.length == 2) {
nodeFilePathSizeMap.put(outputLineSplit[1], Long.valueOf(outputLineSplit[0]));
}
}
} catch (IOException e) {
log.error("Error occurred", e);
} finally {
FileUtils.deleteQuietly(new File(localTempFilePath));
}
return nodeFilePathStrings.stream().map(Paths::get).collect(Collectors.toList());
return nodeFilePathSizeMap;
}

/**
* Returns a list of file sizes (in bytes) and their names present in a remote directory on the
* Returns a map of file names to their sizes (in bytes) present in a remote directory on the
* node. This function creates a temp file with these sizes and names and copies the temp file
* from remote to local. Then reads and processes this info from the local temp file. This is done
* so that this operation is scalable for large number of files present on the node.
*
* @param node
* @param universe
* @param remoteDirPath
* @return the list of pairs (size, name)
* @return a map of filenames to filesizes
*/
public List<Pair<Long, String>> getNodeFilePathsAndSize(
public Map<String, Long> getNodeFilePathsAndSizeWithinDates(
NodeDetails node, Universe universe, String remoteDirPath, Date startDate, Date endDate) {
String randomUUIDStr = UUID.randomUUID().toString();
String localTempFilePath =
Expand All @@ -799,7 +806,7 @@ public List<Pair<Long, String>> getNodeFilePathsAndSize(

SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
List<String> findCommandParams = new ArrayList<>();
findCommandParams.add("get_paths_and_sizes");
findCommandParams.add("get_paths_and_sizes_within_dates");
findCommandParams.add(remoteDirPath);
findCommandParams.add(remoteTempFilePath);
findCommandParams.add(formatter.format(startDate));
Expand All @@ -817,23 +824,24 @@ public List<Pair<Long, String>> getNodeFilePathsAndSize(

// Populate the text file into array.
List<String> nodeFilePathStrings = Arrays.asList();
List<Pair<Long, String>> nodeFileSizePathStrings = new ArrayList<>();
// LinkedHashMap to maintain insertion order.
// Files are ordered most recent to least.
Map<String, Long> nodeFilePathSizeMap = new LinkedHashMap<>();
try {
nodeFilePathStrings = Files.readAllLines(Paths.get(localTempFilePath));
log.debug("List of files found on the node '{}': '{}'", node.nodeName, nodeFilePathStrings);
for (String outputLine : nodeFilePathStrings) {
String[] outputLineSplit = outputLine.split("\\s+", 2);
if (!StringUtils.isBlank(outputLine) && outputLineSplit.length == 2) {
nodeFileSizePathStrings.add(
new Pair<>(Long.valueOf(outputLineSplit[0]), outputLineSplit[1]));
nodeFilePathSizeMap.put(outputLineSplit[1], Long.valueOf(outputLineSplit[0]));
}
}
} catch (IOException e) {
log.error("Error occurred", e);
} finally {
FileUtils.deleteQuietly(new File(localTempFilePath));
}
return nodeFileSizePathStrings;
return nodeFilePathSizeMap;
}

public enum UniverseNodeAction {
Expand Down
Loading

0 comments on commit d6cea59

Please sign in to comment.