From 60ef53c99c734de7c5e508feaf5cc55be051e311 Mon Sep 17 00:00:00 2001 From: Chad Wilson Date: Sat, 12 Feb 2022 15:19:17 +0800 Subject: [PATCH] Fix bug that blocks creating/launching instances across multiple ECS profiles and subnets If you have two ECS cluster profiles which have different subnet configurations, it is possible for the plugin to get stuck and be unable to create instances. This is because when instances/containers are searched for on EC2, they are not filtered by the `cluster-name`, so you may get returned instances for a different cluster profile than the one you are trying to create. That may be intentional, as the goal here is to launch an instance in the best subnet, and cluster profiles may be sharing subnets. If you end up with state cluster-profile-A --> instance-A --> subnet-A and you then need to launch an instance in cluster-profile-B in one of (subnet-B, subnet-C) ... the previous code fails to handle it, because is finds an instance, but then filters by relevant subnets, and then doesn't handle the case where there are no subnets after counting by index. --- .../ecs/aws/SpotInstanceService.java | 19 ++++++++----------- .../elasticagent/ecs/aws/SubnetSelector.java | 11 +++++++++-- .../ecs/aws/SubnetSelectorTest.java | 17 +++++++++++++++++ 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SpotInstanceService.java b/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SpotInstanceService.java index 8a31da6..eb1b0ee 100644 --- a/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SpotInstanceService.java +++ b/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SpotInstanceService.java @@ -34,7 +34,6 @@ import static com.thoughtworks.gocd.elasticagent.ecs.domain.SpotRequestState.ACTIVE; import static com.thoughtworks.gocd.elasticagent.ecs.domain.SpotRequestState.OPEN; import static java.lang.String.format; -import static java.util.Arrays.asList; import static java.util.stream.Collectors.*; import static org.apache.commons.collections4.CollectionUtils.union; import static org.apache.commons.lang3.StringUtils.containsAny; @@ -46,7 +45,7 @@ public class SpotInstanceService { private final ContainerInstanceHelper containerInstanceHelper; private final TerminateOperation terminateOperation; private final SpotRequestMatcher spotRequestMatcher; - private Set untaggedSpotRequests = Collections.synchronizedSet(new HashSet<>()); + private final Set untaggedSpotRequests = Collections.synchronizedSet(new HashSet<>()); private static final SpotInstanceService spotInstanceService = new SpotInstanceService(); private SpotInstanceService() { @@ -150,15 +149,13 @@ public void tagSpotInstances(PluginSettings pluginSettings) { public void refreshUnTaggedSpotRequests(PluginSettings pluginSettings) { synchronized (untaggedSpotRequests) { - List allSpotRequestsforCluster = spotInstanceHelper.getAllSpotRequestsForCluster(pluginSettings); - List spotRequestIds = allSpotRequestsforCluster.stream().map(SpotInstanceRequest::getSpotInstanceRequestId).collect(toList()); + List allSpotRequestsForCluster = spotInstanceHelper.getAllSpotRequestsForCluster(pluginSettings); + List spotRequestIds = allSpotRequestsForCluster.stream().map(SpotInstanceRequest::getSpotInstanceRequestId).collect(toList()); - LOG.debug("[refresh-spot-requests] All SpotRequests for Cluster: '{}'", spotRequestIds.stream().collect(joining())); + LOG.debug("[refresh-spot-requests] All SpotRequests for Cluster: '{}'", String.join("", spotRequestIds)); LOG.debug("[refresh-spot-requests] All Untagged spot requests: '{}'", untaggedSpotRequestIds(untaggedSpotRequests)); - untaggedSpotRequests.removeIf(request -> { - return spotRequestIds.contains(request.getSpotInstanceRequestId()); - }); + untaggedSpotRequests.removeIf(request -> spotRequestIds.contains(request.getSpotInstanceRequestId())); } } @@ -170,7 +167,7 @@ private String untaggedSpotRequestIds(Set untaggedSpotReque private List spotRequestsWithoutRegisteredInstances(PluginSettings pluginSettings, Platform platform, List allRegisteredSpotInstancesForPlatform) { List registeredInstanceIds = allRegisteredSpotInstancesForPlatform.stream() - .map(instance -> instance.getInstanceId()).collect(toList()); + .map(Instance::getInstanceId).collect(toList()); List spotRequests = spotInstanceHelper .getAllOpenOrSpotRequestsWithRunningInstances(pluginSettings, pluginSettings.getClusterName(), platform); @@ -226,7 +223,7 @@ private String instanceIds(List idleInstancesWithoutTag) { } private Predicate getIdleInstancePredicate() { - return instance -> !instance.getTags().stream().anyMatch(tag -> tag.getKey().equals(LAST_SEEN_IDLE)); + return instance -> instance.getTags().stream().noneMatch(tag -> tag.getKey().equals(LAST_SEEN_IDLE)); } private Map> groupSpotRequestsByPlatform(List spotInstanceRequests) { @@ -246,7 +243,7 @@ private void tagSpotRequest(PluginSettings pluginSettings, ElasticAgentProfilePr LOG.debug("[create-agent] Tagging open spot request."); spotInstanceRequest.withTags(new Tag().withKey("platform").withValue(elasticAgentProfileProperties.platform().name())); - spotInstanceHelper.tagSpotResources(pluginSettings, asList(spotInstanceRequest.getSpotInstanceRequestId()), elasticAgentProfileProperties.platform()); + spotInstanceHelper.tagSpotResources(pluginSettings, List.of(spotInstanceRequest.getSpotInstanceRequestId()), elasticAgentProfileProperties.platform()); } catch (Exception e) { LOG.error("[create-agent] There were errors while tagging spot instance request with id: '{}' cancelling the request.", spotInstanceRequest.getSpotInstanceRequestId(), e); diff --git a/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelector.java b/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelector.java index edb788e..bfaf215 100644 --- a/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelector.java +++ b/src/main/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelector.java @@ -49,8 +49,15 @@ public Subnet selectSubnetWithMinimumEC2Instances(PluginSettings pluginSettings, private Subnet findSubnetWithMinimumInstances(List subnets, List instances) { final Map instancePerSubnet = instancePerSubnet(subnets, instances); + // Can happen if all found instances happen to be from other elastic agent profiles (which may or may not + // be sharing a profile with our plugin configuration + if (instancePerSubnet.isEmpty()) { + return subnets.get(0); + } + return subnets.stream() - .filter(subnet -> !instancePerSubnet.containsKey(subnet)).findFirst() + .filter(subnet -> !instancePerSubnet.containsKey(subnet)) + .findFirst() .orElse(Collections.min(instancePerSubnet.entrySet(), Comparator.comparingDouble(Map.Entry::getValue)).getKey()); } @@ -58,7 +65,7 @@ private Map instancePerSubnet(List subnets, List final Map subnetMap = subnets.stream().collect(Collectors.toMap(Subnet::getSubnetId, subnet -> subnet)); return instances.stream() - .filter(instance -> subnetMap.keySet().contains(instance.getSubnetId())) + .filter(instance -> subnetMap.containsKey(instance.getSubnetId())) .collect(Collectors.groupingBy(subnet -> subnetMap.get(subnet.getSubnetId()), Collectors.counting())); } diff --git a/src/test/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelectorTest.java b/src/test/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelectorTest.java index f6918f7..5373b42 100644 --- a/src/test/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelectorTest.java +++ b/src/test/java/com/thoughtworks/gocd/elasticagent/ecs/aws/SubnetSelectorTest.java @@ -89,6 +89,23 @@ void shouldReturnFirstAvailableSubnetIdIfNoContainerInstanceRunning() { assertThat(argumentCaptor.getValue().getSubnetIds()).isEqualTo(Arrays.asList("subnet-1")); } + @Test + void shouldReturnFirstAvailableSubnetIdIfContainerInstanceRunningInUnrelatedSubnet() { + final ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(DescribeSubnetsRequest.class); + + when(pluginSettings.getSubnetIds()).thenReturn(Arrays.asList("subnet-1")); + when(ec2Client.describeSubnets(argumentCaptor.capture())).thenReturn(new DescribeSubnetsResult().withSubnets( + new Subnet().withSubnetId("subnet-1").withState("available") + )); + + Instance instanceInOtherSubnet = new Instance().withSubnetId("some-other-subnet"); + + final Subnet subnet = subnetSelector.selectSubnetWithMinimumEC2Instances(pluginSettings, pluginSettings.getSubnetIds(), List.of(instanceInOtherSubnet)); + + assertThat(subnet.getSubnetId()).isEqualTo("subnet-1"); + assertThat(argumentCaptor.getValue().getSubnetIds()).isEqualTo(Arrays.asList("subnet-1")); + } + @Test void shouldReturnSubnetIdWhichIsHavingMinimumEC2InstanceRunning() { final ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(DescribeSubnetsRequest.class);