Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker, core, editoast: add mode single-worker for all infra #9166

Merged
merged 3 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ docker compose up -d --build
xdg-open http://localhost:4000/
```

(Linux or WSL users can use `scripts/osrd-compose.sh` instead of `docker compose` to enable host networking - useful to launch services in a debugger)
(Linux or WSL users can use `scripts/host-compose.sh` instead of `docker compose` to enable host networking - useful to launch services in a debugger)

## Deployment

Expand Down
39 changes: 37 additions & 2 deletions core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@ You'll need **Java 17**
# on Windows
gradlew.bat shadowJar

# Run as service
java -jar build/libs/osrd-all.jar api --editoast-url http://localhost:8090/ -p 8080
# Run as a RabbitMQ single worker for all infra
ALL_INFRA=true java -jar build/libs/osrd-all.jar worker --editoast-url http://localhost:8090/

# Check that an infra can be loaded
java -jar build/libs/osrd-all.jar load-infra --path RAILJSON_INFRA

# Run as web-service (deprecated inside OSRD's stack)
java -jar build/libs/osrd-all.jar api --editoast-url http://localhost:8090/ --port 8080
```

### CLI usage (alternative)
Expand Down Expand Up @@ -81,3 +84,35 @@ To auto-format all source code, run:
```sh
./gradlew spotlessApply
```

### Local run and debug

It is recommended to pass additional Java options to allow processing large infrastructures:

```sh
-ea -Xmx12g -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof
```

A dedicated script (which only relies on the `docker compose` CLI and a set of docker-compose files)
makes it possible to run a single core worker for all infra on the localhost network:

```sh
./scripts/single-worker-compose.sh up -d

# or exclude 'core' service straight away:
./scripts/single-worker-compose.sh up -d --scale core=0
```

Then, it is easy to replace the desired component for debugging purposes. \
For core:

```sh
./scripts/single-worker-compose.sh down core # if 'core' is running
./gradlew shadowJar && ALL_INFRA=true java -jar -ea -Xmx12g -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dump.hprof build/libs/osrd-all.jar worker --editoast-url http://localhost:8090/
```

Cleaning or restarting the whole stack is sometimes necessary, and can also be done
through the docker compose CLI (the following wipes the database too):
```sh
./scripts/single-worker-compose.sh down -v
```
34 changes: 21 additions & 13 deletions core/src/main/java/fr/sncf/osrd/cli/WorkerCommand.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package fr.sncf.osrd.cli

import com.beust.jcommander.Parameter
import com.beust.jcommander.Parameters
import com.rabbitmq.client.*
import com.rabbitmq.client.AMQP
import com.rabbitmq.client.Channel
import com.rabbitmq.client.ConnectionFactory
import com.rabbitmq.client.DeliverCallback
import fr.sncf.osrd.api.*
import fr.sncf.osrd.api.api_v2.conflicts.ConflictDetectionEndpointV2
import fr.sncf.osrd.api.api_v2.path_properties.PathPropEndpoint
Expand Down Expand Up @@ -39,16 +42,18 @@ class WorkerCommand : CliCommand {
private var editoastAuthorization: String = "x-osrd-core"

val WORKER_ID: String?
val WORKER_ID_USE_HOSTNAME: String?
val WORKER_ID_USE_HOSTNAME: Boolean
val WORKER_KEY: String?
val WORKER_AMQP_URI: String
val WORKER_POOL: String
val WORKER_REQUESTS_QUEUE: String
val WORKER_ACTIVITY_EXCHANGE: String
val ALL_INFRA: Boolean

init {
WORKER_ID_USE_HOSTNAME = System.getenv("WORKER_ID_USE_HOSTNAME")
WORKER_KEY = System.getenv("WORKER_KEY")
WORKER_ID_USE_HOSTNAME = getBooleanEnvvar("WORKER_ID_USE_HOSTNAME")
ALL_INFRA = getBooleanEnvvar("ALL_INFRA")
WORKER_KEY = if (ALL_INFRA) "all" else System.getenv("WORKER_KEY")
WORKER_AMQP_URI =
System.getenv("WORKER_AMQP_URI") ?: "amqp://osrd:password@127.0.0.1:5672/%2f"
WORKER_POOL = System.getenv("WORKER_POOL") ?: "core"
Expand All @@ -58,18 +63,19 @@ class WorkerCommand : CliCommand {
System.getenv("WORKER_ACTIVITY_EXCHANGE") ?: "$WORKER_POOL-activity-xchg"

WORKER_ID =
if (
WORKER_ID_USE_HOSTNAME == null ||
WORKER_ID_USE_HOSTNAME == "" ||
WORKER_ID_USE_HOSTNAME == "0" ||
WORKER_ID_USE_HOSTNAME.lowercase() == "false"
) {
System.getenv("WORKER_ID")
} else {
if (WORKER_ID_USE_HOSTNAME) {
java.net.InetAddress.getLocalHost().hostName
} else if (ALL_INFRA) {
"all_infra_worker"
} else {
System.getenv("WORKER_ID")
}
}

/**
 * Interprets the environment variable [name] as a boolean flag.
 *
 * The flag is `false` when the variable is unset, empty, `"0"` or `"false"`
 * (compared case-insensitively); any other value yields `true`.
 */
private fun getBooleanEnvvar(name: String): Boolean {
    // An unset variable means the flag is disabled.
    val normalized = System.getenv(name)?.lowercase() ?: return false
    return when (normalized) {
        "", "0", "false" -> false
        else -> true
    }
}

override fun run(): Int {
if (WORKER_ID == null || WORKER_KEY == null) {
throw IllegalStateException(
Expand Down Expand Up @@ -123,7 +129,9 @@ class WorkerCommand : CliCommand {
val connection = factory.newConnection()
connection.createChannel().use { channel -> reportActivity(channel, "started") }

infraManager.load(infraId, null, diagnosticRecorder)
if (!ALL_INFRA) {
infraManager.load(infraId, null, diagnosticRecorder)
}

connection.createChannel().use { channel -> reportActivity(channel, "ready") }

Expand Down
23 changes: 20 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ services:
test: ["CMD", "pg_isready", "-d", "postgres://osrd:password@postgres/osrd"]
start_period: 4s
interval: 5s
retries: 20

valkey:
image: valkey/valkey:alpine
Expand All @@ -35,6 +36,7 @@ services:
test: ["CMD", "valkey-cli", "ping"]
start_period: 4s
interval: 5s
retries: 20

rabbitmq:
image: rabbitmq:4-management
Expand All @@ -49,20 +51,33 @@ services:
- rabbitmq_data:/var/lib/rabbitmq
healthcheck:
test: ["CMD", "rabbitmqctl", "status"]
interval: 30s
timeout: 30s
retries: 3
interval: 5s
timeout: 5s
retries: 20

core:
# This is a dummy container to build the core image
# and document/provide parameters to other compose files
image: ghcr.io/openrailassociation/osrd-edge/osrd-core:${TAG-dev}
container_name: osrd-core-dummy
depends_on:
rabbitmq: {condition: service_healthy}
build:
context: core
dockerfile: Dockerfile
additional_contexts:
test_data: tests/data
static_assets: assets
environment:
# Actual values in ./docker/osrdyne.yml (please maintain consistency)
# Provided here only for reuse in compose layers and doc
CORE_EDITOAST_URL: "http://osrd-editoast"
JAVA_TOOL_OPTIONS: "-javaagent:/app/opentelemetry-javaagent.jar"
CORE_MONITOR_TYPE: "opentelemetry"
OTEL_EXPORTER_OTLP_TRACES_PROTOCOL: "grpc"
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: "http://jaeger:4317"
OTEL_METRICS_EXPORTER: "none"
OTEL_LOGS_EXPORTER: "none"
restart: "no"
command: "true"

Expand All @@ -88,6 +103,7 @@ services:
test: ["CMD", "curl", "-f", "http://localhost:3000"]
start_period: 4s
interval: 5s
retries: 6

editoast:
image: ghcr.io/openrailassociation/osrd-edge/osrd-editoast:${TAG-dev}
Expand Down Expand Up @@ -122,6 +138,7 @@ services:
test: ["CMD", "curl", "-f", "http://localhost/health"]
start_period: 4s
interval: 5s
retries: 6

gateway:
image: ghcr.io/openrailassociation/osrd-edge/osrd-gateway:${TAG-dev}-standalone
Expand Down
8 changes: 7 additions & 1 deletion docker/docker-compose.host.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ services:
network_mode: host
environment:
EDITOAST_PORT: 8090
OSRD_BACKEND_URL: "http://localhost:8080"
OSRDYNE_API_URL: "http://localhost:4242"
VALKEY_URL: "redis://localhost"
DATABASE_URL: "postgres://osrd:password@localhost:5432/osrd"
Expand All @@ -39,6 +38,13 @@ services:
ports: []
network_mode: host

core:
ports: []
network_mode: host
environment:
CORE_EDITOAST_URL: "http://127.0.0.1:8090"
woshilapin marked this conversation as resolved.
Show resolved Hide resolved
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: "http://127.0.0.1:4317"

osrdyne:
ports: []
network_mode: host
Expand Down
5 changes: 0 additions & 5 deletions docker/docker-compose.noopdyne.yml

This file was deleted.

9 changes: 6 additions & 3 deletions docker/docker-compose.pr-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ services:
test: ["CMD", "pg_isready", "-d", "postgres://osrd:password@postgres:5433/osrd"]
start_period: 4s
interval: 5s
retries: 20
networks:
- pr-tests

Expand All @@ -48,6 +49,7 @@ services:
test: ["CMD", "valkey-cli", "-p" , "6380", "ping"]
start_period: 4s
interval: 5s
retries: 20
networks:
- pr-tests

Expand All @@ -67,9 +69,9 @@ services:
- ../docker/rabbitmq-pr-tests.conf:/etc/rabbitmq/rabbitmq.conf
healthcheck:
test: ["CMD", "rabbitmqctl", "status"]
interval: 15s
timeout: 30s
retries: 3
interval: 5s
timeout: 5s
retries: 20
networks:
- pr-tests

Expand Down Expand Up @@ -107,6 +109,7 @@ services:
test: ["CMD", "curl", "-f", "http://localhost:8091/health"]
start_period: 4s
interval: 5s
retries: 6
networks:
- pr-tests

Expand Down
20 changes: 20 additions & 0 deletions docker/docker-compose.single-worker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
version: '3'

services:
editoast:
environment:
- EDITOAST_CORE_SINGLE_WORKER=true

osrdyne:
volumes: !reset []

core:
# Launch a single worker handling all infra
environment:
- ALL_INFRA=true
container_name: osrd-core-all-infra
restart: unless-stopped
command:
- /bin/sh
- -c
- "exec java $JAVA_OPTS -ea -jar /app/osrd_core.jar worker"
25 changes: 0 additions & 25 deletions editoast/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3756,30 +3756,6 @@ components:
type: string
enum:
- editoast:coreclient:BrokenPipe
EditoastCoreErrorCannotExtractResponseBody:
type: object
required:
- type
- status
- message
properties:
context:
type: object
required:
- msg
properties:
msg:
type: string
message:
type: string
status:
type: integer
enum:
- 500
type:
type: string
enum:
- editoast:coreclient:CannotExtractResponseBody
EditoastCoreErrorConnectionClosedBeforeMessageCompleted:
type: object
required:
Expand Down Expand Up @@ -4055,7 +4031,6 @@ components:
- $ref: '#/components/schemas/EditoastCacheOperationErrorDuplicateIdsProvided'
- $ref: '#/components/schemas/EditoastCacheOperationErrorObjectNotFound'
- $ref: '#/components/schemas/EditoastCoreErrorBrokenPipe'
- $ref: '#/components/schemas/EditoastCoreErrorCannotExtractResponseBody'
- $ref: '#/components/schemas/EditoastCoreErrorConnectionClosedBeforeMessageCompleted'
- $ref: '#/components/schemas/EditoastCoreErrorConnectionResetByPeer'
- $ref: '#/components/schemas/EditoastCoreErrorCoreResponseFormatError'
Expand Down
2 changes: 2 additions & 0 deletions editoast/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ pub struct RunserverArgs {
pub mq_url: String,
#[clap(long, env = "EDITOAST_CORE_TIMEOUT", default_value_t = 180)]
pub core_timeout: u64,
#[clap(long, env = "EDITOAST_CORE_SINGLE_WORKER", default_value_t = false)]
pub core_single_worker: bool,
#[clap(long, env = "ROOT_PATH", default_value_t = String::new())]
pub root_path: String,
#[clap(long)]
Expand Down
35 changes: 0 additions & 35 deletions editoast/src/core/http_client.rs

This file was deleted.

Loading
Loading