using System.Diagnostics;
using System.Text;
using ControlPlane.Core.Config;
using ControlPlane.Core.Messages;
using ControlPlane.Core.Models;
using Docker.DotNet;
using Docker.DotNet.Models;
using MassTransit;
using Microsoft.Extensions.Options;

namespace ControlPlane.Worker.Services;

/// <summary>
/// Manages Clarity.Server Docker containers for provisioned tenants.
/// Container naming convention: {env}-app-clarity-{siteCode}
/// e.g. fdev-app-clarity-01000014
/// </summary>
/// <remarks>
/// NOTE(review): generic type arguments were not present in the reviewed text of this file.
/// <c>IOptions&lt;ClarityInfraOptions&gt;</c> follows from the <c>Infra</c> property below;
/// <c>ILogger&lt;ClarityContainerService&gt;</c> is the conventional choice — confirm against DI registration.
/// </remarks>
public class ClarityContainerService(
    IConfiguration config,
    IOptions<ClarityInfraOptions> infraOptions,
    IPublishEndpoint bus,
    ILogger<ClarityContainerService> logger)
{
    private ClarityInfraOptions Infra => infraOptions.Value;

    // The image to run - override via config for prod registries
    private string ImageName => config["Docker:ClarityServerImage"] ?? "clarity-server:latest";

    // Used when Docker:Socket is not configured. The named pipe only exists on Windows,
    // so fall back to the standard Unix socket everywhere else.
    private static string DefaultDockerSocket =>
        OperatingSystem.IsWindows()
            ? "npipe://./pipe/docker_engine"
            : "unix:///var/run/docker.sock";

    /// <summary>
    /// Creates a Docker client for the endpoint in <c>Docker:Socket</c>, or the platform default
    /// when that key is absent. The caller owns (and disposes) the returned client.
    /// </summary>
    private DockerClient CreateClient()
    {
        var uri = config["Docker:Socket"] ?? DefaultDockerSocket;
        return new DockerClientConfiguration(new Uri(uri)).CreateClient();
    }

    /// <summary>
    /// Derives the container name from environment + siteCode.
    /// Convention: {env}-app-clarity-{siteCode}
    /// </summary>
    /// <param name="environment">Environment prefix; lower-cased (invariant) in the result.</param>
    /// <param name="siteCode">Tenant site code; lower-cased (invariant) in the result.</param>
    /// <returns>The container name, e.g. <c>fdev-app-clarity-01000014</c>.</returns>
    public static string ContainerName(string environment, string siteCode) =>
        $"{environment.ToLowerInvariant()}-app-clarity-{siteCode.ToLowerInvariant()}";

    /// <summary>
    /// Pulls the image (if not present locally), starts the container on the managed network,
    /// and writes an nginx conf.d snippet so traffic routes in.
    /// No host port binding — nginx reaches the container via Docker DNS on the shared network.
    /// </summary>
    /// <param name="environment">Environment prefix used for the container name and the <c>clarity.env</c> label.</param>
    /// <param name="siteCode">Tenant site code used for the container name and the <c>clarity.siteCode</c> label.</param>
    /// <param name="subdomain">Tenant subdomain; used for env vars, extra hosts, labels, and artifact file names.</param>
    /// <param name="keycloakRealm">Value passed to the container as <c>Keycloak__Realm</c>.</param>
    /// <param name="postgresConnectionString">Optional; when non-null it is passed as <c>ConnectionStrings__postgresdb</c>.</param>
    /// <param name="vaultToken">Optional; when non-null it is passed as <c>Vault__Token</c>.</param>
    /// <param name="jobId">Provisioning job id, forwarded to the nginx reload progress events.</param>
    /// <param name="cancellationToken">Cancels the Docker and file operations.</param>
    /// <returns>The name of the started container.</returns>
    /// <exception cref="InvalidOperationException">
    /// The image is local-only and missing, or Docker reported that the container did not start.
    /// </exception>
    public async Task<string> StartTenantContainerAsync(
        string environment,
        string siteCode,
        string subdomain,
        string keycloakRealm,
        string? postgresConnectionString,
        string? vaultToken,
        Guid jobId,
        CancellationToken cancellationToken)
    {
        using var docker = CreateClient();
        var name = ContainerName(environment, siteCode);

        // Stop and remove any existing container with this name (idempotent reprovision)
        await TryRemoveExistingAsync(docker, name, cancellationToken);

        // Pull image if not already local
        await EnsureImageAsync(docker, cancellationToken);

        // All service URLs use stable Docker DNS names on the managed network — no host ports involved.
        List<string> env =
        [
            "ASPNETCORE_ENVIRONMENT=Production",
            "ASPNETCORE_URLS=http://+:8080",
            $"TenantSubdomain={subdomain}",
            $"Keycloak__BaseUrl={Infra.KeycloakPublicUrl}",
            $"Keycloak__InternalUrl={Infra.KeycloakInternalUrl}",
            $"Keycloak__Realm={keycloakRealm}",
            $"Vault__Address={Infra.VaultInternalUrl}",
        ];

        // Optional settings are only emitted when the caller supplied them.
        if (vaultToken is not null)
        {
            env.Add($"Vault__Token={vaultToken}");
        }

        if (postgresConnectionString is not null)
        {
            env.Add($"ConnectionStrings__postgresdb={postgresConnectionString}");
        }

        var container = await docker.Containers.CreateContainerAsync(new CreateContainerParameters
        {
            Name = name,
            Image = ImageName,
            Env = env,
            HostConfig = new HostConfig
            {
                NetworkMode = Infra.Network,
                RestartPolicy = new RestartPolicy { Name = RestartPolicyKind.UnlessStopped },

                // Map *.clarity.test domains to the Docker host gateway so that Clarity.Server,
                // running inside a container, can reach nginx (which routes *.clarity.test).
                // This is required for Keycloak OIDC discovery and JWT iss-claim validation —
                // Keycloak issues tokens with iss=https://keycloak.clarity.test/realms/...
                // and Clarity.Server must be able to reach that URL for OIDC metadata.
                ExtraHosts =
                [
                    $"keycloak.{Infra.Domain}:host-gateway",
                    $"{subdomain}.{Infra.Domain}:host-gateway",
                ],
            },
            Labels = new Dictionary<string, string>
            {
                ["clarity.managed"] = "true",
                ["clarity.subdomain"] = subdomain,
                ["clarity.siteCode"] = siteCode,
                ["clarity.env"] = environment,
            },
        }, cancellationToken);

        // Ensure Keycloak and Vault are reachable on the managed network via their Docker DNS aliases.
        // Aspire places them on its own bridge; tenant containers on clarity-net need them aliased here.
        await EnsureContainerOnNetworkAsync(docker, "keycloak", Infra.Network, "keycloak", cancellationToken);
        await EnsureContainerOnNetworkAsync(docker, "vault", Infra.Network, "vault", cancellationToken);

        var started = await docker.Containers.StartContainerAsync(container.ID, null, cancellationToken);
        if (!started)
        {
            throw new InvalidOperationException($"Docker failed to start container {name} (id={container.ID}).");
        }

        logger.LogInformation("Started container {Name} on {Network} (image: {Image})", name, Infra.Network, ImageName);

        await WriteNginxConfigAsync(subdomain, name, jobId, cancellationToken);
        await WriteComposeArtifactAsync(environment, subdomain, keycloakRealm, name, cancellationToken);

        return name;
    }

    /// <summary>
    /// Stops and removes a tenant container. Called from InfrastructureStep.CompensateAsync.
    /// A container that no longer exists is treated as already removed.
    /// </summary>
    /// <param name="containerName">Name of the container to remove.</param>
    /// <param name="cancellationToken">Cancels the Docker calls.</param>
    public async Task StopAndRemoveAsync(string containerName, CancellationToken cancellationToken)
    {
        using var docker = CreateClient();
        await TryRemoveExistingAsync(docker, containerName, cancellationToken);
        logger.LogInformation("Removed container {Name}", containerName);
    }

    // -- helpers --

    /// <summary>
    /// Makes sure <see cref="ImageName"/> is available to the Docker engine: returns immediately when
    /// it is already listed locally, otherwise pulls it — unless the reference has no registry host.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The image is missing locally and its reference has no registry part (or is <c>localhost/</c>).
    /// </exception>
    private async Task EnsureImageAsync(DockerClient docker, CancellationToken cancellationToken)
    {
        var images = await docker.Images.ListImagesAsync(new ImagesListParameters
        {
            Filters = new Dictionary<string, IDictionary<string, bool>>
            {
                ["reference"] = new Dictionary<string, bool> { [ImageName] = true },
            },
        }, cancellationToken);

        if (images.Count > 0)
        {
            logger.LogInformation("Image {Image} already present locally.", ImageName);
            return;
        }

        // Local image (no registry host) — pulling from Docker Hub will always fail.
        // The image must be built manually before provisioning.
        // Ordinal comparison: image references are identifiers, not linguistic text.
        var isLocalOnly = !ImageName.Contains('/')
            || ImageName.StartsWith("localhost/", StringComparison.Ordinal);
        if (isLocalOnly)
        {
            throw new InvalidOperationException(
                $"Image '{ImageName}' was not found locally and cannot be pulled from a registry. " +
                $"Build it first from the repo root:{Environment.NewLine}" +
                $" docker build -f Clarity.Server/Dockerfile -t {ImageName} ." +
                $"{Environment.NewLine}Then retry provisioning.");
        }

        // Registry image — attempt pull
        logger.LogInformation("Pulling image {Image} from registry...", ImageName);
        var (repo, tag) = SplitImageTag(ImageName);
        await docker.Images.CreateImageAsync(
            new ImagesCreateParameters { FromImage = repo, Tag = tag },
            null,
            new Progress<JSONMessage>(m =>
            {
                if (!string.IsNullOrWhiteSpace(m.Status))
                {
                    logger.LogDebug("[docker pull] {Status} {Progress}", m.Status, m.ProgressMessage);
                }
            }),
            cancellationToken);
    }

    // -- nginx conf.d helpers --

    /// <summary>
    /// Writes <c>{Nginx:ConfDPath}/{subdomain}.conf</c> so nginx routes
    /// <c>{subdomain}.{Infra.Domain}</c> to the container on port 8080,
    /// then signals nginx to reload its config.
    /// Does nothing (apart from a warning) when <c>Nginx:ConfDPath</c> is not configured.
    /// </summary>
    private async Task WriteNginxConfigAsync(string subdomain, string containerName, Guid jobId, CancellationToken ct)
    {
        var confDPath = config["Nginx:ConfDPath"];
        if (string.IsNullOrWhiteSpace(confDPath))
        {
            logger.LogWarning("Nginx:ConfDPath is not configured — skipping nginx conf write for {Subdomain}.", subdomain);
            return;
        }

        // Triple-$ raw string: nginx's own braces and $variables are literal text,
        // only {{{...}}} is an interpolation hole.
        var confContent = $$$"""
            # Auto-generated by ControlPlane.Worker — do not edit manually.
            # Tenant: {{{subdomain}}}
            server {
                listen 443 ssl;
                server_name {{{subdomain}}}.{{{Infra.Domain}}};
                ssl_certificate {{{Infra.NginxCertPath}}};
                ssl_certificate_key {{{Infra.NginxCertKeyPath}}};
                location / {
                    # Docker DNS resolves the container name on the managed network
                    set $upstream http://{{{containerName}}}:8080;
                    proxy_pass $upstream;
                    proxy_set_header Host $host;
                    proxy_set_header X-Real-IP $remote_addr;
                    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                    proxy_set_header X-Forwarded-Proto $scheme;
                }
            }
            """;

        var confFile = Path.Combine(confDPath, $"{subdomain}.conf");
        await File.WriteAllTextAsync(confFile, confContent, ct);
        logger.LogInformation("Wrote nginx config for {Subdomain} → {Container}", subdomain, containerName);

        await ReloadNginxAsync(jobId, subdomain, ct);
    }

    /// <summary>
    /// Deletes the tenant's nginx conf.d snippet (if it exists) and reloads nginx.
    /// No-op when <c>Nginx:ConfDPath</c> is not configured or the file is absent.
    /// </summary>
    /// <param name="subdomain">Tenant subdomain whose <c>{subdomain}.conf</c> is removed.</param>
    /// <param name="ct">Cancels the reload call.</param>
    public async Task RemoveNginxConfigAsync(string subdomain, CancellationToken ct)
    {
        var confDPath = config["Nginx:ConfDPath"];
        if (string.IsNullOrWhiteSpace(confDPath))
        {
            return;
        }

        var confFile = Path.Combine(confDPath, $"{subdomain}.conf");
        if (!File.Exists(confFile))
        {
            return;
        }

        File.Delete(confFile);
        logger.LogInformation("Removed nginx config for {Subdomain}", subdomain);

        // Guid.Empty suppresses progress events — there is no provisioning job on this path.
        await ReloadNginxAsync(Guid.Empty, subdomain, ct);
    }

    /// <summary>
    /// Sends SIGHUP to the nginx container which triggers a graceful config reload.
    /// Best-effort: failures are logged (and reported as a diagnostic event when
    /// <paramref name="jobId"/> is set) but never thrown; only cancellation propagates.
    /// </summary>
    private async Task ReloadNginxAsync(Guid jobId, string subdomain, CancellationToken ct)
    {
        try
        {
            using var docker = CreateClient();

            // Find the nginx container by image name — Aspire appends a random suffix to the name
            // so we can't rely on the static name "nginx".
            var containers = await docker.Containers.ListContainersAsync(
                new ContainersListParameters
                {
                    Filters = new Dictionary<string, IDictionary<string, bool>>
                    {
                        ["ancestor"] = new Dictionary<string, bool> { ["nginx"] = true },
                    },
                }, ct);

            var nginx = containers.FirstOrDefault();
            if (nginx is null)
            {
                logger.LogWarning("nginx container not found — skipping reload.");
                return;
            }

            await docker.Containers.KillContainerAsync(nginx.ID, new ContainerKillParameters { Signal = "HUP" }, ct);

            var containerName = nginx.Names.FirstOrDefault() ?? nginx.ID;
            logger.LogInformation("nginx reloaded (container: {Name}).", containerName);

            if (jobId != Guid.Empty)
            {
                await bus.Publish(new ProvisioningProgressEvent
                {
                    JobId = jobId,
                    Type = "nginx_reloaded",
                    Step = "Container Launch",
                    Message = $"nginx reloaded — route for {subdomain}.{Infra.Domain} is live.",
                }, ct);
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Cancellation is not a reload failure; let the caller observe it.
            throw;
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "Failed to reload nginx — new tenant route may not be active until next nginx restart.");

            if (jobId == Guid.Empty)
            {
                return;
            }

            try
            {
                await bus.Publish(new ProvisioningProgressEvent
                {
                    JobId = jobId,
                    Type = "diagnostic",
                    Step = "Container Launch",
                    Message = "nginx reload failed — route may not be active.",
                    Detail = ex.ToString(),
                }, ct);
            }
            catch (Exception publishEx) when (publishEx is not OperationCanceledException)
            {
                // The diagnostic is itself best-effort: a broker hiccup must not escape this handler.
                logger.LogWarning(publishEx, "Could not publish nginx reload diagnostic for job {JobId}.", jobId);
            }
        }
    }

    // -- docker helpers --

    /// <summary>
    /// Stops (5 s grace period) and force-removes the named container.
    /// "Not found" responses are ignored; any other Docker error propagates.
    /// </summary>
    private static async Task TryRemoveExistingAsync(DockerClient docker, string name, CancellationToken cancellationToken)
    {
        try
        {
            await docker.Containers.StopContainerAsync(
                name,
                new ContainerStopParameters { WaitBeforeKillSeconds = 5 },
                cancellationToken);
            await docker.Containers.RemoveContainerAsync(
                name,
                new ContainerRemoveParameters { Force = true },
                cancellationToken);
        }
        catch (DockerContainerNotFoundException)
        {
            /* already gone - fine */
        }
        catch (DockerApiException ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            /* same */
        }
    }

    /// <summary>
    /// Splits an image reference into the repository and the tag (or digest) to pull.
    /// </summary>
    /// <remarks>
    /// A colon only separates a tag when it appears after the last <c>/</c>; otherwise it belongs to
    /// a registry port (<c>localhost:5000/img</c>). For <c>name@digest</c> references the digest is
    /// returned in the tag position and any tag on the name part is dropped.
    /// Internal (rather than private) so the parsing rules can be unit-tested.
    /// </remarks>
    /// <param name="image">Image reference such as <c>registry:5000/team/app:1.2</c>.</param>
    /// <returns>The repository and the tag/digest; the tag defaults to <c>latest</c>.</returns>
    internal static (string repo, string tag) SplitImageTag(string image)
    {
        string? digest = null;
        var at = image.IndexOf('@');
        if (at >= 0)
        {
            digest = image[(at + 1)..];
            image = image[..at];
        }

        var lastSlash = image.LastIndexOf('/');
        var colon = image.LastIndexOf(':');
        var hasTag = colon > lastSlash;

        var repo = hasTag ? image[..colon] : image;
        var tag = hasTag ? image[(colon + 1)..] : string.Empty;

        if (!string.IsNullOrEmpty(digest))
        {
            return (repo, digest);
        }

        // An empty tag would ask the engine for every tag of the repository.
        return (repo, tag.Length > 0 ? tag : "latest");
    }

    /// <summary>
    /// Connects <paramref name="containerName"/> to <paramref name="network"/> with the given
    /// <paramref name="alias"/> if it isn't already connected with that alias.
    /// Silently no-ops if the container isn't found (it may not be running in all environments).
    /// Any other failure is logged as a warning and not rethrown.
    /// </summary>
    private async Task EnsureContainerOnNetworkAsync(
        DockerClient docker,
        string containerName,
        string network,
        string alias,
        CancellationToken cancellationToken)
    {
        try
        {
            var inspect = await docker.Containers.InspectContainerAsync(containerName, cancellationToken);

            if (inspect.NetworkSettings.Networks.TryGetValue(network, out var existing))
            {
                // Already connected — check whether our alias is present.
                var hasAlias = existing.Aliases?.Contains(alias, StringComparer.OrdinalIgnoreCase) == true;
                if (hasAlias)
                {
                    return;
                }

                // Connected but without the alias — disconnect so we can reconnect with it.
                await docker.Networks.DisconnectNetworkAsync(network, new NetworkDisconnectParameters
                {
                    Container = inspect.ID,
                    Force = true,
                }, cancellationToken);
            }

            await docker.Networks.ConnectNetworkAsync(network, new NetworkConnectParameters
            {
                Container = inspect.ID,
                EndpointConfig = new EndpointSettings
                {
                    Aliases = [alias],
                },
            }, cancellationToken);

            logger.LogInformation("Connected container '{Container}' to network '{Network}' with alias '{Alias}'.",
                containerName, network, alias);
        }
        catch (DockerContainerNotFoundException)
        {
            logger.LogWarning("Container '{Container}' not found — skipping network connect.", containerName);
        }
        catch (DockerApiException ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            logger.LogWarning("Container '{Container}' not found — skipping network connect.", containerName);
        }
        catch (Exception ex)
        {
            logger.LogWarning(ex, "Could not connect '{Container}' to '{Network}' — tenant JWT validation may fail.",
                containerName, network);
        }
    }

    // ── ClientAssets / compose artifact helpers ──────────────────────────────

    /// <summary>
    /// Resolves the per-tenant asset folder: <c>{root}/{subdomain}</c>, where root comes from
    /// <c>ClientAssets__Folder</c>, then <c>ClientAssets:Folder</c>, then <c>../ClientAssets</c>
    /// relative to the application base directory.
    /// </summary>
    private string ClientAssetsFolder(string subdomain)
    {
        var root = config["ClientAssets__Folder"]
            ?? config["ClientAssets:Folder"]
            ?? Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "ClientAssets"));
        return Path.Combine(root, subdomain);
    }

    /// <summary>
    /// Writes a docker-compose.yml to ClientAssets/{subdomain}/ documenting the SharedPlatform
    /// clarity-server deployment. The file is an audit artifact — it is NOT executed by the Worker.
    /// Failures are logged and swallowed.
    /// </summary>
    private async Task WriteComposeArtifactAsync(
        string environment,
        string subdomain,
        string keycloakRealm,
        string containerName,
        CancellationToken ct)
    {
        var folder = ClientAssetsFolder(subdomain);
        try
        {
            Directory.CreateDirectory(folder);

            var content = $$$"""
                # Auto-generated by ControlPlane.Worker — do not edit manually.
                # Tenant: {{{subdomain}}}
                # Tier: SharedPlatform
                # Generated: {{{DateTimeOffset.UtcNow:O}}}
                name: clarity-{{{subdomain}}}
                services:
                  app-{{{subdomain}}}:
                    image: {{{ImageName}}}
                    restart: unless-stopped
                    container_name: {{{containerName}}}
                    environment:
                      ASPNETCORE_ENVIRONMENT: Production
                      ASPNETCORE_URLS: http://+:8080
                      TenantSubdomain: {{{subdomain}}}
                      Keycloak__BaseUrl: {{{Infra.KeycloakPublicUrl}}}
                      Keycloak__InternalUrl: {{{Infra.KeycloakInternalUrl}}}
                      Keycloak__Realm: {{{keycloakRealm}}}
                      Vault__Address: {{{Infra.VaultInternalUrl}}}
                      # ConnectionStrings__postgresdb: (persisted in TenantRecord)
                    networks:
                      - clarity-net
                    extra_hosts:
                      - "keycloak.{{{Infra.Domain}}}:host-gateway"
                      - "{{{subdomain}}}.{{{Infra.Domain}}}:host-gateway"
                    labels:
                      clarity.managed: "true"
                      clarity.subdomain: {{{subdomain}}}
                      clarity.env: {{{environment}}}
                networks:
                  clarity-net:
                    external: true
                """;

            var composePath = Path.Combine(folder, "docker-compose.yml");
            await File.WriteAllTextAsync(composePath, content, ct);
            logger.LogInformation("Wrote compose artifact for {Subdomain} → {Path}", subdomain, composePath);
        }
        catch (Exception ex)
        {
            // Non-fatal — the container is already running; the artifact is an audit record.
            logger.LogWarning(ex, "Could not write compose artifact for {Subdomain}.", subdomain);
        }
    }

    // ── OwnContainer — sidecar lifecycle ─────────────────────────────────────

    /// <summary>
    /// OwnContainer tier — generates a per-tenant docker-compose.yml for sidecar services
    /// (Keycloak, Vault, Postgres, MinIO as elected by StackConfig), writes it to
    /// ClientAssets/{subdomain}/docker-compose.yml, and runs <c>docker compose up -d</c>.
    /// Returns the absolute path to the compose file.
    /// </summary>
    /// <param name="job">Provisioning job supplying the subdomain, tier, and StackConfig.</param>
    /// <param name="topology">
    /// Not read by this method.
    /// NOTE(review): the dictionary's type arguments were not recoverable from the reviewed text;
    /// <c>string</c> / <c>ComponentEndpoint</c> are a best guess — confirm against ControlPlane.Core.Models.
    /// </param>
    /// <param name="ct">Cancels the file write and the compose run.</param>
    /// <exception cref="InvalidOperationException"><c>docker compose</c> could not start or exited non-zero.</exception>
    public async Task<string> GenerateAndRunSidecarsAsync(
        ProvisioningJob job,
        Dictionary<string, ComponentEndpoint> topology,
        CancellationToken ct)
    {
        var folder = ClientAssetsFolder(job.Subdomain);
        Directory.CreateDirectory(folder);

        var content = BuildSidecarCompose(job);
        var composePath = Path.Combine(folder, "docker-compose.yml");
        await File.WriteAllTextAsync(composePath, content, ct);
        logger.LogInformation("[{JobId}] Wrote sidecar compose → {Path}", job.Id, composePath);

        await RunDockerComposeAsync(composePath, "up -d", job.Id, ct);
        logger.LogInformation("[{JobId}] Sidecar containers started.", job.Id);

        return composePath;
    }

    /// <summary>
    /// After sidecars are started, inspects each OwnContainer component's Docker container
    /// to resolve its ephemeral host port, then rewrites the topology AdminUrl to
    /// <c>http://localhost:{hostPort}</c> so downstream saga steps can call admin APIs.
    /// Per-component failures are logged and skipped.
    /// </summary>
    /// <param name="topology">
    /// Updated in place.
    /// NOTE(review): type arguments are a best guess (see <see cref="GenerateAndRunSidecarsAsync"/>) — confirm.
    /// </param>
    /// <param name="ct">Cancels the Docker inspect calls.</param>
    public async Task UpdateTopologyWithHostPortsAsync(
        Dictionary<string, ComponentEndpoint> topology,
        CancellationToken ct)
    {
        using var docker = CreateClient();

        // Snapshot the entries: the loop body assigns back into the dictionary.
        foreach (var (component, endpoint) in topology.ToList())
        {
            if (endpoint.Mode != ComponentMode.OwnContainer)
            {
                continue;
            }

            if (string.IsNullOrWhiteSpace(endpoint.ContainerName))
            {
                continue;
            }

            try
            {
                var inspect = await docker.Containers.InspectContainerAsync(endpoint.ContainerName, ct);
                var firstBinding = inspect.NetworkSettings.Ports
                    .SelectMany(p => p.Value ?? [])
                    .FirstOrDefault(b => !string.IsNullOrWhiteSpace(b.HostPort));

                if (firstBinding is not null)
                {
                    topology[component] = endpoint with { AdminUrl = $"http://localhost:{firstBinding.HostPort}" };
                    logger.LogInformation("Resolved {Component} host port → {Url}",
                        component, topology[component].AdminUrl);
                }
                else
                {
                    logger.LogWarning("No host port binding found for {Component} container {Name}.",
                        component, endpoint.ContainerName);
                }
            }
            catch (Exception ex)
            {
                logger.LogWarning(ex, "Could not resolve host port for {Component} container {Name}.",
                    component, endpoint.ContainerName);
            }
        }
    }

    /// <summary>
    /// Tears down all sidecar containers for a tenant by running
    /// <c>docker compose down -v</c> against the stored compose file.
    /// Called from InfrastructureProvisioningStep.CompensateAsync.
    /// </summary>
    /// <param name="subdomain">Tenant subdomain whose compose project is torn down.</param>
    /// <param name="ct">Cancels the compose run.</param>
    /// <exception cref="InvalidOperationException"><c>docker compose</c> could not start or exited non-zero.</exception>
    public async Task TearDownComposeProjectAsync(string subdomain, CancellationToken ct)
    {
        var composePath = Path.Combine(ClientAssetsFolder(subdomain), "docker-compose.yml");
        if (!File.Exists(composePath))
        {
            logger.LogWarning("No compose file found for {Subdomain} — nothing to tear down.", subdomain);
            return;
        }

        await RunDockerComposeAsync(composePath, "down -v", Guid.Empty, ct);
        logger.LogInformation("Tore down sidecar containers for {Subdomain}.", subdomain);
    }

    /// <summary>
    /// Builds the docker-compose YAML content for OwnContainer sidecar services.
    /// Services are included conditionally based on StackConfig. clarity-net is
    /// declared as an external network so all sidecars join the shared platform network.
    /// </summary>
    /// <remarks>
    /// The templates use <c>$$"""</c> so that compose-style <c>${VAR:-default}</c> substitutions are
    /// literal text and only <c>{{...}}</c> is a C# interpolation hole. (A single-<c>$</c> raw string
    /// has no brace escape, so <c>{{</c> cannot be used there.)
    /// </remarks>
    private string BuildSidecarCompose(ProvisioningJob job)
    {
        var s = job.Subdomain;
        var stack = job.StackConfig;
        var sb = new StringBuilder();

        sb.AppendLine($$"""
            # Auto-generated by ControlPlane.Worker — do not edit manually.
            # Tenant: {{s}} | Tier: {{job.Tier}}
            # Generated: {{DateTimeOffset.UtcNow:O}}
            name: clarity-{{s}}
            services:
            """);

        // ── Postgres ──────────────────────────────────────────────────────────
        if (stack.Postgres == ComponentMode.OwnContainer)
        {
            sb.AppendLine($$"""
                  pg-{{s}}:
                    image: postgres:16
                    restart: unless-stopped
                    environment:
                      POSTGRES_USER: clarity
                      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-clarity-dev}
                      POSTGRES_DB: clarity
                    expose:
                      - "5432"
                    ports:
                      - "127.0.0.1::5432"
                    healthcheck:
                      test: ["CMD-SHELL", "pg_isready -U clarity"]
                      interval: 10s
                      timeout: 5s
                      retries: 5
                    networks:
                      - clarity-net
                    labels:
                      clarity.managed: "true"
                      clarity.subdomain: {{s}}
                      clarity.component: postgres
                """);
        }

        // ── Keycloak ──────────────────────────────────────────────────────────
        if (stack.Keycloak == ComponentMode.OwnContainer)
        {
            var kcHostname = $"kc.{s}.{Infra.Domain}";

            sb.AppendLine($$"""
                  kc-{{s}}:
                    image: quay.io/keycloak/keycloak:latest
                    restart: unless-stopped
                    command: start-dev
                    environment:
                      KEYCLOAK_ADMIN: admin
                      KEYCLOAK_ADMIN_PASSWORD: ${KEYCLOAK_ADMIN_PASSWORD:-admin}
                      KC_DB: postgres
                      KC_DB_URL_HOST: pg-{{s}}
                      KC_DB_URL_DATABASE: keycloak
                      KC_DB_USERNAME: clarity
                      KC_DB_PASSWORD: ${POSTGRES_PASSWORD:-clarity-dev}
                      KC_HOSTNAME: {{kcHostname}}
                      KC_HOSTNAME_STRICT: "false"
                      KC_HTTP_ENABLED: "true"
                    expose:
                      - "8080"
                    ports:
                      - "127.0.0.1::8080"
                    networks:
                      - clarity-net
                    extra_hosts:
                      - "{{kcHostname}}:host-gateway"
                """);

            // Appended as its own fragment so the nested YAML indentation is explicit;
            // a multi-line value dropped into an interpolation hole would not be re-indented.
            if (stack.Postgres == ComponentMode.OwnContainer)
            {
                sb.AppendLine($$"""
                        depends_on:
                          pg-{{s}}:
                            condition: service_healthy
                    """);
            }

            sb.AppendLine($$"""
                    labels:
                      clarity.managed: "true"
                      clarity.subdomain: {{s}}
                      clarity.component: keycloak
                """);
        }

        // ── Vault ─────────────────────────────────────────────────────────────
        if (stack.Vault == ComponentMode.OwnContainer)
        {
            sb.AppendLine($$"""
                  vault-{{s}}:
                    image: hashicorp/vault:latest
                    restart: unless-stopped
                    cap_add:
                      - IPC_LOCK
                    environment:
                      VAULT_DEV_ROOT_TOKEN_ID: ${VAULT_TOKEN:-vault-dev-root}
                      VAULT_DEV_LISTEN_ADDRESS: "0.0.0.0:8200"
                    expose:
                      - "8200"
                    ports:
                      - "127.0.0.1::8200"
                    networks:
                      - clarity-net
                    labels:
                      clarity.managed: "true"
                      clarity.subdomain: {{s}}
                      clarity.component: vault
                """);
        }

        // ── MinIO ─────────────────────────────────────────────────────────────
        if (stack.Minio == ComponentMode.OwnContainer)
        {
            sb.AppendLine($$"""
                  minio-{{s}}:
                    image: minio/minio:latest
                    restart: unless-stopped
                    command: server /data --console-address ":9001"
                    environment:
                      MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minio}
                      MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minio-dev}
                    expose:
                      - "9000"
                      - "9001"
                    ports:
                      - "127.0.0.1::9000"
                      - "127.0.0.1::9001"
                    networks:
                      - clarity-net
                    labels:
                      clarity.managed: "true"
                      clarity.subdomain: {{s}}
                      clarity.component: minio
                """);
        }

        sb.AppendLine("""
            networks:
              clarity-net:
                external: true
            """);

        return sb.ToString();
    }

    /// <summary>
    /// Runs <c>docker compose -f {composePath} {args}</c> as a child process.
    /// Logs captured stdout/stderr once the process exits and throws on non-zero exit.
    /// If <paramref name="ct"/> is cancelled while waiting, the child process tree is killed
    /// before the cancellation propagates.
    /// </summary>
    /// <param name="composePath">Path to the compose file; its directory becomes the working directory.</param>
    /// <param name="args">Space-separated compose sub-command and flags, e.g. <c>up -d</c>.</param>
    /// <param name="jobId">Not used by this method.</param>
    /// <param name="ct">Cancels the wait and the output reads.</param>
    /// <exception cref="InvalidOperationException">The process could not be started or exited non-zero.</exception>
    private async Task RunDockerComposeAsync(string composePath, string args, Guid jobId, CancellationToken ct)
    {
        var psi = new ProcessStartInfo("docker")
        {
            WorkingDirectory = Path.GetDirectoryName(composePath)!,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            UseShellExecute = false,
        };

        // ArgumentList handles quoting for us, so a compose path containing spaces or quotes is safe.
        psi.ArgumentList.Add("compose");
        psi.ArgumentList.Add("-f");
        psi.ArgumentList.Add(composePath);
        foreach (var arg in args.Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            psi.ArgumentList.Add(arg);
        }

        using var process = Process.Start(psi)
            ?? throw new InvalidOperationException("Failed to start docker compose process.");

        // Start draining both pipes before waiting so a chatty child cannot block on a full buffer.
        var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
        var stderrTask = process.StandardError.ReadToEndAsync(ct);

        try
        {
            await process.WaitForExitAsync(ct);
        }
        catch (OperationCanceledException)
        {
            // WaitForExitAsync only stops waiting; without this the child would keep running.
            try
            {
                if (!process.HasExited)
                {
                    process.Kill(entireProcessTree: true);
                }
            }
            catch (Exception killEx) when (killEx is InvalidOperationException or System.ComponentModel.Win32Exception)
            {
                logger.LogDebug(killEx, "docker compose process could not be killed after cancellation.");
            }

            throw;
        }

        var stdout = await stdoutTask;
        var stderr = await stderrTask;

        if (!string.IsNullOrWhiteSpace(stdout))
        {
            logger.LogInformation("[docker compose] {Output}", stdout.Trim());
        }

        if (!string.IsNullOrWhiteSpace(stderr))
        {
            logger.LogInformation("[docker compose stderr] {Output}", stderr.Trim());
        }

        if (process.ExitCode != 0)
        {
            throw new InvalidOperationException(
                $"'docker compose {args}' exited with code {process.ExitCode}. See logs for details.");
        }
    }
}