using ControlPlane.Core.Config;
using ControlPlane.Core.Messages;
using Docker.DotNet;
using Docker.DotNet.Models;
using MassTransit;
using Microsoft.Extensions.Options;

namespace ControlPlane.Worker.Services;

/// <summary>
/// Manages Clarity.Server Docker containers for provisioned tenants.
/// Container naming convention: {env}-app-clarity-{siteCode}
/// e.g. fdev-app-clarity-01000014
/// </summary>
public class ClarityContainerService(
    IConfiguration config,
    IOptions<ClarityInfraOptions> infraOptions,
    IPublishEndpoint bus,
    ILogger<ClarityContainerService> logger)
{
    private ClarityInfraOptions Infra => infraOptions.Value;

    // The image to run - override via config for prod registries
    private string ImageName => config["Docker:ClarityServerImage"] ?? "clarity-server:latest";

    // Fallback Docker endpoint when Docker:Socket is not configured. The named pipe only
    // exists on Windows, so other platforms default to the conventional unix socket.
    private static string DefaultDockerSocket =>
        OperatingSystem.IsWindows() ? "npipe://./pipe/docker_engine" : "unix:///var/run/docker.sock";

    /// <summary>
    /// Creates a Docker client for the endpoint in <c>Docker:Socket</c>, or the platform default.
    /// Callers own the returned client and must dispose it.
    /// </summary>
    private DockerClient CreateClient()
    {
        var uri = config["Docker:Socket"] ?? DefaultDockerSocket;
        return new DockerClientConfiguration(new Uri(uri)).CreateClient();
    }

    /// <summary>
    /// Derives the container name from environment + siteCode.
    /// Convention: {env}-app-clarity-{siteCode}
    /// </summary>
    /// <param name="environment">Environment prefix; lower-cased (invariant) in the result.</param>
    /// <param name="siteCode">Tenant site code; lower-cased (invariant) in the result.</param>
    /// <returns>The container name, e.g. <c>fdev-app-clarity-01000014</c>.</returns>
    public static string ContainerName(string environment, string siteCode)
        => $"{environment.ToLowerInvariant()}-app-clarity-{siteCode.ToLowerInvariant()}";

    /// <summary>
    /// Pulls the image (if not present locally), starts the container on the managed network,
    /// and writes an nginx conf.d snippet so traffic routes in.
    /// No host port binding — nginx reaches the container via Docker DNS on the shared network.
    /// </summary>
    /// <param name="environment">Environment prefix used in the container name and the <c>clarity.env</c> label.</param>
    /// <param name="siteCode">Tenant site code used in the container name and the <c>clarity.siteCode</c> label.</param>
    /// <param name="subdomain">Tenant subdomain; passed to the container and used for the nginx route.</param>
    /// <param name="keycloakRealm">Realm passed to the container as <c>Keycloak__Realm</c>.</param>
    /// <param name="postgresConnectionString">Optional; when non-null it is passed as <c>ConnectionStrings__postgresdb</c>.</param>
    /// <param name="vaultToken">Optional; when non-null it is passed as <c>Vault__Token</c>.</param>
    /// <param name="jobId">Provisioning job id attached to progress events published during the nginx reload.</param>
    /// <param name="cancellationToken">Cancels the Docker and file operations.</param>
    /// <returns>The name of the started container.</returns>
    /// <exception cref="InvalidOperationException">
    /// The image is local-only and missing, or Docker reported that the container did not start.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="subdomain"/> is not usable as an nginx conf file name (only raised when
    /// <c>Nginx:ConfDPath</c> is configured).
    /// </exception>
    public async Task<string> StartTenantContainerAsync(
        string environment,
        string siteCode,
        string subdomain,
        string keycloakRealm,
        string? postgresConnectionString,
        string? vaultToken,
        Guid jobId,
        CancellationToken cancellationToken)
    {
        using var docker = CreateClient();
        var name = ContainerName(environment, siteCode);

        // Stop and remove any existing container with this name (idempotent reprovision)
        await TryRemoveExistingAsync(docker, name, cancellationToken);

        // Pull image if not already local
        await EnsureImageAsync(docker, cancellationToken);

        // All service URLs use stable Docker DNS names on the managed network — no host ports involved.
        var container = await docker.Containers.CreateContainerAsync(new CreateContainerParameters
        {
            Name = name,
            Image = ImageName,
            Env =
            [
                "ASPNETCORE_ENVIRONMENT=Production",
                "ASPNETCORE_URLS=http://+:8080",
                $"TenantSubdomain={subdomain}",
                $"Keycloak__BaseUrl={Infra.KeycloakPublicUrl}",
                $"Keycloak__InternalUrl={Infra.KeycloakInternalUrl}",
                $"Keycloak__Realm={keycloakRealm}",
                $"Vault__Address={Infra.VaultInternalUrl}",
                // Optional settings are only emitted when the caller supplied a value.
                .. (vaultToken is not null ? (string[])[$"Vault__Token={vaultToken}"] : []),
                .. (postgresConnectionString is not null ? (string[])[$"ConnectionStrings__postgresdb={postgresConnectionString}"] : []),
            ],
            HostConfig = new HostConfig
            {
                NetworkMode = Infra.Network,
                RestartPolicy = new RestartPolicy { Name = RestartPolicyKind.UnlessStopped },
            },
            Labels = new Dictionary<string, string>
            {
                ["clarity.managed"] = "true",
                ["clarity.subdomain"] = subdomain,
                ["clarity.siteCode"] = siteCode,
                ["clarity.env"] = environment,
            },
        }, cancellationToken);

        // Ensure Keycloak and Vault are reachable on the managed network via their Docker DNS aliases.
        // Aspire places them on its own bridge; tenant containers on clarity-net need them aliased here.
        await EnsureContainerOnNetworkAsync(docker, "keycloak", Infra.Network, "keycloak", cancellationToken);
        await EnsureContainerOnNetworkAsync(docker, "vault", Infra.Network, "vault", cancellationToken);

        var started = await docker.Containers.StartContainerAsync(container.ID, null, cancellationToken);
        if (!started)
        {
            throw new InvalidOperationException($"Docker failed to start container {name} (id={container.ID}).");
        }

        logger.LogInformation("Started container {Name} on {Network} (image: {Image})", name, Infra.Network, ImageName);

        await WriteNginxConfigAsync(subdomain, name, jobId, cancellationToken);

        return name;
    }

    /// <summary>
    /// Stops and removes a tenant container. Called from InfrastructureStep.CompensateAsync.
    /// A container that no longer exists is treated as already removed.
    /// </summary>
    /// <param name="containerName">Name of the container to remove.</param>
    /// <param name="cancellationToken">Cancels the Docker calls.</param>
    public async Task StopAndRemoveAsync(string containerName, CancellationToken cancellationToken)
    {
        using var docker = CreateClient();
        await TryRemoveExistingAsync(docker, containerName, cancellationToken);
        logger.LogInformation("Removed container {Name}", containerName);
    }

    // -- helpers --

    /// <summary>
    /// Makes sure <see cref="ImageName"/> is available to the Docker daemon: returns immediately when
    /// it is already listed locally, throws for local-only image names, otherwise pulls it.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The image is missing and its name has no registry part (or starts with <c>localhost/</c>).
    /// </exception>
    private async Task EnsureImageAsync(DockerClient docker, CancellationToken cancellationToken)
    {
        var images = await docker.Images.ListImagesAsync(new ImagesListParameters
        {
            Filters = new Dictionary<string, IDictionary<string, bool>>
            {
                ["reference"] = new Dictionary<string, bool> { [ImageName] = true }
            }
        }, cancellationToken);

        if (images.Count > 0)
        {
            logger.LogInformation("Image {Image} already present locally.", ImageName);
            return;
        }

        // Local image (no registry host) — pulling from Docker Hub will always fail.
        // The image must be built manually before provisioning.
        var isLocalOnly = !ImageName.Contains('/') || ImageName.StartsWith("localhost/", StringComparison.Ordinal);
        if (isLocalOnly)
        {
            throw new InvalidOperationException(
                $"Image '{ImageName}' was not found locally and cannot be pulled from a registry. " +
                $"Build it first from the repo root:{Environment.NewLine}" +
                $"  docker build -f Clarity.Server/Dockerfile -t {ImageName} ." +
                $"{Environment.NewLine}Then retry provisioning.");
        }

        // Registry image — attempt pull
        logger.LogInformation("Pulling image {Image} from registry...", ImageName);
        var (repo, tag) = SplitImageTag(ImageName);
        await docker.Images.CreateImageAsync(
            new ImagesCreateParameters { FromImage = repo, Tag = tag },
            null,
            new Progress<JSONMessage>(m =>
            {
                if (!string.IsNullOrWhiteSpace(m.Status))
                {
                    logger.LogDebug("[docker pull] {Status} {Progress}", m.Status, m.ProgressMessage);
                }
            }),
            cancellationToken);
    }

    // -- nginx conf.d helpers --

    /// <summary>
    /// Builds the conf.d file path for a tenant after checking that <paramref name="subdomain"/>
    /// is safe to use as a file name and inside the generated nginx config.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// <paramref name="subdomain"/> is empty or contains anything other than ASCII letters,
    /// digits, '-', '_' or '.'.
    /// </exception>
    private static string NginxConfPath(string confDPath, string subdomain)
    {
        if (string.IsNullOrEmpty(subdomain))
        {
            throw new ArgumentException("Subdomain must not be empty.", nameof(subdomain));
        }

        // The value becomes a file name and is interpolated into nginx config, so path separators,
        // whitespace, ';' or braces would escape conf.d or inject directives.
        foreach (var c in subdomain)
        {
            var allowed = c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9') or '-' or '_' or '.';
            if (!allowed)
            {
                throw new ArgumentException(
                    $"Subdomain '{subdomain}' contains characters that are not valid in an nginx conf file name.",
                    nameof(subdomain));
            }
        }

        return Path.Combine(confDPath, $"{subdomain}.conf");
    }

    /// <summary>
    /// Writes {Nginx:ConfDPath}/{subdomain}.conf so nginx routes
    /// {subdomain}.{Infra.Domain} → the container on port 8080.
    /// Then signals nginx to reload its config without dropping connections.
    /// Skipped (with a warning) when <c>Nginx:ConfDPath</c> is not configured.
    /// </summary>
    private async Task WriteNginxConfigAsync(string subdomain, string containerName, Guid jobId, CancellationToken ct)
    {
        var confDPath = config["Nginx:ConfDPath"];
        if (string.IsNullOrWhiteSpace(confDPath))
        {
            logger.LogWarning("Nginx:ConfDPath is not configured — skipping nginx conf write for {Subdomain}.", subdomain);
            return;
        }

        // Validate before rendering so an unsafe subdomain never reaches the config text.
        var confFile = NginxConfPath(confDPath, subdomain);

        // NOTE(review): nginx resolves a variable proxy_pass target at request time and needs a
        // `resolver` directive in scope for that — confirm the base nginx.conf defines one.
        var confContent = $$$"""
            # Auto-generated by ControlPlane.Worker — do not edit manually.
            # Tenant: {{{subdomain}}}
            server {
                listen 443 ssl;
                server_name {{{subdomain}}}.{{{Infra.Domain}}};

                ssl_certificate     {{{Infra.NginxCertPath}}};
                ssl_certificate_key {{{Infra.NginxCertKeyPath}}};

                location / {
                    # Docker DNS resolves the container name on the managed network
                    set $upstream http://{{{containerName}}}:8080;
                    proxy_pass $upstream;
                    proxy_set_header Host $host;
                    proxy_set_header X-Real-IP $remote_addr;
                    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                    proxy_set_header X-Forwarded-Proto $scheme;
                }
            }
            """;

        await File.WriteAllTextAsync(confFile, confContent, ct);
        logger.LogInformation("Wrote nginx config for {Subdomain} → {Container}", subdomain, containerName);

        await ReloadNginxAsync(jobId, subdomain, ct);
    }

    /// <summary>
    /// Deletes the tenant's nginx conf.d snippet, if present, and reloads nginx.
    /// Does nothing when <c>Nginx:ConfDPath</c> is not configured or the file does not exist.
    /// </summary>
    /// <param name="subdomain">Tenant subdomain whose <c>{subdomain}.conf</c> should be removed.</param>
    /// <param name="ct">Cancels the nginx reload.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="subdomain"/> is not usable as an nginx conf file name.
    /// </exception>
    public async Task RemoveNginxConfigAsync(string subdomain, CancellationToken ct)
    {
        var confDPath = config["Nginx:ConfDPath"];
        if (string.IsNullOrWhiteSpace(confDPath))
        {
            return;
        }

        var confFile = NginxConfPath(confDPath, subdomain);
        if (File.Exists(confFile))
        {
            File.Delete(confFile);
            logger.LogInformation("Removed nginx config for {Subdomain}", subdomain);
            // Guid.Empty: there is no provisioning job to report progress to on removal.
            await ReloadNginxAsync(Guid.Empty, subdomain, ct);
        }
    }

    /// <summary>
    /// Sends SIGHUP to the nginx container which triggers a graceful config reload.
    /// Failures are logged rather than thrown; cancellation is propagated.
    /// </summary>
    /// <param name="jobId">Job to publish progress events for; <see cref="Guid.Empty"/> suppresses them.</param>
    /// <param name="subdomain">Tenant subdomain, used only in the progress message.</param>
    /// <param name="ct">Cancels the Docker and bus calls.</param>
    private async Task ReloadNginxAsync(Guid jobId, string subdomain, CancellationToken ct)
    {
        try
        {
            using var docker = CreateClient();

            // Find the nginx container by image name — Aspire appends a random suffix to the name
            // so we can't rely on the static name "nginx".
            var containers = await docker.Containers.ListContainersAsync(
                new ContainersListParameters
                {
                    Filters = new Dictionary<string, IDictionary<string, bool>>
                    {
                        ["ancestor"] = new Dictionary<string, bool> { ["nginx"] = true }
                    }
                }, ct);

            var nginx = containers.FirstOrDefault();
            if (nginx is null)
            {
                logger.LogWarning("nginx container not found — skipping reload.");
                return;
            }

            await docker.Containers.KillContainerAsync(nginx.ID, new ContainerKillParameters { Signal = "HUP" }, ct);

            var containerName = nginx.Names.FirstOrDefault() ?? nginx.ID;
            logger.LogInformation("nginx reloaded (container: {Name}).", containerName);

            if (jobId != Guid.Empty)
            {
                await bus.Publish(new ProvisioningProgressEvent
                {
                    JobId = jobId,
                    Type = "nginx_reloaded",
                    Step = "Container Launch",
                    Message = $"nginx reloaded — route for {subdomain}.{Infra.Domain} is live.",
                }, ct);
            }
        }
        // Cancellation is not a reload failure: let it propagate instead of logging it as one
        // and then publishing with an already-cancelled token.
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            logger.LogWarning(ex, "Failed to reload nginx — new tenant route may not be active until next nginx restart.");

            if (jobId != Guid.Empty)
            {
                await bus.Publish(new ProvisioningProgressEvent
                {
                    JobId = jobId,
                    Type = "diagnostic",
                    Step = "Container Launch",
                    Message = "nginx reload failed — route may not be active.",
                    Detail = ex.ToString(),
                }, ct);
            }
        }
    }

    // -- docker helpers --

    /// <summary>
    /// Stops (5 second grace period) and force-removes the named container.
    /// A "not found" response from Docker is ignored.
    /// </summary>
    private static async Task TryRemoveExistingAsync(DockerClient docker, string name, CancellationToken cancellationToken)
    {
        try
        {
            await docker.Containers.StopContainerAsync(name, new ContainerStopParameters { WaitBeforeKillSeconds = 5 }, cancellationToken);
            await docker.Containers.RemoveContainerAsync(name, new ContainerRemoveParameters { Force = true }, cancellationToken);
        }
        catch (DockerContainerNotFoundException) { /* already gone - fine */ }
        catch (DockerApiException ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound) { /* same */ }
    }

    /// <summary>
    /// Splits an image reference into the repository and the tag (or digest) to pull.
    /// </summary>
    /// <param name="image">Reference such as <c>repo</c>, <c>repo:tag</c>, <c>host:5000/repo:tag</c> or <c>repo@sha256:…</c>.</param>
    /// <returns>
    /// The repository and its tag; the tag defaults to <c>latest</c> when none is given.
    /// For a digest reference the digest is returned in the tag position.
    /// </returns>
    private static (string repo, string tag) SplitImageTag(string image)
    {
        // Digest reference: everything after '@' identifies the image.
        var at = image.IndexOf('@');
        if (at >= 0)
        {
            return (image[..at], image[(at + 1)..]);
        }

        // Only a colon after the last '/' separates a tag; an earlier one is a registry port.
        var lastSlash = image.LastIndexOf('/');
        var colon = image.LastIndexOf(':');
        return colon > lastSlash
            ? (image[..colon], image[(colon + 1)..])
            : (image, "latest");
    }

    /// <summary>
    /// Connects <paramref name="containerName"/> to <paramref name="network"/> with the given
    /// <paramref name="alias"/> if it isn't already connected.
    /// Silently no-ops if the container isn't found (it may not be running in all environments).
    /// Other Docker failures are logged as warnings; cancellation is propagated.
    /// </summary>
    private async Task EnsureContainerOnNetworkAsync(
        DockerClient docker,
        string containerName,
        string network,
        string alias,
        CancellationToken cancellationToken)
    {
        try
        {
            var inspect = await docker.Containers.InspectContainerAsync(containerName, cancellationToken);

            if (inspect.NetworkSettings.Networks.TryGetValue(network, out var existing))
            {
                // Already connected — check whether our alias is present.
                var hasAlias = existing.Aliases?.Contains(alias, StringComparer.OrdinalIgnoreCase) == true;
                if (hasAlias)
                {
                    return;
                }

                // Connected but without the alias — disconnect so we can reconnect with it.
                await docker.Networks.DisconnectNetworkAsync(network, new NetworkDisconnectParameters
                {
                    Container = inspect.ID,
                    Force = true,
                }, cancellationToken);
            }

            await docker.Networks.ConnectNetworkAsync(network, new NetworkConnectParameters
            {
                Container = inspect.ID,
                EndpointConfig = new EndpointSettings
                {
                    Aliases = [alias],
                },
            }, cancellationToken);

            logger.LogInformation("Connected container '{Container}' to network '{Network}' with alias '{Alias}'.",
                containerName, network, alias);
        }
        catch (DockerContainerNotFoundException)
        {
            logger.LogWarning("Container '{Container}' not found — skipping network connect.", containerName);
        }
        catch (DockerApiException ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound)
        {
            logger.LogWarning("Container '{Container}' not found — skipping network connect.", containerName);
        }
        // Best-effort for Docker failures only; a cancelled provisioning run must still stop.
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            logger.LogWarning(ex, "Could not connect '{Container}' to '{Network}' — tenant JWT validation may fail.",
                containerName, network);
        }
    }
}