// Source listing metadata: 359 lines, 15 KiB, C#
using ControlPlane.Core.Config;
|
|
using ControlPlane.Core.Messages;
|
|
using Docker.DotNet;
|
|
using Docker.DotNet.Models;
|
|
using MassTransit;
|
|
using Microsoft.Extensions.Options;
|
|
|
|
namespace ControlPlane.Worker.Services;
|
|
|
|
/// <summary>
|
|
/// Manages Clarity.Server Docker containers for provisioned tenants.
|
|
/// Container naming convention: {env}-app-clarity-{siteCode}
|
|
/// e.g. fdev-app-clarity-01000014
|
|
/// </summary>
|
|
public class ClarityContainerService(
|
|
IConfiguration config,
|
|
IOptions<ClarityInfraOptions> infraOptions,
|
|
IPublishEndpoint bus,
|
|
ILogger<ClarityContainerService> logger)
|
|
{
|
|
/// <summary>
/// Clarity infrastructure options, read from the injected options accessor on every access.
/// </summary>
private ClarityInfraOptions Infra
{
    get { return infraOptions.Value; }
}
|
|
|
|
/// <summary>
/// Docker image used for tenant containers. Taken from the <c>Docker:ClarityServerImage</c>
/// configuration key (override it to point at a production registry); falls back to
/// <c>clarity-server:latest</c> when the key is not set.
/// </summary>
private string ImageName
{
    get
    {
        var configured = config["Docker:ClarityServerImage"];
        return configured ?? "clarity-server:latest";
    }
}
|
|
|
|
/// <summary>
/// Creates a Docker client for the endpoint configured under <c>Docker:Socket</c>.
/// When the key is missing or blank, the conventional local endpoint for the current
/// operating system is used: the <c>docker_engine</c> named pipe on Windows and the
/// <c>/var/run/docker.sock</c> Unix socket everywhere else.
/// </summary>
/// <returns>A new <see cref="DockerClient"/>; the caller is responsible for disposing it.</returns>
private DockerClient CreateClient()
{
    var endpoint = config["Docker:Socket"];

    // A blank value is treated like a missing one instead of failing in the Uri constructor.
    if (string.IsNullOrWhiteSpace(endpoint))
    {
        // The named pipe only exists on Windows hosts; other platforms expose the daemon on a Unix socket.
        endpoint = OperatingSystem.IsWindows()
            ? "npipe://./pipe/docker_engine"
            : "unix:///var/run/docker.sock";
    }

    return new DockerClientConfiguration(new Uri(endpoint)).CreateClient();
}
|
|
|
|
/// <summary>
/// Builds the Docker container name for a tenant.
/// Both inputs are lower-cased (invariant culture) and combined as
/// <c>{env}-app-clarity-{siteCode}</c>, e.g. <c>fdev-app-clarity-01000014</c>.
/// </summary>
/// <param name="environment">Environment prefix, e.g. <c>fdev</c>.</param>
/// <param name="siteCode">Tenant site code, e.g. <c>01000014</c>.</param>
/// <returns>The lower-cased container name.</returns>
public static string ContainerName(string environment, string siteCode)
{
    var envSegment = environment.ToLowerInvariant();
    var siteSegment = siteCode.ToLowerInvariant();

    return string.Join("-", envSegment, "app", "clarity", siteSegment);
}
|
|
|
|
/// <summary>
/// Provisions and starts the Clarity.Server container for one tenant.
/// Steps, in order: remove any container that already has the derived name, make sure the image
/// is available, create the container on the managed network (no host port binding — nginx
/// reaches it through Docker DNS on the shared network), attach the <c>keycloak</c> and
/// <c>vault</c> containers to that network, start the container, and write the nginx conf.d
/// snippet that routes traffic to it.
/// </summary>
/// <param name="environment">Environment prefix used for the container name and the <c>clarity.env</c> label.</param>
/// <param name="siteCode">Tenant site code used for the container name and the <c>clarity.siteCode</c> label.</param>
/// <param name="subdomain">Tenant subdomain; passed to the container as <c>TenantSubdomain</c> and used for the nginx route.</param>
/// <param name="keycloakRealm">Realm passed to the container as <c>Keycloak__Realm</c>.</param>
/// <param name="postgresConnectionString">Passed as <c>ConnectionStrings__postgresdb</c>; the variable is omitted when null.</param>
/// <param name="vaultToken">Passed as <c>Vault__Token</c>; the variable is omitted when null.</param>
/// <param name="jobId">Provisioning job id forwarded to the nginx config step.</param>
/// <param name="cancellationToken">Token forwarded to every Docker and file operation.</param>
/// <returns>The name of the started container.</returns>
/// <exception cref="InvalidOperationException">Docker reported that the container was not started.</exception>
public async Task<string> StartTenantContainerAsync(
    string environment,
    string siteCode,
    string subdomain,
    string keycloakRealm,
    string? postgresConnectionString,
    string? vaultToken,
    Guid jobId,
    CancellationToken cancellationToken)
{
    using var docker = CreateClient();
    var name = ContainerName(environment, siteCode);

    // Reprovisioning is idempotent: a previous container with the same name is stopped and removed first.
    await TryRemoveExistingAsync(docker, name, cancellationToken);

    // Make sure the image exists locally (pulled when it comes from a registry).
    await EnsureImageAsync(docker, cancellationToken);

    // All service URLs use stable Docker DNS names on the managed network — no host ports involved.
    var environmentVariables = new List<string>
    {
        "ASPNETCORE_ENVIRONMENT=Production",
        "ASPNETCORE_URLS=http://+:8080",
        $"TenantSubdomain={subdomain}",
        $"Keycloak__BaseUrl={Infra.KeycloakPublicUrl}",
        $"Keycloak__InternalUrl={Infra.KeycloakInternalUrl}",
        $"Keycloak__Realm={keycloakRealm}",
        $"Vault__Address={Infra.VaultInternalUrl}",
    };

    // Optional settings are only emitted when a value was supplied.
    if (vaultToken is not null)
    {
        environmentVariables.Add($"Vault__Token={vaultToken}");
    }

    if (postgresConnectionString is not null)
    {
        environmentVariables.Add($"ConnectionStrings__postgresdb={postgresConnectionString}");
    }

    var createParameters = new CreateContainerParameters
    {
        Name = name,
        Image = ImageName,
        Env = environmentVariables,
        HostConfig = new HostConfig
        {
            NetworkMode = Infra.Network,
            RestartPolicy = new RestartPolicy { Name = RestartPolicyKind.UnlessStopped },
        },
        Labels = new Dictionary<string, string>
        {
            ["clarity.managed"] = "true",
            ["clarity.subdomain"] = subdomain,
            ["clarity.siteCode"] = siteCode,
            ["clarity.env"] = environment,
        },
    };

    var container = await docker.Containers.CreateContainerAsync(createParameters, cancellationToken);

    // Ensure Keycloak and Vault are reachable on the managed network via their Docker DNS aliases.
    // Aspire places them on its own bridge; tenant containers on clarity-net need them aliased here.
    await EnsureContainerOnNetworkAsync(docker, "keycloak", Infra.Network, "keycloak", cancellationToken);
    await EnsureContainerOnNetworkAsync(docker, "vault", Infra.Network, "vault", cancellationToken);

    if (!await docker.Containers.StartContainerAsync(container.ID, null, cancellationToken))
    {
        throw new InvalidOperationException($"Docker failed to start container {name} (id={container.ID}).");
    }

    logger.LogInformation("Started container {Name} on {Network} (image: {Image})", name, Infra.Network, ImageName);

    await WriteNginxConfigAsync(subdomain, name, jobId, cancellationToken);

    return name;
}
|
|
/// <summary>
/// Stops and removes the tenant container with the given name, then logs the removal.
/// Called from InfrastructureStep.CompensateAsync.
/// </summary>
/// <param name="containerName">Name of the container to remove.</param>
/// <param name="cancellationToken">Token forwarded to the Docker calls.</param>
public async Task StopAndRemoveAsync(string containerName, CancellationToken cancellationToken)
{
    using (var client = CreateClient())
    {
        await TryRemoveExistingAsync(client, containerName, cancellationToken);
        logger.LogInformation("Removed container {Name}", containerName);
    }
}
|
|
|
|
// -- helpers --
|
|
|
|
/// <summary>
/// Makes sure the configured Clarity.Server image is available to the Docker daemon.
/// Returns immediately when an image matching the reference is already present locally;
/// otherwise pulls it, unless the reference has no registry host.
/// </summary>
/// <param name="docker">Docker client used for the lookup and the pull.</param>
/// <param name="cancellationToken">Token forwarded to the Docker calls.</param>
/// <exception cref="InvalidOperationException">
/// The image is not present locally and its name has no registry part, so it cannot be pulled.
/// </exception>
private async Task EnsureImageAsync(DockerClient docker, CancellationToken cancellationToken)
{
    // Read the configured image once so the lookup, the error text and the pull all refer to the same reference.
    var image = ImageName;

    var localMatches = await docker.Images.ListImagesAsync(new ImagesListParameters
    {
        Filters = new Dictionary<string, IDictionary<string, bool>>
        {
            ["reference"] = new Dictionary<string, bool> { [image] = true }
        }
    }, cancellationToken);

    if (localMatches.Count > 0)
    {
        logger.LogInformation("Image {Image} already present locally.", image);
        return;
    }

    // Local image (no registry host) — pulling from Docker Hub will always fail.
    // The image must be built manually before provisioning.
    // Ordinal comparison: image references are identifiers, not culture-sensitive text.
    var isLocalOnly = !image.Contains('/') || image.StartsWith("localhost/", StringComparison.Ordinal);
    if (isLocalOnly)
    {
        throw new InvalidOperationException(
            $"Image '{image}' was not found locally and cannot be pulled from a registry. " +
            $"Build it first from the repo root:{Environment.NewLine}" +
            $"  docker build -f Clarity.Server/Dockerfile -t {image} ." +
            $"{Environment.NewLine}Then retry provisioning.");
    }

    // Registry image — attempt pull
    logger.LogInformation("Pulling image {Image} from registry...", image);
    var (repo, tag) = SplitImageTag(image);

    // Pull status lines are forwarded to the debug log; entries without a status are skipped.
    var pullProgress = new Progress<JSONMessage>(m =>
    {
        if (!string.IsNullOrWhiteSpace(m.Status))
        {
            logger.LogDebug("[docker pull] {Status} {Progress}", m.Status, m.ProgressMessage);
        }
    });

    await docker.Images.CreateImageAsync(
        new ImagesCreateParameters { FromImage = repo, Tag = tag },
        null,
        pullProgress,
        cancellationToken);
}
|
|
|
|
// -- nginx conf.d helpers --
|
|
|
|
/// <summary>
/// Writes <c>{subdomain}.conf</c> into the directory configured under <c>Nginx:ConfDPath</c> so
/// nginx routes <c>{subdomain}.{Infra.Domain}</c> to the tenant container on port 8080,
/// then asks nginx to reload its configuration.
/// Logs a warning and does nothing when <c>Nginx:ConfDPath</c> is not configured.
/// </summary>
/// <param name="subdomain">Tenant subdomain; used for the file name and the <c>server_name</c>.</param>
/// <param name="containerName">Container name nginx proxies to.</param>
/// <param name="jobId">Provisioning job id forwarded to the reload step.</param>
/// <param name="ct">Token forwarded to the file write and the reload.</param>
private async Task WriteNginxConfigAsync(string subdomain, string containerName, Guid jobId, CancellationToken ct)
{
    var confDirectory = config["Nginx:ConfDPath"];
    if (string.IsNullOrWhiteSpace(confDirectory))
    {
        logger.LogWarning("Nginx:ConfDPath is not configured — skipping nginx conf write for {Subdomain}.", subdomain);
        return;
    }

    // NOTE(review): subdomain goes into both the file name and the nginx directives unescaped —
    // presumably validated by the caller; confirm.
    var targetPath = Path.Combine(confDirectory, $"{subdomain}.conf");

    // Triple-dollar raw string: only {{{...}}} is interpolated, so nginx's own braces and $variables stay literal.
    var serverBlock = $$$"""
        # Auto-generated by ControlPlane.Worker — do not edit manually.
        # Tenant: {{{subdomain}}}
        server {
            listen 443 ssl;
            server_name {{{subdomain}}}.{{{Infra.Domain}}};

            ssl_certificate {{{Infra.NginxCertPath}}};
            ssl_certificate_key {{{Infra.NginxCertKeyPath}}};

            location / {
                # Docker DNS resolves the container name on the managed network
                set $upstream http://{{{containerName}}}:8080;
                proxy_pass $upstream;
                proxy_set_header Host $host;
                proxy_set_header X-Real-IP $remote_addr;
                proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                proxy_set_header X-Forwarded-Proto $scheme;
            }
        }
        """;

    await File.WriteAllTextAsync(targetPath, serverBlock, ct);
    logger.LogInformation("Wrote nginx config for {Subdomain} → {Container}", subdomain, containerName);

    await ReloadNginxAsync(jobId, subdomain, ct);
}
|
|
|
|
/// <summary>
/// Deletes the tenant's <c>{subdomain}.conf</c> from the directory configured under
/// <c>Nginx:ConfDPath</c> and asks nginx to reload. Does nothing when the directory is not
/// configured or the file does not exist.
/// </summary>
/// <param name="subdomain">Tenant subdomain whose route should be removed.</param>
/// <param name="ct">Token forwarded to the nginx reload.</param>
public async Task RemoveNginxConfigAsync(string subdomain, CancellationToken ct)
{
    var confDirectory = config["Nginx:ConfDPath"];
    if (string.IsNullOrWhiteSpace(confDirectory))
    {
        return;
    }

    var targetPath = Path.Combine(confDirectory, $"{subdomain}.conf");
    if (!File.Exists(targetPath))
    {
        // Nothing was written for this tenant, so there is no route to drop and no reload needed.
        return;
    }

    File.Delete(targetPath);
    logger.LogInformation("Removed nginx config for {Subdomain}", subdomain);

    // Guid.Empty: no provisioning job is associated with a removal, so no progress event is published.
    await ReloadNginxAsync(Guid.Empty, subdomain, ct);
}
|
|
|
|
/// <summary>
/// Sends SIGHUP to the nginx container, which triggers a graceful config reload, and
/// reports the outcome as a provisioning progress event when a job id is supplied.
/// The reload is best-effort: failures are logged as warnings and do not propagate.
/// </summary>
/// <param name="jobId">Provisioning job to report to; <see cref="Guid.Empty"/> disables progress events.</param>
/// <param name="subdomain">Tenant subdomain, used in the progress message.</param>
/// <param name="ct">Token forwarded to the Docker calls and the publish.</param>
private async Task ReloadNginxAsync(Guid jobId, string subdomain, CancellationToken ct)
{
    var reportProgress = jobId != Guid.Empty;

    // The event is only built here and published after the try/catch, so that a bus failure
    // is never mistaken for (or reported as) a failed nginx reload.
    ProvisioningProgressEvent? progress = null;

    try
    {
        using var docker = CreateClient();

        // Find the nginx container by image name — Aspire appends a random suffix to the name
        // so we can't rely on the static name "nginx".
        var containers = await docker.Containers.ListContainersAsync(
            new ContainersListParameters
            {
                Filters = new Dictionary<string, IDictionary<string, bool>>
                {
                    ["ancestor"] = new Dictionary<string, bool> { ["nginx"] = true }
                }
            }, ct);

        var nginx = containers.FirstOrDefault();
        if (nginx is null)
        {
            logger.LogWarning("nginx container not found — skipping reload.");
            return;
        }

        await docker.Containers.KillContainerAsync(nginx.ID, new ContainerKillParameters { Signal = "HUP" }, ct);
        var containerName = nginx.Names.FirstOrDefault() ?? nginx.ID;
        logger.LogInformation("nginx reloaded (container: {Name}).", containerName);

        if (reportProgress)
        {
            progress = new ProvisioningProgressEvent
            {
                JobId = jobId,
                Type = "nginx_reloaded",
                Step = "Container Launch",
                Message = $"nginx reloaded — route for {subdomain}.{Infra.Domain} is live.",
            };
        }
    }
    catch (Exception ex)
    {
        logger.LogWarning(ex, "Failed to reload nginx — new tenant route may not be active until next nginx restart.");

        if (reportProgress)
        {
            progress = new ProvisioningProgressEvent
            {
                JobId = jobId,
                Type = "diagnostic",
                Step = "Container Launch",
                Message = "nginx reload failed — route may not be active.",
                Detail = ex.ToString(),
            };
        }
    }

    if (progress is null)
    {
        return;
    }

    try
    {
        await bus.Publish(progress, ct);
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        // Progress events are informational; a bus failure must not fail the reload step.
        // Cancellation is deliberately left to propagate to the caller.
        logger.LogWarning(ex, "Failed to publish nginx reload progress event for job {JobId}.", jobId);
    }
}
|
|
|
|
// -- docker helpers --
|
|
|
|
/// <summary>
/// Stops the named container (5 second grace period before kill) and then force-removes it.
/// A container that does not exist is not an error: not-found responses from Docker are ignored.
/// Any other Docker failure propagates to the caller.
/// </summary>
/// <param name="docker">Docker client to use.</param>
/// <param name="name">Name of the container to remove.</param>
/// <param name="cancellationToken">Token forwarded to the Docker calls.</param>
private static async Task TryRemoveExistingAsync(DockerClient docker, string name, CancellationToken cancellationToken)
{
    var stopParameters = new ContainerStopParameters { WaitBeforeKillSeconds = 5 };
    var removeParameters = new ContainerRemoveParameters { Force = true };

    try
    {
        await docker.Containers.StopContainerAsync(name, stopParameters, cancellationToken);
        await docker.Containers.RemoveContainerAsync(name, removeParameters, cancellationToken);
    }
    catch (Exception ex) when (
        ex is DockerContainerNotFoundException
            or DockerApiException { StatusCode: System.Net.HttpStatusCode.NotFound })
    {
        // Already gone - fine.
    }
}
|
|
|
|
/// <summary>
/// Splits an image reference into the repository and the tag (or digest) to pull.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><c>name:tag</c> → (<c>name</c>, <c>tag</c>)</item>
/// <item><c>name</c> → (<c>name</c>, <c>latest</c>)</item>
/// <item><c>host:5000/name</c> → (<c>host:5000/name</c>, <c>latest</c>) — the colon belongs to the registry port</item>
/// <item><c>name@sha256:…</c> → (<c>name</c>, <c>sha256:…</c>)</item>
/// </list>
/// </remarks>
/// <param name="image">Image reference, optionally with registry host, tag and/or digest.</param>
/// <returns>The repository part and the tag or digest; the tag defaults to <c>latest</c>.</returns>
private static (string repo, string tag) SplitImageTag(string image)
{
    // A digest pins the image exactly, so it wins over any tag in the same reference.
    var at = image.IndexOf('@');
    if (at >= 0)
    {
        var digest = image[(at + 1)..];
        var (pinnedRepo, _) = SplitNameAndTag(image[..at]);
        return (pinnedRepo, digest);
    }

    return SplitNameAndTag(image);

    // Separates "name[:tag]"; only a colon after the last '/' is a tag separator —
    // an earlier one is part of a "host:port" registry prefix.
    static (string repo, string tag) SplitNameAndTag(string reference)
    {
        var lastSlash = reference.LastIndexOf('/');
        var colon = reference.LastIndexOf(':');

        return colon > lastSlash
            ? (reference[..colon], reference[(colon + 1)..])
            : (reference, "latest");
    }
}
|
|
|
|
/// <summary>
/// Makes sure <paramref name="containerName"/> is attached to <paramref name="network"/> with
/// <paramref name="alias"/>. If it is attached without the alias it is disconnected and
/// reconnected with it. The method never throws: a missing container is logged as a warning
/// (it may not be running in all environments), as is any other failure.
/// </summary>
/// <param name="docker">Docker client to use.</param>
/// <param name="containerName">Name of the container to attach.</param>
/// <param name="network">Name of the Docker network.</param>
/// <param name="alias">DNS alias the container must have on the network (compared case-insensitively).</param>
/// <param name="cancellationToken">Token forwarded to the Docker calls.</param>
private async Task EnsureContainerOnNetworkAsync(
    DockerClient docker,
    string containerName,
    string network,
    string alias,
    CancellationToken cancellationToken)
{
    try
    {
        var details = await docker.Containers.InspectContainerAsync(containerName, cancellationToken);

        if (details.NetworkSettings.Networks.TryGetValue(network, out var endpoint))
        {
            // Already attached — nothing to do when the alias is present.
            if (endpoint.Aliases is { } currentAliases
                && currentAliases.Contains(alias, StringComparer.OrdinalIgnoreCase))
            {
                return;
            }

            // Attached without the alias — detach so the connect below can add it.
            var disconnect = new NetworkDisconnectParameters
            {
                Container = details.ID,
                Force = true,
            };
            await docker.Networks.DisconnectNetworkAsync(network, disconnect, cancellationToken);
        }

        var connect = new NetworkConnectParameters
        {
            Container = details.ID,
            EndpointConfig = new EndpointSettings
            {
                Aliases = [alias],
            },
        };
        await docker.Networks.ConnectNetworkAsync(network, connect, cancellationToken);

        logger.LogInformation("Connected container '{Container}' to network '{Network}' with alias '{Alias}'.", containerName, network, alias);
    }
    catch (Exception ex) when (
        ex is DockerContainerNotFoundException
            or DockerApiException { StatusCode: System.Net.HttpStatusCode.NotFound })
    {
        logger.LogWarning("Container '{Container}' not found — skipping network connect.", containerName);
    }
    catch (Exception ex)
    {
        logger.LogWarning(ex, "Could not connect '{Container}' to '{Network}' — tenant JWT validation may fail.", containerName, network);
    }
}
|
|
}
|