OPC # 0002: Improvements to Client provisioning workflows

This commit is contained in:
amadzarak
2026-04-25 21:33:28 -04:00
parent 98049f3c50
commit 35fe82d225
7 changed files with 621 additions and 1 deletions
@@ -27,4 +27,19 @@ public class SagaContext
// Written by PulumiStep (DedicatedVM/Enterprise tier) — target host details for subsequent steps
public string? VmIpAddress { get; set; }
public string? VmSshKeyPath { get; set; }
/// <summary>
/// Resolved endpoints for each infrastructure component of this provisioning job,
/// keyed case-insensitively by component name: "Keycloak", "Vault", "Postgres", "Minio".
/// Built by ProvisioningWorker before the saga starts. The property is init-only, but the
/// dictionary itself is mutable: InfrastructureProvisioningStep has OwnContainer entries
/// replaced in place once their host ports have been resolved.
/// </summary>
public Dictionary<string, ResolvedEndpoint> ResolvedTopology { get; init; }
    = new Dictionary<string, ResolvedEndpoint>(StringComparer.OrdinalIgnoreCase);
/// <summary>
/// Absolute path to the generated docker-compose.yml for this tenant.
/// Non-null only for OwnContainer tenants: InfrastructureProvisioningStep assigns it after the
/// sidecar compose project has been started, and that step's compensation only tears the
/// project down when this value is set.
/// </summary>
public string? ComposeFilePath { get; set; }
}
+4 -1
View File
@@ -22,5 +22,8 @@ public enum ComponentMode
VpsDocker,
/// <summary>Own VM with the component running as a native OS process (no Docker).</summary>
VpsBareMetal
VpsBareMetal,
/// <summary>Component is not provisioned for this tenant (feature not elected).</summary>
Disabled
}
@@ -0,0 +1,53 @@
namespace ControlPlane.Core.Models;
/// <summary>
/// The fully-resolved network addresses for one infrastructure component for a specific tenant.
/// Built by ProvisioningWorker at job start from StackConfig + ClarityInfraOptions.
/// Carried through SagaContext and persisted in TenantRecord at saga completion.
///
/// Design principle: Clarity.Server always talks to PublicUrl (goes through nginx/dnsmasq).
/// The Worker uses AdminUrl (direct host-accessible URL) for admin API calls during provisioning.
/// InternalUrl is injected into container env vars for container-to-container communication.
/// </summary>
/// <remarks>
/// The record's text representation (<c>ToString()</c>) masks <see cref="AdminPassword"/> so that
/// logging or string-formatting an endpoint cannot leak the credential. Equality, <c>with</c>
/// expressions and property access are unaffected.
/// </remarks>
public sealed record ResolvedEndpoint
{
    /// <summary>Mode elected for this component.</summary>
    public ComponentMode Mode { get; init; }

    /// <summary>
    /// URL the Worker process uses to call this component's admin API.
    /// Worker runs on the host machine:
    ///   SharedPlatform → http://localhost:{exposedPort}   (docker-compose exposes to host)
    ///   OwnContainer   → http://localhost:{ephemeralPort} (resolved by InfrastructureProvisioningStep)
    ///   VPS            → operator-supplied external URL
    /// </summary>
    public string AdminUrl { get; init; } = string.Empty;

    /// <summary>
    /// Public DNS URL injected into Clarity.Server and surfaced in the TenantRecord.
    /// Always routes through nginx/dnsmasq — no direct Docker DNS leaks to app code.
    ///   SharedPlatform → https://keycloak.clarity.test
    ///   OwnContainer   → https://kc.{subdomain}.clarity.test
    /// </summary>
    public string PublicUrl { get; init; } = string.Empty;

    /// <summary>
    /// Docker-internal URL for container-to-container communication on the managed network.
    ///   SharedPlatform → http://keycloak:8080
    ///   OwnContainer   → http://kc-{subdomain}:8080
    /// </summary>
    public string InternalUrl { get; init; } = string.Empty;

    /// <summary>Docker container name, if the Worker manages this component.</summary>
    public string? ContainerName { get; init; }

    /// <summary>
    /// Admin username for this component instance.
    /// Null for SharedPlatform (read from Keycloak:AdminUser config at call time).
    /// Explicitly set for OwnContainer sidecars.
    /// </summary>
    public string? AdminUser { get; init; }

    /// <summary>Admin password for this component instance. See AdminUser.</summary>
    public string? AdminPassword { get; init; }

    /// <summary>
    /// Replaces the compiler-synthesized member printer used by <c>ToString()</c>.
    /// Emits every property in declaration order but prints <c>***</c> in place of a
    /// non-null <see cref="AdminPassword"/>; a null password prints as empty.
    /// </summary>
    /// <param name="builder">Builder the record's <c>ToString()</c> writes its members into.</param>
    /// <returns>Always <c>true</c>, signalling that members were written.</returns>
    private bool PrintMembers(System.Text.StringBuilder builder)
    {
        var maskedPassword = AdminPassword is null ? string.Empty : "***";

        builder.Append($"Mode = {Mode}, AdminUrl = {AdminUrl}, PublicUrl = {PublicUrl}, ");
        builder.Append($"InternalUrl = {InternalUrl}, ContainerName = {ContainerName}, ");
        builder.Append($"AdminUser = {AdminUser}, AdminPassword = {maskedPassword}");
        return true;
    }
}
@@ -1,5 +1,8 @@
using System.Diagnostics;
using System.Text;
using ControlPlane.Core.Config;
using ControlPlane.Core.Messages;
using ControlPlane.Core.Models;
using Docker.DotNet;
using Docker.DotNet.Models;
using MassTransit;
@@ -85,6 +88,16 @@ public class ClarityContainerService(
{
NetworkMode = Infra.Network,
RestartPolicy = new RestartPolicy { Name = RestartPolicyKind.UnlessStopped },
// Map *.clarity.test domains to the Docker host gateway so that Clarity.Server,
// running inside a container, can reach nginx (which routes *.clarity.test).
// This is required for Keycloak OIDC discovery and JWT iss-claim validation —
// Keycloak issues tokens with iss=https://keycloak.clarity.test/realms/...
// and Clarity.Server must be able to reach that URL for OIDC metadata.
ExtraHosts =
[
$"keycloak.{Infra.Domain}:host-gateway",
$"{subdomain}.{Infra.Domain}:host-gateway",
],
},
Labels = new Dictionary<string, string>
{
@@ -107,6 +120,7 @@ public class ClarityContainerService(
logger.LogInformation("Started container {Name} on {Network} (image: {Image})", name, Infra.Network, ImageName);
await WriteNginxConfigAsync(subdomain, name, jobId, cancellationToken);
await WriteComposeArtifactAsync(environment, subdomain, keycloakRealm, name, cancellationToken);
return name;
}
@@ -355,4 +369,353 @@ public class ClarityContainerService(
logger.LogWarning(ex, "Could not connect '{Container}' to '{Network}' — tenant JWT validation may fail.", containerName, network);
}
}
// ── ClientAssets / compose artifact helpers ──────────────────────────────
/// <summary>
/// Resolves the per-tenant ClientAssets folder path (the folder is not created here).
/// The root is taken from configuration key "ClientAssets__Folder", then "ClientAssets:Folder",
/// and finally defaults to a "ClientAssets" directory one level above the application base directory.
/// </summary>
/// <param name="subdomain">Tenant subdomain, appended to the root as the final path segment.</param>
/// <returns>The root folder combined with <paramref name="subdomain"/>.</returns>
private string ClientAssetsFolder(string subdomain)
{
    var assetsRoot = config["ClientAssets__Folder"];
    assetsRoot ??= config["ClientAssets:Folder"];
    assetsRoot ??= Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "ClientAssets"));

    return Path.Combine(assetsRoot, subdomain);
}
/// <summary>
/// Writes a docker-compose.yml to ClientAssets/{subdomain}/ documenting the SharedPlatform
/// clarity-server deployment. The file is an audit artifact — it is NOT executed by the Worker.
/// </summary>
/// <remarks>
/// Best-effort: every exception raised while creating the folder or writing the file is caught,
/// logged as a warning and swallowed, so a failed write never fails the caller.
/// NOTE(review): the blanket catch also absorbs the cancellation exception the file write can
/// raise when <paramref name="ct"/> is cancelled — confirm that is intended.
/// NOTE(review): the template hard-codes the network name "clarity-net" and
/// ASPNETCORE_ENVIRONMENT "Production" instead of reading them from the settings used to create
/// the real container — confirm the artifact stays in sync with the running container.
/// </remarks>
/// <param name="environment">Value written to the <c>clarity.env</c> label.</param>
/// <param name="subdomain">Tenant subdomain; used for the output folder, project/service names and labels.</param>
/// <param name="keycloakRealm">Value written to <c>Keycloak__Realm</c>.</param>
/// <param name="containerName">Value written to <c>container_name</c>.</param>
/// <param name="ct">Cancellation token passed to the file write.</param>
private async Task WriteComposeArtifactAsync(
string environment,
string subdomain,
string keycloakRealm,
string containerName,
CancellationToken ct)
{
var folder = ClientAssetsFolder(subdomain);
try
{
Directory.CreateDirectory(folder);
// Triple-$ raw string: only {{{expr}}} is an interpolation hole; shorter brace runs are literal text.
var content = $$$"""
# Auto-generated by ControlPlane.Worker — do not edit manually.
# Tenant: {{{subdomain}}}
# Tier: SharedPlatform
# Generated: {{{DateTimeOffset.UtcNow:O}}}
name: clarity-{{{subdomain}}}
services:
app-{{{subdomain}}}:
image: {{{ImageName}}}
restart: unless-stopped
container_name: {{{containerName}}}
environment:
ASPNETCORE_ENVIRONMENT: Production
ASPNETCORE_URLS: http://+:8080
TenantSubdomain: {{{subdomain}}}
Keycloak__BaseUrl: {{{Infra.KeycloakPublicUrl}}}
Keycloak__InternalUrl: {{{Infra.KeycloakInternalUrl}}}
Keycloak__Realm: {{{keycloakRealm}}}
Vault__Address: {{{Infra.VaultInternalUrl}}}
# ConnectionStrings__postgresdb: (persisted in TenantRecord)
networks:
- clarity-net
extra_hosts:
- "keycloak.{{{Infra.Domain}}}:host-gateway"
- "{{{subdomain}}}.{{{Infra.Domain}}}:host-gateway"
labels:
clarity.managed: "true"
clarity.subdomain: {{{subdomain}}}
clarity.env: {{{environment}}}
networks:
clarity-net:
external: true
""";
var composePath = Path.Combine(folder, "docker-compose.yml");
await File.WriteAllTextAsync(composePath, content, ct);
logger.LogInformation("Wrote compose artifact for {Subdomain} → {Path}", subdomain, composePath);
}
catch (Exception ex)
{
// Non-fatal — the container is already running; the artifact is an audit record.
logger.LogWarning(ex, "Could not write compose artifact for {Subdomain}.", subdomain);
}
}
// ── OwnContainer — sidecar lifecycle ─────────────────────────────────────
/// <summary>
/// OwnContainer tier — renders the per-tenant sidecar manifest (Keycloak, Vault, Postgres,
/// MinIO as elected by StackConfig), saves it as ClientAssets/{subdomain}/docker-compose.yml,
/// and brings the project up with <c>docker compose up -d</c>.
/// </summary>
/// <param name="job">Provisioning job supplying the subdomain, stack configuration and job id.</param>
/// <param name="topology">Resolved endpoints for the job; not read by this method.</param>
/// <param name="ct">Cancellation token for the file write and the compose process.</param>
/// <returns>The absolute path to the compose file that was written.</returns>
public async Task<string> GenerateAndRunSidecarsAsync(
    ProvisioningJob job,
    Dictionary<string, ResolvedEndpoint> topology,
    CancellationToken ct)
{
    var tenantFolder = ClientAssetsFolder(job.Subdomain);
    Directory.CreateDirectory(tenantFolder);

    var manifest = BuildSidecarCompose(job);
    var manifestPath = Path.Combine(tenantFolder, "docker-compose.yml");

    await File.WriteAllTextAsync(manifestPath, manifest, ct);
    logger.LogInformation("[{JobId}] Wrote sidecar compose → {Path}", job.Id, manifestPath);

    await RunDockerComposeAsync(manifestPath, "up -d", job.Id, ct);
    logger.LogInformation("[{JobId}] Sidecar containers started.", job.Id);

    return manifestPath;
}
/// <summary>
/// After sidecars are started, inspects each OwnContainer component's Docker container
/// to resolve its ephemeral host port, then rewrites the topology AdminUrl to
/// <c>http://localhost:{hostPort}</c> so downstream saga steps can call admin APIs.
/// </summary>
/// <remarks>
/// Port selection: when the endpoint's <c>InternalUrl</c> carries an explicit port, the host
/// binding of that container port (<c>{port}/tcp</c>) is preferred, so containers that publish
/// several ports resolve to the one the component is actually addressed on. If no such binding
/// exists, the first binding with a host port is used.
/// Failures for a single component are logged as warnings and do not stop the remaining
/// components; cancellation of <paramref name="ct"/> is not swallowed and propagates.
/// </remarks>
/// <param name="topology">Endpoints to update in place; only OwnContainer entries with a container name are touched.</param>
/// <param name="ct">Cancellation token for the Docker inspect calls.</param>
public async Task UpdateTopologyWithHostPortsAsync(
    Dictionary<string, ResolvedEndpoint> topology,
    CancellationToken ct)
{
    using var docker = CreateClient();

    // Iterate over a snapshot: entries are replaced in the dictionary inside the loop.
    foreach (var (component, endpoint) in topology.ToList())
    {
        if (endpoint.Mode != ComponentMode.OwnContainer) continue;
        if (string.IsNullOrWhiteSpace(endpoint.ContainerName)) continue;

        try
        {
            var inspect = await docker.Containers.InspectContainerAsync(endpoint.ContainerName, ct);
            var portMap = inspect.NetworkSettings?.Ports;
            string? hostPort = null;

            if (portMap is not null)
            {
                // Prefer the binding for the container port named in InternalUrl (e.g. "8080/tcp").
                if (Uri.TryCreate(endpoint.InternalUrl, UriKind.Absolute, out var internalUri)
                    && internalUri.Port > 0
                    && portMap.TryGetValue($"{internalUri.Port}/tcp", out var preferredBindings))
                {
                    hostPort = preferredBindings?
                        .Select(b => b.HostPort)
                        .FirstOrDefault(p => !string.IsNullOrWhiteSpace(p));
                }

                // Fallback: first published binding of any port.
                hostPort ??= portMap
                    .SelectMany(p => p.Value ?? [])
                    .Select(b => b.HostPort)
                    .FirstOrDefault(p => !string.IsNullOrWhiteSpace(p));
            }

            if (hostPort is not null)
            {
                topology[component] = endpoint with { AdminUrl = $"http://localhost:{hostPort}" };
                logger.LogInformation("Resolved {Component} host port → {Url}", component, topology[component].AdminUrl);
            }
            else
            {
                logger.LogWarning("No host port binding found for {Component} container {Name}.", component, endpoint.ContainerName);
            }
        }
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            // Best-effort per component; a caller-requested cancellation is deliberately not caught here.
            logger.LogWarning(ex, "Could not resolve host port for {Component} container {Name}.", component, endpoint.ContainerName);
        }
    }
}
/// <summary>
/// Removes a tenant's sidecar compose project by running <c>docker compose down -v</c>
/// against ClientAssets/{subdomain}/docker-compose.yml. When that file does not exist,
/// a warning is logged and nothing is executed.
/// Called from InfrastructureProvisioningStep.CompensateAsync.
/// </summary>
/// <param name="subdomain">Tenant subdomain whose compose project is removed.</param>
/// <param name="ct">Cancellation token for the compose process.</param>
public async Task TearDownComposeProjectAsync(string subdomain, CancellationToken ct)
{
    var manifestPath = Path.Combine(ClientAssetsFolder(subdomain), "docker-compose.yml");

    if (File.Exists(manifestPath))
    {
        // No job id is available on this path, so Guid.Empty is passed through.
        await RunDockerComposeAsync(manifestPath, "down -v", Guid.Empty, ct);
        logger.LogInformation("Tore down sidecar containers for {Subdomain}.", subdomain);
    }
    else
    {
        logger.LogWarning("No compose file found for {Subdomain} — nothing to tear down.", subdomain);
    }
}
/// <summary>
/// Builds the docker-compose YAML content for OwnContainer sidecar services.
/// A service block is appended only when the corresponding StackConfig mode is
/// <c>OwnContainer</c>. The network "clarity-net" is declared as external, so the sidecars
/// join a network that must already exist.
///
/// Only the Keycloak service declares an <c>extra_hosts: host-gateway</c> entry (for its own
/// kc.{subdomain}.{domain} hostname); the Postgres, Vault and MinIO services do not.
/// </summary>
/// <remarks>
/// Every service publishes its ports as <c>127.0.0.1::{containerPort}</c> (no fixed host port);
/// UpdateTopologyWithHostPortsAsync later reads the assigned host port back from Docker.
/// NOTE(review): the Keycloak block always sets KC_DB_URL_HOST to pg-{subdomain} and
/// KC_DB_URL_DATABASE to "keycloak", but the Postgres sidecar is only emitted when Postgres is
/// OwnContainer and its POSTGRES_DB is "clarity" — confirm that host and database exist.
/// NOTE(review): the templates are single-$ raw strings that contain <c>${{…}}</c> sequences —
/// confirm they compile and emit the intended <c>${VAR:-default}</c> compose substitutions.
/// NOTE(review): the network name "clarity-net" is hard-coded here rather than read from
/// configuration — confirm it matches the configured platform network.
/// </remarks>
/// <param name="job">Provisioning job supplying the subdomain, tier and StackConfig.</param>
/// <returns>The complete compose manifest text.</returns>
private string BuildSidecarCompose(ProvisioningJob job)
{
var s = job.Subdomain;
var stack = job.StackConfig;
var sb = new StringBuilder();
// Manifest header: project name plus the opening of the services map.
sb.AppendLine($"""
# Auto-generated by ControlPlane.Worker — do not edit manually.
# Tenant: {s} | Tier: {job.Tier}
# Generated: {DateTimeOffset.UtcNow:O}
name: clarity-{s}
services:
""");
// ── Postgres ──────────────────────────────────────────────────────────
if (stack.Postgres == ComponentMode.OwnContainer)
{
sb.AppendLine($"""
pg-{s}:
image: postgres:16
restart: unless-stopped
environment:
POSTGRES_USER: clarity
POSTGRES_PASSWORD: ${{POSTGRES_PASSWORD:-clarity-dev}}
POSTGRES_DB: clarity
expose:
- "5432"
ports:
- "127.0.0.1::5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U clarity"]
interval: 10s
timeout: 5s
retries: 5
networks:
- clarity-net
labels:
clarity.managed: "true"
clarity.subdomain: {s}
clarity.component: postgres
""");
}
// ── Keycloak ──────────────────────────────────────────────────────────
if (stack.Keycloak == ComponentMode.OwnContainer)
{
var kcHostname = $"kc.{s}.{Infra.Domain}";
// Keycloak waits for the Postgres sidecar's healthcheck only when that sidecar is part of
// this manifest; otherwise the block is empty and no depends_on is emitted.
var dependsBlock = stack.Postgres == ComponentMode.OwnContainer
? $"""
depends_on:
pg-{s}:
condition: service_healthy
"""
: string.Empty;
sb.AppendLine($"""
kc-{s}:
image: quay.io/keycloak/keycloak:latest
restart: unless-stopped
command: start-dev
environment:
KEYCLOAK_ADMIN: admin
KEYCLOAK_ADMIN_PASSWORD: ${{KEYCLOAK_ADMIN_PASSWORD:-admin}}
KC_DB: postgres
KC_DB_URL_HOST: pg-{s}
KC_DB_URL_DATABASE: keycloak
KC_DB_USERNAME: clarity
KC_DB_PASSWORD: ${{POSTGRES_PASSWORD:-clarity-dev}}
KC_HOSTNAME: {kcHostname}
KC_HOSTNAME_STRICT: "false"
KC_HTTP_ENABLED: "true"
expose:
- "8080"
ports:
- "127.0.0.1::8080"
networks:
- clarity-net
extra_hosts:
- "{kcHostname}:host-gateway"
{dependsBlock}
labels:
clarity.managed: "true"
clarity.subdomain: {s}
clarity.component: keycloak
""");
}
// ── Vault ─────────────────────────────────────────────────────────────
if (stack.Vault == ComponentMode.OwnContainer)
{
sb.AppendLine($"""
vault-{s}:
image: hashicorp/vault:latest
restart: unless-stopped
cap_add:
- IPC_LOCK
environment:
VAULT_DEV_ROOT_TOKEN_ID: ${{VAULT_TOKEN:-vault-dev-root}}
VAULT_DEV_LISTEN_ADDRESS: "0.0.0.0:8200"
expose:
- "8200"
ports:
- "127.0.0.1::8200"
networks:
- clarity-net
labels:
clarity.managed: "true"
clarity.subdomain: {s}
clarity.component: vault
""");
}
// ── MinIO ─────────────────────────────────────────────────────────────
if (stack.Minio == ComponentMode.OwnContainer)
{
sb.AppendLine($"""
minio-{s}:
image: minio/minio:latest
restart: unless-stopped
command: server /data --console-address ":9001"
environment:
MINIO_ROOT_USER: ${{MINIO_ROOT_USER:-minio}}
MINIO_ROOT_PASSWORD: ${{MINIO_ROOT_PASSWORD:-minio-dev}}
expose:
- "9000"
- "9001"
ports:
- "127.0.0.1::9000"
- "127.0.0.1::9001"
networks:
- clarity-net
labels:
clarity.managed: "true"
clarity.subdomain: {s}
clarity.component: minio
""");
}
// Manifest footer: the shared network is referenced as external, never created here.
sb.AppendLine("""
networks:
clarity-net:
external: true
""");
return sb.ToString();
}
/// <summary>
/// Runs <c>docker compose -f {composePath} {args}</c> as a child process, with the compose
/// file's directory as working directory, and waits for it to exit.
/// </summary>
/// <remarks>
/// Arguments are passed as a list (no manual quoting), so a compose path containing spaces or
/// quotes is handed to docker unchanged. stdout and stderr are captured in full and logged once
/// the process exits; stderr is logged at Information on success and at Error on failure.
/// If <paramref name="ct"/> is cancelled while waiting, the child process tree is killed
/// (best-effort) before the cancellation is rethrown, so no compose process is left running.
/// </remarks>
/// <param name="composePath">Path to the compose file passed to <c>-f</c>.</param>
/// <param name="args">Space-separated compose sub-command and flags, e.g. <c>up -d</c>.</param>
/// <param name="jobId">Job id used to correlate log lines and the failure message.</param>
/// <param name="ct">Cancellation token for reading output and waiting for exit.</param>
/// <exception cref="InvalidOperationException">
/// The process could not be started, or it exited with a non-zero exit code.
/// </exception>
private async Task RunDockerComposeAsync(string composePath, string args, Guid jobId, CancellationToken ct)
{
    var psi = new ProcessStartInfo("docker")
    {
        WorkingDirectory = Path.GetDirectoryName(composePath)!,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        UseShellExecute = false,
    };

    psi.ArgumentList.Add("compose");
    psi.ArgumentList.Add("-f");
    psi.ArgumentList.Add(composePath);
    foreach (var arg in args.Split(' ', StringSplitOptions.RemoveEmptyEntries))
    {
        psi.ArgumentList.Add(arg);
    }

    using var process = Process.Start(psi)
        ?? throw new InvalidOperationException("Failed to start docker compose process.");

    // Start draining both pipes before waiting so a full pipe buffer cannot stall the child.
    var stdoutTask = process.StandardOutput.ReadToEndAsync(ct);
    var stderrTask = process.StandardError.ReadToEndAsync(ct);

    try
    {
        await process.WaitForExitAsync(ct);
    }
    catch (OperationCanceledException)
    {
        // Waiting was abandoned, but the child keeps running unless it is killed explicitly.
        try
        {
            if (!process.HasExited)
            {
                process.Kill(entireProcessTree: true);
            }
        }
        catch (Exception killEx)
        {
            logger.LogWarning(killEx, "[{JobId}] Could not kill cancelled 'docker compose {Args}' process.", jobId, args);
        }

        throw;
    }

    var stdout = await stdoutTask;
    var stderr = await stderrTask;
    var failed = process.ExitCode != 0;

    if (!string.IsNullOrWhiteSpace(stdout))
    {
        logger.LogInformation("[{JobId}] [docker compose] {Output}", jobId, stdout.Trim());
    }

    if (!string.IsNullOrWhiteSpace(stderr))
    {
        logger.Log(
            failed ? LogLevel.Error : LogLevel.Information,
            "[{JobId}] [docker compose stderr] {Output}", jobId, stderr.Trim());
    }

    if (failed)
    {
        throw new InvalidOperationException(
            $"'docker compose {args}' exited with code {process.ExitCode}. See logs for details.");
    }
}
}
@@ -28,6 +28,28 @@ public class KeycloakAdminClient
_http = new HttpClient { BaseAddress = new Uri(_baseUrl) };
}
/// <summary>
/// Factory for a KeycloakAdminClient bound to an explicit base URL and admin credentials,
/// instead of the values taken from static DI configuration.
/// Used by KeycloakStep to target SharedPlatform or OwnContainer Keycloak instances
/// through the resolved topology.
/// </summary>
/// <param name="adminUrl">Base URL of the Keycloak instance to administer.</param>
/// <param name="adminUser">Admin username for that instance.</param>
/// <param name="adminPassword">Admin password for that instance.</param>
/// <param name="logger">Logger used by the created client.</param>
/// <returns>A new client targeting <paramref name="adminUrl"/>.</returns>
public static KeycloakAdminClient ForUrl(
    string adminUrl, string adminUser, string adminPassword,
    ILogger<KeycloakAdminClient> logger)
{
    return new KeycloakAdminClient(adminUrl, adminUser, adminPassword, logger);
}
/// <summary>
/// Initializes a client for an explicit Keycloak base URL and admin credentials.
/// Trailing slashes are trimmed from <paramref name="adminUrl"/> before it is stored and used
/// as the HTTP base address; the base URL and username (never the password) are logged.
/// </summary>
private KeycloakAdminClient(
    string adminUrl, string adminUser, string adminPassword,
    ILogger<KeycloakAdminClient> logger)
{
    _logger = logger;

    var trimmedUrl = adminUrl.TrimEnd('/');
    _baseUrl = trimmedUrl;
    _adminUser = adminUser;
    _adminPassword = adminPassword;

    _logger.LogInformation("KeycloakAdminClient base URL: {Url}, user: {User}", _baseUrl, _adminUser);

    var baseAddress = new Uri(_baseUrl);
    _http = new HttpClient { BaseAddress = baseAddress };
}
private async Task AuthorizeAsync(CancellationToken ct)
{
var form = new FormUrlEncodedContent(new Dictionary<string, string>
@@ -0,0 +1,108 @@
using ControlPlane.Core.Config;
using ControlPlane.Core.Interfaces;
using ControlPlane.Core.Models;
using ControlPlane.Worker.Services;
using Microsoft.Extensions.Options;
namespace ControlPlane.Worker.Steps;
/// <summary>
/// First saga step — maps to "Infrastructure Provisioning" in the frontend step tracker.
///
/// Stacks with at least one OwnContainer component (Dedicated / Enterprise tiers):
///   Generates a per-tenant docker-compose.yml to ClientAssets/{subdomain}/,
///   starts all sidecar containers (Keycloak, Vault, Postgres, MinIO as elected),
///   then resolves the ephemeral host ports into SagaContext.ResolvedTopology so
///   downstream steps (KeycloakStep etc.) can call sidecar admin APIs from the host.
///
/// Stacks without any OwnContainer component:
///   No compose project is started. Keycloak and Vault health endpoints are probed — each only
///   when that component is in SharedPlatform mode — so the saga fails fast with a clear,
///   actionable message if infra/docker-compose.yml isn't running, rather than timing
///   out inside KeycloakStep with a cryptic connection refused. Components in any other mode
///   (for example Disabled) are neither started nor probed by this step.
/// </summary>
public class InfrastructureProvisioningStep(
    ClarityContainerService containers,
    IConfiguration config,
    IOptions<ClarityInfraOptions> infraOptions,
    ILogger<InfrastructureProvisioningStep> logger) : ISagaStep
{
    // Used when the topology has no (or a blank) AdminUrl for the component.
    private const string DefaultKeycloakAdminUrl = "http://localhost:8080";
    private const string DefaultVaultAdminUrl = "http://localhost:8200";

    /// <summary>Display name of this step.</summary>
    public string StepName => "Infrastructure Provisioning";

    /// <summary>
    /// Starts sidecars when any component is OwnContainer, otherwise probes the shared platform,
    /// then marks the job's InfrastructureProvisioned flag.
    /// </summary>
    /// <param name="context">Saga state; ComposeFilePath and ResolvedTopology are updated on the sidecar path.</param>
    /// <param name="ct">Cancellation token for compose, Docker and HTTP calls.</param>
    public async Task ExecuteAsync(SagaContext context, CancellationToken ct)
    {
        var job = context.Job;
        var stack = job.StackConfig;

        // Sidecars are needed only if something is actually elected as OwnContainer. Testing for
        // "not everything is SharedPlatform" would also match Disabled/VPS-only stacks and run
        // `docker compose up` against a manifest that contains no services.
        var needsSidecars =
            stack.Keycloak == ComponentMode.OwnContainer ||
            stack.Vault == ComponentMode.OwnContainer ||
            stack.Postgres == ComponentMode.OwnContainer ||
            stack.Minio == ComponentMode.OwnContainer;

        if (needsSidecars)
        {
            logger.LogInformation("[{JobId}] OwnContainer tier — generating compose manifest and starting sidecars.", job.Id);

            // NOTE(review): ComposeFilePath is only assigned once the start call has returned, so
            // CompensateAsync skips teardown if that call throws after some containers were created.
            var composeFile = await containers.GenerateAndRunSidecarsAsync(job, context.ResolvedTopology, ct);
            context.ComposeFilePath = composeFile;

            await containers.UpdateTopologyWithHostPortsAsync(context.ResolvedTopology, ct);
            logger.LogInformation("[{JobId}] Sidecars started. Compose file: {File}", job.Id, composeFile);
        }
        else
        {
            logger.LogInformation("[{JobId}] SharedPlatform tier — verifying platform services are reachable.", job.Id);
            await VerifySharedPlatformAsync(context, ct);
        }

        context.Job.CompletedSteps |= CompletedSteps.InfrastructureProvisioned;
    }

    /// <summary>
    /// Tears down the tenant's sidecar compose project, but only when this step recorded a
    /// compose file path; otherwise there is nothing to undo.
    /// </summary>
    public async Task CompensateAsync(SagaContext context, CancellationToken ct)
    {
        if (!string.IsNullOrWhiteSpace(context.ComposeFilePath))
        {
            logger.LogWarning("[{JobId}] Compensating: tearing down sidecar containers.", context.Job.Id);
            await containers.TearDownComposeProjectAsync(context.Job.Subdomain, ct);
        }
    }

    // ── SharedPlatform health probes ─────────────────────────────────────────

    /// <summary>
    /// Probes the shared Keycloak and Vault instances, skipping whichever of the two is not in
    /// SharedPlatform mode for this job.
    /// </summary>
    private async Task VerifySharedPlatformAsync(SagaContext context, CancellationToken ct)
    {
        var stack = context.Job.StackConfig;
        using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(10) };

        if (stack.Keycloak == ComponentMode.SharedPlatform)
        {
            var kcUrl = ResolveAdminUrl(context, "Keycloak", DefaultKeycloakAdminUrl);
            await ProbeAsync(http, $"{kcUrl}/health/ready", "Keycloak", ct);
        }

        if (stack.Vault == ComponentMode.SharedPlatform)
        {
            var vaultUrl = ResolveAdminUrl(context, "Vault", DefaultVaultAdminUrl);
            // Vault returns non-200 on sealed/standby — any HTTP response means it's running
            await ProbeAsync(http, $"{vaultUrl}/v1/sys/health", "Vault", ct, acceptAnyHttpResponse: true);
        }
    }

    /// <summary>
    /// Returns the component's AdminUrl from the resolved topology without trailing slashes,
    /// or <paramref name="fallback"/> when the component is missing or its AdminUrl is blank.
    /// </summary>
    private static string ResolveAdminUrl(SagaContext context, string component, string fallback)
    {
        var configured = context.ResolvedTopology.TryGetValue(component, out var endpoint)
            ? endpoint.AdminUrl
            : null;

        return (string.IsNullOrWhiteSpace(configured) ? fallback : configured).TrimEnd('/');
    }

    /// <summary>
    /// Issues a GET against <paramref name="url"/> and converts failures into an
    /// <see cref="InvalidOperationException"/> with an actionable message.
    /// </summary>
    /// <param name="http">Client used for the request; its timeout bounds the probe.</param>
    /// <param name="url">Health endpoint to call.</param>
    /// <param name="serviceName">Service name used in log and error messages.</param>
    /// <param name="ct">Caller cancellation; a cancellation requested through it is not translated.</param>
    /// <param name="acceptAnyHttpResponse">When true, any HTTP status counts as reachable.</param>
    /// <exception cref="InvalidOperationException">
    /// The service is unreachable, the request timed out, or (unless
    /// <paramref name="acceptAnyHttpResponse"/> is set) the status code is not a success code.
    /// </exception>
    private async Task ProbeAsync(
        HttpClient http, string url, string serviceName, CancellationToken ct,
        bool acceptAnyHttpResponse = false)
    {
        try
        {
            using var resp = await http.GetAsync(url, ct);
            if (!acceptAnyHttpResponse && !resp.IsSuccessStatusCode)
                throw new InvalidOperationException(
                    $"{serviceName} at {url} returned HTTP {(int)resp.StatusCode}. Is it healthy?");

            logger.LogInformation("{Service} is reachable at {Url} ({Status}).",
                serviceName, url, (int)resp.StatusCode);
        }
        catch (HttpRequestException ex)
        {
            throw new InvalidOperationException(
                $"{serviceName} is not reachable at {url}. " +
                $"Run `docker compose up -d` from OPC/infra/ before provisioning. ({ex.Message})", ex);
        }
        catch (TaskCanceledException ex) when (!ct.IsCancellationRequested)
        {
            // The caller did not cancel, so the cancellation is treated as a request timeout.
            throw new InvalidOperationException(
                $"{serviceName} health check at {url} timed out. Is the service running?", ex);
        }
    }
}
+56
View File
@@ -0,0 +1,56 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.Hosting.Lifetime": "Information"
}
},
// ── Keycloak ──────────────────────────────────────────────────────────────────
// Worker runs on the host machine → use localhost URLs for admin API calls.
// These are the shared platform Keycloak credentials from infra/docker-compose.yml.
// Aspire no longer injects these — they live here.
// NOTE(review): AdminPassword is committed in plain text — presumably a local-development
// value only; confirm it is overridden (environment variables / secret store) elsewhere.
"Keycloak": {
"AuthServerUrl": "http://localhost:8080",
"AdminUser": "admin",
"AdminPassword": "Admin1234!",
"Realm": "master",
"Resource": "admin-cli"
},
// ── Vault ─────────────────────────────────────────────────────────────────────
// Worker uses localhost:8200 for admin calls.
// Vault__KeysFile is machine-specific → still injected by Aspire AppHost.
"Vault": {
"Address": "http://localhost:8200",
"ContainerAddress": "http://vault:8200"
},
// ── ClarityInfraOptions (Clarity section) ─────────────────────────────────────
// These values describe what gets injected INTO tenant containers at docker run time.
// Containers live on clarity-net → use Docker DNS names (keycloak, vault, postgres).
// Nginx/dnsmasq surface these at public DNS names for the browser.
"Clarity": {
"Domain": "clarity.test",
"Network": "clarity-net",
"KeycloakPublicUrl": "https://keycloak.clarity.test",
"KeycloakInternalUrl": "http://keycloak:8080",
"VaultInternalUrl": "http://vault:8200",
"NginxCertPath": "/etc/nginx/certs/clarity.test.crt",
"NginxCertKeyPath": "/etc/nginx/certs/clarity.test.key"
},
// ── Docker ───────────────────────────────────────────────────────────────────
"Docker": {
"Socket": "npipe://./pipe/docker_engine",
"ClarityServerImage": "clarity-server:latest"
},
// ── Connection strings ────────────────────────────────────────────────────────
// platformdb: the shared infra postgres from infra/docker-compose.yml.
// Worker connects on localhost:5432 for tenant DB provisioning (MigrationStep).
// Aspire-managed opcdb (port 5433) is injected separately by AppHost via .WithReference.
// NOTE(review): the connection string embeds a plain-text password — presumably a
// local-development default; confirm it is overridden outside development.
"ConnectionStrings": {
"platformdb": "Host=localhost;Port=5432;Username=postgres;Password=postgres"
}
}