OPC # 0002: Improvements to Client provisioning workflows

This commit is contained in:
amadzarak
2026-04-25 21:33:28 -04:00
parent 98049f3c50
commit 35fe82d225
7 changed files with 621 additions and 1 deletions
@@ -0,0 +1,108 @@
using ControlPlane.Core.Config;
using ControlPlane.Core.Interfaces;
using ControlPlane.Core.Models;
using ControlPlane.Worker.Services;
using Microsoft.Extensions.Options;
namespace ControlPlane.Worker.Steps;
/// <summary>
/// First saga step — maps to "Infrastructure Provisioning" in the frontend step tracker.
///
/// SharedPlatform tenants (every stack component set to <c>ComponentMode.SharedPlatform</c>):
///   Probes Keycloak and Vault health endpoints so the saga fails fast with a clear,
///   actionable message if infra/docker-compose.yml isn't running — rather than timing
///   out inside KeycloakStep with a cryptic connection refused.
///
/// OwnContainer tenants (Dedicated / Enterprise tiers; taken whenever at least one
/// component is not SharedPlatform):
///   Generates a per-tenant docker-compose.yml to ClientAssets/{subdomain}/,
///   starts all sidecar containers (Keycloak, Vault, Postgres, MinIO as elected),
///   then resolves the ephemeral host ports into SagaContext.ResolvedTopology so
///   downstream steps (KeycloakStep etc.) can call sidecar admin APIs from the host.
/// </summary>
/// <remarks>
/// <c>config</c> and <c>infraOptions</c> are not read by any member of this class; they are
/// kept in the constructor so the existing construction/DI signature stays unchanged.
/// </remarks>
public class InfrastructureProvisioningStep(
    ClarityContainerService containers,
    IConfiguration config,
    IOptions<ClarityInfraOptions> infraOptions,
    ILogger<InfrastructureProvisioningStep> logger) : ISagaStep
{
    // Fallback admin endpoints used when the topology has no (usable) entry for a service.
    private const string DefaultKeycloakUrl = "http://localhost:8080";
    private const string DefaultVaultUrl = "http://localhost:8200";

    // Upper bound for a single health probe request.
    private static readonly TimeSpan ProbeTimeout = TimeSpan.FromSeconds(10);

    /// <summary>Display name of this saga step.</summary>
    public string StepName => "Infrastructure Provisioning";

    /// <summary>
    /// Verifies the shared platform services, or generates and starts the per-tenant sidecars,
    /// then flags the job with <c>CompletedSteps.InfrastructureProvisioned</c>.
    /// </summary>
    /// <param name="context">Saga state; <c>ComposeFilePath</c> is set on the sidecar path.</param>
    /// <param name="ct">Token used to cancel the underlying I/O.</param>
    /// <exception cref="InvalidOperationException">
    /// A shared platform service is unreachable, unhealthy, or its probe timed out.
    /// </exception>
    public async Task ExecuteAsync(SagaContext context, CancellationToken ct)
    {
        var job = context.Job;
        var stack = job.StackConfig;

        var allSharedPlatform =
            stack.Keycloak == ComponentMode.SharedPlatform &&
            stack.Vault == ComponentMode.SharedPlatform &&
            stack.Postgres == ComponentMode.SharedPlatform &&
            stack.Minio == ComponentMode.SharedPlatform;

        if (allSharedPlatform)
        {
            logger.LogInformation("[{JobId}] SharedPlatform tier — verifying platform services are reachable.", job.Id);
            await VerifySharedPlatformAsync(context, ct);
        }
        else
        {
            logger.LogInformation("[{JobId}] OwnContainer tier — generating compose manifest and starting sidecars.", job.Id);
            var composeFile = await containers.GenerateAndRunSidecarsAsync(job, context.ResolvedTopology, ct);

            // Record the compose file before resolving ports so CompensateAsync can tear the
            // project down even if the port lookup below throws.
            context.ComposeFilePath = composeFile;
            await containers.UpdateTopologyWithHostPortsAsync(context.ResolvedTopology, ct);
            logger.LogInformation("[{JobId}] Sidecars started. Compose file: {File}", job.Id, composeFile);
        }

        job.CompletedSteps |= CompletedSteps.InfrastructureProvisioned;
    }

    /// <summary>
    /// Tears down the tenant's compose project if this step recorded a compose file;
    /// does nothing on the SharedPlatform path (no compose file is ever recorded there).
    /// </summary>
    /// <param name="context">Saga state whose <c>ComposeFilePath</c> gates the teardown.</param>
    /// <param name="ct">Token used to cancel the teardown.</param>
    public async Task CompensateAsync(SagaContext context, CancellationToken ct)
    {
        // NOTE(review): ComposeFilePath is only assigned after GenerateAndRunSidecarsAsync
        // returns, so a failure inside that call that left containers running would not be
        // cleaned up here — confirm whether the container service handles that itself.
        if (string.IsNullOrWhiteSpace(context.ComposeFilePath))
        {
            return;
        }

        logger.LogWarning("[{JobId}] Compensating: tearing down sidecar containers.", context.Job.Id);
        await containers.TearDownComposeProjectAsync(context.Job.Subdomain, ct);
    }

    // ── SharedPlatform health probes ─────────────────────────────────────────

    /// <summary>
    /// Probes the Keycloak and Vault endpoints taken from the resolved topology
    /// (or the localhost defaults when an entry is missing or has no admin URL).
    /// </summary>
    private async Task VerifySharedPlatformAsync(SagaContext context, CancellationToken ct)
    {
        // Short-lived client: this runs once per provisioning job, not per request.
        using var http = new HttpClient { Timeout = ProbeTimeout };

        var kcUrl = ResolveAdminUrl(
            context.ResolvedTopology.TryGetValue("Keycloak", out var kc) ? kc.AdminUrl : null,
            DefaultKeycloakUrl);
        var vaultUrl = ResolveAdminUrl(
            context.ResolvedTopology.TryGetValue("Vault", out var vault) ? vault.AdminUrl : null,
            DefaultVaultUrl);

        await ProbeAsync(http, $"{kcUrl}/health/ready", "Keycloak", ct);
        // Vault returns non-200 on sealed/standby — any HTTP response means it's running
        await ProbeAsync(http, $"{vaultUrl}/v1/sys/health", "Vault", ct, acceptAnyHttpResponse: true);
    }

    /// <summary>
    /// Returns <paramref name="configuredUrl"/> without trailing slashes, or
    /// <paramref name="fallbackUrl"/> when it is null/blank, so appending a path never
    /// produces a double slash or a relative URI.
    /// </summary>
    private static string ResolveAdminUrl(string? configuredUrl, string fallbackUrl) =>
        string.IsNullOrWhiteSpace(configuredUrl) ? fallbackUrl : configuredUrl.Trim().TrimEnd('/');

    /// <summary>
    /// Issues a GET against <paramref name="url"/> and converts connection failures,
    /// timeouts and (unless <paramref name="acceptAnyHttpResponse"/> is set) non-success
    /// status codes into an <see cref="InvalidOperationException"/> with an actionable message.
    /// </summary>
    /// <param name="http">Client used for the request; owned by the caller.</param>
    /// <param name="url">Absolute health endpoint URL.</param>
    /// <param name="serviceName">Human-readable service name used in logs and errors.</param>
    /// <param name="ct">Caller cancellation; genuine cancellation is not translated.</param>
    /// <param name="acceptAnyHttpResponse">When true, any HTTP status counts as "reachable".</param>
    /// <exception cref="InvalidOperationException">The service is unreachable, unhealthy, or timed out.</exception>
    private async Task ProbeAsync(
        HttpClient http, string url, string serviceName, CancellationToken ct,
        bool acceptAnyHttpResponse = false)
    {
        try
        {
            // Dispose the response so its content stream/connection is released promptly.
            using var resp = await http.GetAsync(url, ct);
            var status = (int)resp.StatusCode;

            if (!acceptAnyHttpResponse && !resp.IsSuccessStatusCode)
            {
                throw new InvalidOperationException(
                    $"{serviceName} at {url} returned HTTP {status}. Is it healthy?");
            }

            logger.LogInformation("{Service} is reachable at {Url} ({Status}).",
                serviceName, url, status);
        }
        catch (HttpRequestException ex)
        {
            throw new InvalidOperationException(
                $"{serviceName} is not reachable at {url}. " +
                $"Run `docker compose up -d` from OPC/infra/ before provisioning. ({ex.Message})", ex);
        }
        // HttpClient.Timeout surfaces as TaskCanceledException; the filter lets real
        // caller-initiated cancellation propagate untouched.
        catch (TaskCanceledException ex) when (!ct.IsCancellationRequested)
        {
            throw new InvalidOperationException(
                $"{serviceName} health check at {url} timed out. Is the service running?", ex);
        }
    }
}