OPC # 0002: Improvements to Client provisioning workflows
This commit is contained in:
@@ -0,0 +1,108 @@
|
||||
using ControlPlane.Core.Config;
|
||||
using ControlPlane.Core.Interfaces;
|
||||
using ControlPlane.Core.Models;
|
||||
using ControlPlane.Worker.Services;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace ControlPlane.Worker.Steps;
|
||||
|
||||
/// <summary>
/// First saga step — maps to "Infrastructure Provisioning" in the frontend step tracker.
///
/// SharedPlatform tenants (every component of the job's stack config is SharedPlatform):
///   Probes Keycloak and Vault health endpoints so the saga fails fast with a clear,
///   actionable message if infra/docker-compose.yml isn't running — rather than timing
///   out inside KeycloakStep with a cryptic connection refused.
///
/// OwnContainer tenants (Dedicated / Enterprise tiers — at least one component is not SharedPlatform):
///   Generates a per-tenant docker-compose.yml to ClientAssets/{subdomain}/,
///   starts all sidecar containers (Keycloak, Vault, Postgres, MinIO as elected),
///   then resolves the ephemeral host ports into SagaContext.ResolvedTopology so
///   downstream steps (KeycloakStep etc.) can call sidecar admin APIs from the host.
/// </summary>
/// <remarks>
/// The <c>config</c> and <c>infraOptions</c> constructor parameters are not read by this
/// class; they are kept so the constructor signature stays unchanged for existing callers.
/// </remarks>
public class InfrastructureProvisioningStep(
    ClarityContainerService containers,
    IConfiguration config,
    IOptions<ClarityInfraOptions> infraOptions,
    ILogger<InfrastructureProvisioningStep> logger) : ISagaStep
{
    // Fallback admin endpoints used when the resolved topology has no usable entry.
    private const string DefaultKeycloakUrl = "http://localhost:8080";
    private const string DefaultVaultUrl = "http://localhost:8200";

    // One shared client for all health probes: creating and disposing an HttpClient per call
    // can exhaust sockets under load. PooledConnectionLifetime bounds how long a pooled
    // connection is reused so DNS changes are still picked up.
    private static readonly HttpClient s_probeClient = new(
        new SocketsHttpHandler { PooledConnectionLifetime = TimeSpan.FromMinutes(2) })
    {
        Timeout = TimeSpan.FromSeconds(10),
    };

    /// <summary>Display name of this saga step.</summary>
    public string StepName => "Infrastructure Provisioning";

    /// <summary>
    /// Verifies the shared platform services when every stack component is SharedPlatform;
    /// otherwise generates the compose manifest, starts the sidecars and updates the
    /// resolved topology with host ports. Marks the step as completed on the job afterwards.
    /// </summary>
    /// <param name="context">Saga context carrying the job, resolved topology and compose file path.</param>
    /// <param name="ct">Token used to cancel the operation.</param>
    /// <exception cref="InvalidOperationException">
    /// A shared platform service is unreachable, unhealthy, or its health check timed out.
    /// </exception>
    public async Task ExecuteAsync(SagaContext context, CancellationToken ct)
    {
        var job = context.Job;
        var stack = job.StackConfig;

        var allSharedPlatform =
            stack.Keycloak == ComponentMode.SharedPlatform &&
            stack.Vault == ComponentMode.SharedPlatform &&
            stack.Postgres == ComponentMode.SharedPlatform &&
            stack.Minio == ComponentMode.SharedPlatform;

        if (allSharedPlatform)
        {
            logger.LogInformation("[{JobId}] SharedPlatform tier — verifying platform services are reachable.", job.Id);
            await VerifySharedPlatformAsync(context, ct);
        }
        else
        {
            logger.LogInformation("[{JobId}] OwnContainer tier — generating compose manifest and starting sidecars.", job.Id);
            var composeFile = await containers.GenerateAndRunSidecarsAsync(job, context.ResolvedTopology, ct);

            // Record the compose file before resolving ports so CompensateAsync tears the
            // sidecars down even if the port resolution below fails.
            context.ComposeFilePath = composeFile;
            await containers.UpdateTopologyWithHostPortsAsync(context.ResolvedTopology, ct);
            logger.LogInformation("[{JobId}] Sidecars started. Compose file: {File}", job.Id, composeFile);
        }

        job.CompletedSteps |= CompletedSteps.InfrastructureProvisioned;
    }

    /// <summary>
    /// Tears down the tenant's compose project, but only when this step recorded a compose
    /// file path (i.e. the OwnContainer branch ran). The SharedPlatform branch creates nothing
    /// to undo.
    /// </summary>
    /// <param name="context">Saga context carrying the job and compose file path.</param>
    /// <param name="ct">Token used to cancel the teardown.</param>
    public async Task CompensateAsync(SagaContext context, CancellationToken ct)
    {
        if (string.IsNullOrWhiteSpace(context.ComposeFilePath))
        {
            return;
        }

        logger.LogWarning("[{JobId}] Compensating: tearing down sidecar containers.", context.Job.Id);
        await containers.TearDownComposeProjectAsync(context.Job.Subdomain, ct);
    }

    // ── SharedPlatform health probes ─────────────────────────────────────────

    /// <summary>
    /// Probes the Keycloak and Vault health endpoints, taking base URLs from the resolved
    /// topology and falling back to the localhost defaults when an entry is missing or blank.
    /// </summary>
    private async Task VerifySharedPlatformAsync(SagaContext context, CancellationToken ct)
    {
        var kcUrl = context.ResolvedTopology.TryGetValue("Keycloak", out var kc)
            ? NormalizeBaseUrl(kc.AdminUrl, DefaultKeycloakUrl)
            : DefaultKeycloakUrl;
        var vaultUrl = context.ResolvedTopology.TryGetValue("Vault", out var vault)
            ? NormalizeBaseUrl(vault.AdminUrl, DefaultVaultUrl)
            : DefaultVaultUrl;

        await ProbeAsync(s_probeClient, $"{kcUrl}/health/ready", "Keycloak", ct);
        // Vault returns non-200 on sealed/standby — any HTTP response means it's running
        await ProbeAsync(s_probeClient, $"{vaultUrl}/v1/sys/health", "Vault", ct, acceptAnyHttpResponse: true);
    }

    /// <summary>
    /// Returns <paramref name="url"/> without trailing slashes so appending a path cannot
    /// produce "//"; returns <paramref name="fallback"/> when the URL is null or blank, which
    /// would otherwise yield a relative request URI that HttpClient rejects.
    /// </summary>
    private static string NormalizeBaseUrl(string? url, string fallback) =>
        string.IsNullOrWhiteSpace(url) ? fallback : url.Trim().TrimEnd('/');

    /// <summary>
    /// Issues a GET against <paramref name="url"/> and converts failures into an
    /// <see cref="InvalidOperationException"/> with an actionable message.
    /// </summary>
    /// <param name="http">Client used for the request.</param>
    /// <param name="url">Absolute health endpoint URL.</param>
    /// <param name="serviceName">Service name used in log and error messages.</param>
    /// <param name="ct">Token used to cancel the probe; caller-requested cancellation is not wrapped.</param>
    /// <param name="acceptAnyHttpResponse">
    /// When true, any HTTP response (including non-success status codes) counts as reachable.
    /// </param>
    /// <exception cref="InvalidOperationException">
    /// The service returned a non-success status (unless accepted), could not be reached,
    /// or the request timed out.
    /// </exception>
    private async Task ProbeAsync(
        HttpClient http, string url, string serviceName, CancellationToken ct,
        bool acceptAnyHttpResponse = false)
    {
        try
        {
            // Dispose the response so its content and connection are released promptly.
            using var resp = await http.GetAsync(url, ct);
            if (!acceptAnyHttpResponse && !resp.IsSuccessStatusCode)
            {
                throw new InvalidOperationException(
                    $"{serviceName} at {url} returned HTTP {(int)resp.StatusCode}. Is it healthy?");
            }

            logger.LogInformation("{Service} is reachable at {Url} ({Status}).",
                serviceName, url, (int)resp.StatusCode);
        }
        catch (HttpRequestException ex)
        {
            throw new InvalidOperationException(
                $"{serviceName} is not reachable at {url}. " +
                $"Run `docker compose up -d` from OPC/infra/ before provisioning. ({ex.Message})", ex);
        }
        // A TaskCanceledException without caller cancellation is the HttpClient timeout firing.
        catch (TaskCanceledException ex) when (!ct.IsCancellationRequested)
        {
            throw new InvalidOperationException(
                $"{serviceName} health check at {url} timed out. Is the service running?", ex);
        }
    }
}
|
||||
Reference in New Issue
Block a user