Skip to content

AI Testing

The atmosphere-ai-test module provides testing utilities for AI endpoints. AiTestClient captures streaming text, events, and metadata from an AgentRuntime execution, and AiAssertions offers a fluent API for verifying the results in JUnit 5.

<dependency>
<groupId>org.atmosphere</groupId>
<artifactId>atmosphere-ai-test</artifactId>
<version>LATEST</version> <!-- check Maven Central for latest -->
<scope>test</scope>
</dependency>
import static org.atmosphere.ai.test.AiAssertions.assertThat;
class MyAgentTest {

    @Test
    void weatherToolIsCalled() {
        // Drive the agent synchronously through the test client.
        var testClient = new AiTestClient(myAgentRuntime);
        var result = testClient.prompt("What's the weather in Tokyo?");

        // The weather tool must run with the right city, produce a result,
        // and the final answer must mention the city — quickly and cleanly.
        assertThat(result)
                .hasToolCall("get_weather")
                .withArgument("city", "Tokyo")
                .hasResult()
                .and()
                .containsText("Tokyo")
                .completedWithin(Duration.ofSeconds(10))
                .hasNoErrors();
    }
}

AiTestClient wraps an AgentRuntime and executes prompts synchronously, capturing everything the runtime produces through the StreamingSession.

var client = new AiTestClient(myAgentRuntime);

Pass any AgentRuntime implementation — the built-in runtime, a Spring AI runtime, a LangChain4j runtime, or a mock.

// Simple prompt
AiResponse response = client.prompt("Explain WebSockets");

// Same prompt, but with a system prompt guiding the model
// (distinct variable name so both examples compile together)
AiResponse guidedResponse = client.prompt("Explain WebSockets",
        "You are a networking expert. Be concise.");

Both methods block until the runtime completes (or a 30-second timeout is reached) and return an AiResponse with all captured data.

AiResponse is a record that captures the full output of an AI endpoint execution:

| Field       | Type                  | Description                                              |
|-------------|-----------------------|----------------------------------------------------------|
| `text`      | `String`              | The full accumulated text response                       |
| `events`    | `List<AiEvent>`       | All `AiEvent` instances emitted during streaming         |
| `metadata`  | `Map<String, Object>` | Metadata key-value pairs sent during streaming           |
| `errors`    | `List<String>`        | Error messages, if any                                   |
| `elapsed`   | `Duration`            | Total wall-clock time for the response                   |
| `completed` | `boolean`             | Whether the stream completed normally (vs error/timeout) |
// Collect every event of a particular type from the stream
List<AiEvent.ToolStart> toolStartEvents = response.eventsOfType(AiEvent.ToolStart.class);

// Was a given tool invoked at all?
boolean weatherToolCalled = response.hasToolCall("get_weather");

// Did that tool actually produce a result?
boolean weatherToolReturned = response.hasToolResult("get_weather");

// Did anything go wrong during the execution?
boolean anyErrors = response.hasErrors();

Fluent assertion API that integrates with JUnit 5. Import statically:

import static org.atmosphere.ai.test.AiAssertions.assertThat;
// Text assertions — substring checks on the accumulated response text.
assertThat(response)
.containsText("WebSocket") // response text contains this substring
.containsText("real-time"); // checks chain; each must pass
// Lifecycle assertions — completion, latency, and error state.
assertThat(response)
.isComplete() // stream completed normally (no error/timeout)
.completedWithin(Duration.ofSeconds(5)) // total elapsed time under this limit
.hasNoErrors(); // no errors were captured
// Tool assertions — hasToolCall() returns a sub-assertion scoped to that
// tool; .and() returns control to the parent response assertions.
assertThat(response)
.hasToolCall("get_weather") // the named tool was called
.withArgument("city", "Tokyo") // with this exact argument value
.hasResult() // and produced a result
.and() // back to parent assertions
.hasToolCall("convert_temperature") // a second tool call can be checked too
.withArgument("from_unit", "C");
// Event assertions — verify which event types appeared in the stream.
assertThat(response)
.containsEventType(AiEvent.TextDelta.class) // at least one TextDelta emitted
.containsEventType(AiEvent.ToolStart.class); // at least one ToolStart emitted
// Metadata assertions — verify metadata keys were sent during streaming.
assertThat(response)
.hasMetadata("cost") // metadata key exists
.hasMetadata("model");

All assertions return `this` (or a sub-assertion object — call `.and()` to get back to the parent), so they chain naturally:

// One fluent chain can mix lifecycle, text, tool, and metadata checks.
assertThat(response)
.isComplete()
.hasNoErrors()
.containsText("result")
.hasToolCall("search") // enters the tool sub-assertion scope
.withArgument("query", "AI")
.hasResult()
.and() // returns to the top-level response assertions
.completedWithin(Duration.ofSeconds(15))
.hasMetadata("cost");
// Minimal AgentRuntime stub: emits one fixed message, then completes.
// Useful for testing endpoint wiring without any real model behind it.
class MockRuntime implements AgentRuntime {
// Identifier reported for this runtime.
@Override public String name() { return "mock"; }
// Always ready — no external service to check.
@Override public boolean isAvailable() { return true; }
// Selection priority among registered runtimes.
@Override public int priority() { return 0; }
// No configuration needed for a canned response.
@Override public void configure(AiConfig.LlmSettings settings) { }
@Override
public void execute(AgentExecutionContext context, StreamingSession session) {
// Send a single text chunk, then signal normal completion.
session.send("Hello from mock!");
session.complete();
}
}
@Test
void mockResponse() {
    // A canned runtime lets this test run without any real model.
    var mockClient = new AiTestClient(new MockRuntime());
    var reply = mockClient.prompt("Hi");

    // The mock's fixed message should come back as a clean, complete response.
    assertThat(reply)
            .containsText("Hello from mock!")
            .isComplete()
            .hasNoErrors();
}
@Test
void toolsAreInvokedCorrectly() {
    // Ask a question that should force a unit-conversion tool call.
    var conversionClient = new AiTestClient(runtime);
    var conversion = conversionClient.prompt("What's 72°F in Celsius?");

    // The tool must be called with the parsed value and unit, return a
    // result, and the final text should contain the converted figure.
    assertThat(conversion)
            .hasToolCall("convert_temperature")
            .withArgument("value", 72.0)
            .withArgument("from_unit", "F")
            .hasResult()
            .and()
            .containsText("22") // 72°F ≈ 22.2°C
            .hasNoErrors();
}
@Test
void handlesRuntimeErrors() {
    // A runtime wired to fail lets us verify error capture end-to-end.
    var client = new AiTestClient(failingRuntime);
    var response = client.prompt("trigger error");

    // Failure messages on the asserts make diagnosis possible when the
    // error path regresses, instead of a bare "expected true" report.
    assertTrue(response.hasErrors(),
        "expected at least one captured error");
    assertFalse(response.completed(),
        "stream should not complete normally when the runtime fails");
    assertTrue(response.errors().stream()
            .anyMatch(e -> e.contains("expected error")),
        "errors should mention the runtime's failure, got: " + response.errors());
}
@Test
void respondsWithinSLA() {
    // Single source of truth for the latency budget — previously the SLA
    // appeared twice (Duration.ofSeconds(5) and a magic 5000 ms) and
    // could drift apart when one was edited.
    var sla = Duration.ofSeconds(5);

    var client = new AiTestClient(productionRuntime);
    var response = client.prompt("Simple question");

    assertThat(response)
        .isComplete()
        .completedWithin(sla);

    // Also check raw elapsed time, with a diagnostic message on failure.
    assertTrue(response.elapsed().compareTo(sla) < 0,
        "Response took " + response.elapsed().toMillis() + "ms");
}