feat: multimodal AI event creation with image support #1

Merged
old4ever merged 20 commits from image-parse into main 2026-04-07 15:21:28 -04:00
Showing only changes of commit 8d1b04f646 - Show all commits

View File

@@ -2,37 +2,12 @@ import { NextResponse } from "next/server";
import { auth } from "@/auth";
import { headers } from "next/headers";
import { openRouterClient } from "@/lib/openrouter-client";
import { AiEventRequestSchema, AiEventResponseSchema } from "@/lib/types";
export async function POST(request: Request) {
const session = await auth.api.getSession({
headers: await headers(),
});
const MODEL = "openai/gpt-5.4-mini";
if (!session?.user) {
return NextResponse.json(
{ error: "Authentication required" },
{ status: 401 },
);
}
const { prompt } = await request.json();
// Validate prompt input
if (!prompt || typeof prompt !== "string" || prompt.trim().length === 0) {
return NextResponse.json(
{ error: "Prompt is required and must be a non-empty string" },
{ status: 400 },
);
}
if (prompt.length > 2000) {
return NextResponse.json(
{ error: "Prompt must be less than 2000 characters" },
{ status: 400 },
);
}
const systemPrompt = `
You are an assistant that converts natural language into an ARRAY of calendar events.
const buildSystemPrompt = () => `
You are an assistant that converts natural language and images into an ARRAY of calendar events.
TypeScript type:
{
@@ -55,25 +30,146 @@ Rules:
- If no end time is given (and event is not allDay), default to 1 hour after start.
- If multiple events are described, return multiple.
- If recurrence is implied (e.g. "every Monday", "daily for 10 days", "monthly on the 15th"), generate a recurrenceRule.
- When analyzing an image, extract ALL visible event details: titles, dates, times, locations, descriptions.
- Output ONLY valid JSON (no prose).
`;
try {
/**
 * Sends a text-only request to the model and returns its raw text output.
 *
 * Calls `openRouterClient.callModel` with `MODEL`, passing the system prompt
 * as `instructions` and the user prompt as `input`, then awaits `getText()`.
 *
 * @param systemPrompt - Instructions given to the model.
 * @param prompt - The user's natural-language description of the events.
 * @returns `rawResponse`, the unparsed text returned by the model, and
 *   `startTime`, a `performance.now()` reading taken before the request is
 *   issued.
 */
const callTextOnly = async (systemPrompt: string, prompt: string) => {
  // Read the clock BEFORE issuing the request so the timestamp marks the
  // start of the model call rather than its completion.
  const startTime = performance.now();
  const result = openRouterClient.callModel({
    model: MODEL,
    instructions: systemPrompt,
    input: prompt,
  });
  const rawResponse = await result.getText();
  return { rawResponse, startTime };
};
/**
 * Sends a text + image request to the model and returns its raw text output.
 *
 * Builds a system message from `systemPrompt` and a user message with two
 * parts: a text part (the prompt, or a default extraction instruction when
 * the prompt is empty or missing) and an `image_url` part whose URL is
 * `imageBase64`. The request is sent through `openRouterClient.chat.send`
 * using `MODEL`.
 *
 * @param systemPrompt - Instructions given to the model.
 * @param prompt - Optional user text accompanying the image.
 * @param imageBase64 - Value passed verbatim as the image URL
 *   (presumably a base64 data URL — confirm against the request schema).
 * @returns `rawResponse`, always a string: the first choice's message content
 *   when it is a string, otherwise a JSON serialization of that content, or
 *   of the whole response when no message is present. `startTime` is a
 *   `performance.now()` reading taken just before the request is sent.
 */
const callMultimodal = async (
  systemPrompt: string,
  prompt: string | undefined,
  imageBase64: string,
) => {
  const messages = [
    {
      role: "system" as const,
      content: systemPrompt,
    },
    {
      role: "user" as const,
      content: [
        {
          type: "text" as const,
          // `||` is deliberate: an empty-string prompt also gets the default.
          text: prompt || "Extract all calendar events from this image.",
        },
        {
          type: "image_url" as const,
          imageUrl: { url: imageBase64 },
        },
      ],
    },
  ];

  const startTime = performance.now();
  const response = await openRouterClient.chat.send({
    chatRequest: {
      model: MODEL,
      messages,
    },
  });

  // `typeof null === "object"`, so null must be excluded before using `in`,
  // which throws a TypeError on a null right-hand operand.
  const message =
    typeof response === "object" && response !== null && "choices" in response
      ? response.choices?.[0]?.message
      : undefined;

  // JSON.stringify(undefined) yields undefined rather than a string; coalesce
  // to null first so rawResponse is always a string ("null" in that case).
  let rawResponse: string;
  if (!message) {
    rawResponse = JSON.stringify(response ?? null);
  } else if (typeof message.content === "string") {
    rawResponse = message.content;
  } else {
    rawResponse = JSON.stringify(message.content ?? null);
  }

  return { rawResponse, startTime };
};
/**
 * Extracts a JSON value from a model response that may wrap it in prose.
 *
 * Candidates are tried in order, and the first one that parses wins:
 *   1. the entire text;
 *   2. the contents of the first Markdown code fence (optionally tagged `json`);
 *   3. the span from the first `[` to the last `]` in the text.
 *
 * A candidate that fails to parse does not abort the search; the next one is
 * tried, so an unparseable fenced block cannot mask a valid array elsewhere.
 *
 * @param text - Raw text returned by the model.
 * @returns The parsed JSON value (unvalidated, hence `unknown`).
 * @throws Error when no candidate parses; the message includes the first
 *   200 characters of `text`.
 */
const extractJsonFromText = (text: string): unknown => {
  // Thunks keep the regex work lazy: nothing is matched if the whole text parses.
  const candidates: Array<() => string | undefined> = [
    () => text,
    () => text.match(/```(?:json)?\s*([\s\S]*?)```/)?.[1]?.trim(),
    () => text.match(/\[[\s\S]*\]/)?.[0],
  ];

  for (const getCandidate of candidates) {
    const candidate = getCandidate();
    if (candidate === undefined) {
      continue;
    }
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON; fall through to the next extraction strategy.
    }
  }

  throw new Error(`No JSON found in response: ${text.slice(0, 200)}`);
};
export async function POST(request: Request) {
const session = await auth.api.getSession({
headers: await headers(),
});
if (!session?.user) {
return NextResponse.json(
{ error: "Authentication required" },
{ status: 401 },
);
}
const body = await request.json();
const parsedInput = AiEventRequestSchema.safeParse(body);
if (!parsedInput.success) {
return NextResponse.json(
{
error: "Invalid input",
details: parsedInput.error.flatten().fieldErrors,
},
{ status: 400 },
);
}
const { prompt, imageBase64 } = parsedInput.data;
const inputMode = imageBase64 ? "multimodal" : "text";
const systemPrompt = buildSystemPrompt();
let rawResponse: string | undefined;
try {
const result =
inputMode === "multimodal"
? await callMultimodal(systemPrompt, prompt, imageBase64!)
: await callTextOnly(systemPrompt, prompt!);
rawResponse = result.rawResponse;
const rawJson = extractJsonFromText(rawResponse);
const validated = AiEventResponseSchema.safeParse(rawJson);
if (!validated.success) {
console.error("AI response validation failed:", {
issues: validated.error.flatten().fieldErrors,
rawResponse,
});
return NextResponse.json(
{
error: "AI returned invalid event data",
details: validated.error.flatten().fieldErrors,
},
{ status: 422 },
);
}
return NextResponse.json(validated.data);
} catch (error) {
console.error("AI Event Creation Error:", error);
return NextResponse.json(
{
error: "Failed to parse AI output",
raw: error instanceof Error ? error.message : error,
raw: error instanceof Error ? error.message : String(error),
},
{ status: 500 },
);