feat: multimodal AI event creation with image support #1
@@ -2,37 +2,12 @@ import { NextResponse } from "next/server";
|
||||
import { auth } from "@/auth";
|
||||
import { headers } from "next/headers";
|
||||
import { openRouterClient } from "@/lib/openrouter-client";
|
||||
import { AiEventRequestSchema, AiEventResponseSchema } from "@/lib/types";
|
||||
|
||||
export async function POST(request: Request) {
|
||||
const session = await auth.api.getSession({
|
||||
headers: await headers(),
|
||||
});
|
||||
const MODEL = "openai/gpt-5.4-mini";
|
||||
|
||||
if (!session?.user) {
|
||||
return NextResponse.json(
|
||||
{ error: "Authentication required" },
|
||||
{ status: 401 },
|
||||
);
|
||||
}
|
||||
|
||||
const { prompt } = await request.json();
|
||||
|
||||
// Validate prompt input
|
||||
if (!prompt || typeof prompt !== "string" || prompt.trim().length === 0) {
|
||||
return NextResponse.json(
|
||||
{ error: "Prompt is required and must be a non-empty string" },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
if (prompt.length > 2000) {
|
||||
return NextResponse.json(
|
||||
{ error: "Prompt must be less than 2000 characters" },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
const systemPrompt = `
|
||||
You are an assistant that converts natural language into an ARRAY of calendar events.
|
||||
const buildSystemPrompt = () => `
|
||||
You are an assistant that converts natural language and images into an ARRAY of calendar events.
|
||||
TypeScript type:
|
||||
|
||||
{
|
||||
@@ -55,25 +30,146 @@ Rules:
|
||||
- If no end time is given (and event is not allDay), default to 1 hour after start.
|
||||
- If multiple events are described, return multiple.
|
||||
- If recurrence is implied (e.g. "every Monday", "daily for 10 days", "monthly on the 15th"), generate a recurrenceRule.
|
||||
- When analyzing an image, extract ALL visible event details: titles, dates, times, locations, descriptions.
|
||||
- Output ONLY valid JSON (no prose).
|
||||
`;
|
||||
|
||||
try {
|
||||
const result = openRouterClient.callModel({
|
||||
model: "openai/gpt-5.4-mini",
|
||||
instructions: systemPrompt,
|
||||
input: prompt,
|
||||
});
|
||||
/**
 * Sends a text-only prompt to the model and returns its raw text reply.
 *
 * @param systemPrompt - Instructions passed to the model as `instructions`.
 * @param prompt - User text passed to the model as `input`.
 * @returns The raw text reply and the `performance.now()` timestamp taken
 *   immediately before the request was issued.
 */
const callTextOnly = async (systemPrompt: string, prompt: string) => {
  // Take the timestamp BEFORE the request so it marks the start of the model
  // round trip (same convention as callMultimodal); sampling it after
  // getText() resolves would make any elapsed-time calculation read ~0.
  const startTime = performance.now();

  const result = openRouterClient.callModel({
    model: MODEL,
    instructions: systemPrompt,
    input: prompt,
  });

  const rawResponse = await result.getText();

  return { rawResponse, startTime };
};
|
||||
|
||||
/**
 * Sends a system prompt plus a combined text + image user message to the
 * model through the chat endpoint and returns the reply as a string.
 *
 * @param systemPrompt - Content of the system message.
 * @param prompt - Optional user text; when empty or undefined a default
 *   "extract events" instruction is sent instead.
 * @param imageBase64 - Value placed in the image part's `url` field
 *   (presumably a base64 data URL — confirm against the caller).
 * @returns The reply text (or a JSON-stringified fallback) and the
 *   `performance.now()` timestamp taken just before the request.
 */
const callMultimodal = async (
  systemPrompt: string,
  prompt: string | undefined,
  imageBase64: string,
) => {
  const userText = prompt || "Extract all calendar events from this image.";

  const systemMessage = {
    role: "system" as const,
    content: systemPrompt,
  };
  const userMessage = {
    role: "user" as const,
    content: [
      { type: "text" as const, text: userText },
      { type: "image_url" as const, imageUrl: { url: imageBase64 } },
    ],
  };

  const startTime = performance.now();

  const response = await openRouterClient.chat.send({
    chatRequest: {
      model: MODEL,
      messages: [systemMessage, userMessage],
    },
  });

  let rawResponse: string;
  if (
    typeof response === "object" &&
    "choices" in response &&
    response.choices?.[0]?.message
  ) {
    // Plain-string content is returned verbatim; structured content is serialized.
    const { content } = response.choices[0].message;
    rawResponse = typeof content === "string" ? content : JSON.stringify(content);
  } else {
    // No usable first choice: fall back to serializing the whole response.
    rawResponse = JSON.stringify(response);
  }

  return { rawResponse, startTime };
};
|
||||
|
||||
/**
 * Extracts a JSON value from model output that may be wrapped in prose or a
 * markdown code fence.
 *
 * Candidates are tried in order, and the first one that parses wins:
 *   1. the whole text,
 *   2. the contents of the first fenced code block (optionally tagged `json`),
 *   3. the span from the first `[` to the last `]`.
 *
 * A candidate that fails to parse is skipped instead of aborting, so a
 * malformed fenced block no longer hides a valid array elsewhere in the text.
 *
 * @param text - Raw model output.
 * @returns The parsed JSON value; callers must validate its shape.
 * @throws Error when no candidate parses; the message carries the first
 *   200 characters of `text`.
 */
const extractJsonFromText = (text: string): unknown => {
  const candidates: string[] = [text];

  const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (codeBlockMatch) {
    candidates.push(codeBlockMatch[1].trim());
  }

  const arrayMatch = text.match(/\[[\s\S]*\]/);
  if (arrayMatch) {
    candidates.push(arrayMatch[0]);
  }

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON; fall through to the next candidate.
    }
  }

  throw new Error(`No JSON found in response: ${text.slice(0, 200)}`);
};
|
||||
|
||||
export async function POST(request: Request) {
|
||||
const session = await auth.api.getSession({
|
||||
headers: await headers(),
|
||||
});
|
||||
|
||||
if (!session?.user) {
|
||||
return NextResponse.json(
|
||||
{ error: "Authentication required" },
|
||||
{ status: 401 },
|
||||
);
|
||||
}
|
||||
|
||||
const body = await request.json();
|
||||
const parsedInput = AiEventRequestSchema.safeParse(body);
|
||||
|
||||
if (!parsedInput.success) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "Invalid input",
|
||||
details: parsedInput.error.flatten().fieldErrors,
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
const { prompt, imageBase64 } = parsedInput.data;
|
||||
const inputMode = imageBase64 ? "multimodal" : "text";
|
||||
const systemPrompt = buildSystemPrompt();
|
||||
let rawResponse: string | undefined;
|
||||
|
||||
try {
|
||||
const result =
|
||||
inputMode === "multimodal"
|
||||
? await callMultimodal(systemPrompt, prompt, imageBase64!)
|
||||
: await callTextOnly(systemPrompt, prompt!);
|
||||
|
||||
rawResponse = result.rawResponse;
|
||||
|
||||
const rawJson = extractJsonFromText(rawResponse);
|
||||
const validated = AiEventResponseSchema.safeParse(rawJson);
|
||||
|
||||
if (!validated.success) {
|
||||
console.error("AI response validation failed:", {
|
||||
issues: validated.error.flatten().fieldErrors,
|
||||
rawResponse,
|
||||
});
|
||||
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "AI returned invalid event data",
|
||||
details: validated.error.flatten().fieldErrors,
|
||||
},
|
||||
{ status: 422 },
|
||||
);
|
||||
}
|
||||
|
||||
return NextResponse.json(validated.data);
|
||||
} catch (error) {
|
||||
console.error("AI Event Creation Error:", error);
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "Failed to parse AI output",
|
||||
raw: error instanceof Error ? error.message : error,
|
||||
raw: error instanceof Error ? error.message : String(error),
|
||||
},
|
||||
{ status: 500 },
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user