feat: multimodal AI event creation with image support #1
@@ -2,37 +2,12 @@ import { NextResponse } from "next/server";
|
|||||||
import { auth } from "@/auth";
|
import { auth } from "@/auth";
|
||||||
import { headers } from "next/headers";
|
import { headers } from "next/headers";
|
||||||
import { openRouterClient } from "@/lib/openrouter-client";
|
import { openRouterClient } from "@/lib/openrouter-client";
|
||||||
|
import { AiEventRequestSchema, AiEventResponseSchema } from "@/lib/types";
|
||||||
|
|
||||||
export async function POST(request: Request) {
|
// Model identifier shared by the text-only and multimodal model calls below.
const MODEL = "openai/gpt-5.4-mini";
|
||||||
const session = await auth.api.getSession({
|
|
||||||
headers: await headers(),
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!session?.user) {
|
const buildSystemPrompt = () => `
|
||||||
return NextResponse.json(
|
You are an assistant that converts natural language and images into an ARRAY of calendar events.
|
||||||
{ error: "Authentication required" },
|
|
||||||
{ status: 401 },
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const { prompt } = await request.json();
|
|
||||||
|
|
||||||
// Validate prompt input
|
|
||||||
if (!prompt || typeof prompt !== "string" || prompt.trim().length === 0) {
|
|
||||||
return NextResponse.json(
|
|
||||||
{ error: "Prompt is required and must be a non-empty string" },
|
|
||||||
{ status: 400 },
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if (prompt.length > 2000) {
|
|
||||||
return NextResponse.json(
|
|
||||||
{ error: "Prompt must be less than 2000 characters" },
|
|
||||||
{ status: 400 },
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
const systemPrompt = `
|
|
||||||
You are an assistant that converts natural language into an ARRAY of calendar events.
|
|
||||||
TypeScript type:
|
TypeScript type:
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -55,25 +30,146 @@ Rules:
|
|||||||
- If no end time is given (and event is not allDay), default to 1 hour after start.
|
- If no end time is given (and event is not allDay), default to 1 hour after start.
|
||||||
- If multiple events are described, return multiple.
|
- If multiple events are described, return multiple.
|
||||||
- If recurrence is implied (e.g. "every Monday", "daily for 10 days", "monthly on the 15th"), generate a recurrenceRule.
|
- If recurrence is implied (e.g. "every Monday", "daily for 10 days", "monthly on the 15th"), generate a recurrenceRule.
|
||||||
|
- When analyzing an image, extract ALL visible event details: titles, dates, times, locations, descriptions.
|
||||||
- Output ONLY valid JSON (no prose).
|
- Output ONLY valid JSON (no prose).
|
||||||
`;
|
`;
|
||||||
|
|
||||||
try {
|
/**
 * Sends a text-only request to the model and returns its raw text reply.
 *
 * @param systemPrompt - Forwarded to the client as `instructions`.
 * @param prompt - Forwarded to the client as `input`.
 * @returns The text obtained from `getText()` together with `startTime`, a
 *   `performance.now()` reading taken immediately before the request is issued.
 */
const callTextOnly = async (systemPrompt: string, prompt: string) => {
  // Read the clock BEFORE issuing the request. Reading it after the awaited
  // reply (as was done previously) makes any latency derived from `startTime`
  // collapse to ~0, and disagrees with callMultimodal, which samples the clock
  // before sending.
  const startTime = performance.now();

  const result = openRouterClient.callModel({
    model: MODEL,
    instructions: systemPrompt,
    input: prompt,
  });

  const rawResponse = await result.getText();

  return { rawResponse, startTime };
};
|
||||||
|
|
||||||
|
/**
 * Sends a chat request containing a text part and an image part, and returns
 * the reply as a string.
 *
 * @param systemPrompt - Content of the `system` message.
 * @param prompt - Text part of the `user` message; when falsy, a default
 *   "extract all events" instruction is sent instead.
 * @param imageBase64 - Value placed in the image part's `imageUrl.url`.
 * @returns `rawResponse` (see below) and `startTime`, a `performance.now()`
 *   reading taken just before the request is sent.
 *
 * `rawResponse` is the first choice's message content when that content is a
 * string, the JSON serialization of that content when it is not a string, and
 * the JSON serialization of the whole response when no first message exists.
 */
const callMultimodal = async (
  systemPrompt: string,
  prompt: string | undefined,
  imageBase64: string,
) => {
  const userText = prompt || "Extract all calendar events from this image.";

  const systemMessage = {
    role: "system" as const,
    content: systemPrompt,
  };
  const userMessage = {
    role: "user" as const,
    content: [
      { type: "text" as const, text: userText },
      { type: "image_url" as const, imageUrl: { url: imageBase64 } },
    ],
  };

  const startTime = performance.now();

  const response = await openRouterClient.chat.send({
    chatRequest: {
      model: MODEL,
      messages: [systemMessage, userMessage],
    },
  });

  // Only look for a message when the response is an object exposing `choices`.
  const firstMessage =
    typeof response === "object" && "choices" in response
      ? response.choices?.[0]?.message
      : undefined;

  let rawResponse: string;
  if (!firstMessage) {
    // No usable message: hand back the serialized response as-is.
    rawResponse = JSON.stringify(response);
  } else if (typeof firstMessage.content === "string") {
    rawResponse = firstMessage.content;
  } else {
    rawResponse = JSON.stringify(firstMessage.content);
  }

  return { rawResponse, startTime };
};
|
||||||
|
|
||||||
|
/**
 * Pulls a JSON value out of a model reply that may wrap it in prose or
 * Markdown code fences.
 *
 * Candidates are tried in this order, and the first one that parses wins:
 *   1. the whole text;
 *   2. the contents of each ``` / ```json fenced block, in document order;
 *   3. the span from the first `[` to the last `]` in the text.
 *
 * A candidate that fails to parse no longer aborts the search: previously a
 * non-JSON fenced block threw from `JSON.parse` and the array fallback was
 * never attempted.
 *
 * @param text - Raw reply text.
 * @returns The parsed JSON value (unvalidated — callers must check its shape).
 * @throws Error when no candidate parses; the message includes the first 200
 *   characters of `text`.
 */
const extractJsonFromText = (text: string): unknown => {
  const candidates: string[] = [text];

  for (const fence of text.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
    const inner = fence[1];
    if (inner !== undefined) {
      candidates.push(inner.trim());
    }
  }

  // Greedy on purpose: spans the first "[" through the last "]".
  const arrayMatch = text.match(/\[[\s\S]*\]/);
  if (arrayMatch) {
    candidates.push(arrayMatch[0]);
  }

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON — fall through to the next candidate.
    }
  }

  throw new Error(`No JSON found in response: ${text.slice(0, 200)}`);
};
|
||||||
|
|
||||||
|
export async function POST(request: Request) {
|
||||||
|
const session = await auth.api.getSession({
|
||||||
|
headers: await headers(),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!session?.user) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: "Authentication required" },
|
||||||
|
{ status: 401 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const body = await request.json();
|
||||||
|
const parsedInput = AiEventRequestSchema.safeParse(body);
|
||||||
|
|
||||||
|
if (!parsedInput.success) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{
|
||||||
|
error: "Invalid input",
|
||||||
|
details: parsedInput.error.flatten().fieldErrors,
|
||||||
|
},
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const { prompt, imageBase64 } = parsedInput.data;
|
||||||
|
const inputMode = imageBase64 ? "multimodal" : "text";
|
||||||
|
const systemPrompt = buildSystemPrompt();
|
||||||
|
let rawResponse: string | undefined;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result =
|
||||||
|
inputMode === "multimodal"
|
||||||
|
? await callMultimodal(systemPrompt, prompt, imageBase64!)
|
||||||
|
: await callTextOnly(systemPrompt, prompt!);
|
||||||
|
|
||||||
|
rawResponse = result.rawResponse;
|
||||||
|
|
||||||
|
const rawJson = extractJsonFromText(rawResponse);
|
||||||
|
const validated = AiEventResponseSchema.safeParse(rawJson);
|
||||||
|
|
||||||
|
if (!validated.success) {
|
||||||
|
console.error("AI response validation failed:", {
|
||||||
|
issues: validated.error.flatten().fieldErrors,
|
||||||
|
rawResponse,
|
||||||
|
});
|
||||||
|
|
||||||
|
return NextResponse.json(
|
||||||
|
{
|
||||||
|
error: "AI returned invalid event data",
|
||||||
|
details: validated.error.flatten().fieldErrors,
|
||||||
|
},
|
||||||
|
{ status: 422 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NextResponse.json(validated.data);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("AI Event Creation Error:", error);
|
console.error("AI Event Creation Error:", error);
|
||||||
return NextResponse.json(
|
return NextResponse.json(
|
||||||
{
|
{
|
||||||
error: "Failed to parse AI output",
|
error: "Failed to parse AI output",
|
||||||
raw: error instanceof Error ? error.message : error,
|
raw: error instanceof Error ? error.message : String(error),
|
||||||
},
|
},
|
||||||
{ status: 500 },
|
{ status: 500 },
|
||||||
);
|
);
|
||||||
|
|||||||
Reference in New Issue
Block a user