feat: multimodal AI event creation with image support #1

Merged
old4ever merged 20 commits from image-parse into main 2026-04-07 15:21:28 -04:00
Showing only changes of commit 79f98ebfd3 - Show all commits

View File

@@ -1,10 +1,11 @@
import { headers } from "next/headers";
import { NextResponse } from "next/server"; import { NextResponse } from "next/server";
import { auth } from "@/auth"; import { auth } from "@/auth";
import { headers } from "next/headers"; import { extractJsonFromText } from "@/lib/json-utils";
import { openRouterClient } from "@/lib/openrouter-client"; import { openRouterClient } from "@/lib/openrouter-client";
import { AiEventRequestSchema, AiEventResponseSchema } from "@/lib/types"; import { AiEventRequestSchema, AiEventResponseSchema } from "@/lib/types";
const MODEL = "openai/gpt-5.4-mini"; const MODEL = process.env.AI_MODEL ?? "openai/gpt-5.4-mini";
const buildSystemPrompt = () => ` const buildSystemPrompt = () => `
You are an assistant that converts natural language and images into an ARRAY of calendar events. You are an assistant that converts natural language and images into an ARRAY of calendar events.
@@ -42,7 +43,26 @@ const callTextOnly = async (systemPrompt: string, prompt: string) => {
}); });
const rawResponse = await result.getText(); const rawResponse = await result.getText();
return { rawResponse, startTime: performance.now() }; return { rawResponse };
};
/**
 * Extract the text content from an OpenRouter chat.send response.
 *
 * Reads `choices[0].message.content` and normalises it to a string:
 * - a string is returned unchanged (an empty string included);
 * - a non-empty array consisting solely of `{ type: "text", text: string }`
 *   parts is concatenated, so the caller receives the model's text rather
 *   than a JSON dump of the parts wrapper (which would itself look like a
 *   JSON array to any downstream JSON extraction);
 * - any other truthy value is JSON-stringified.
 *
 * @param response - The raw value returned by the chat API; deliberately
 *   `unknown` because its shape is verified here at runtime.
 * @returns The message content as a string.
 * @throws Error if `response` is not an object with `choices`, or if the
 *   first choice carries no usable content.
 */
const extractContentFromChatResponse = (response: unknown): string => {
  const isTextPart = (part: unknown): part is { type: "text"; text: string } =>
    typeof part === "object" &&
    part !== null &&
    (part as { type?: unknown }).type === "text" &&
    typeof (part as { text?: unknown }).text === "string";

  if (
    typeof response === "object" &&
    response !== null &&
    "choices" in response
  ) {
    const { choices } = response as {
      choices?: ReadonlyArray<{ message?: { content?: unknown } | null } | null> | null;
    };
    const content: unknown = choices?.[0]?.message?.content;

    if (typeof content === "string") return content;

    // Content-part arrays: join pure-text parts instead of serialising them.
    if (
      Array.isArray(content) &&
      content.length > 0 &&
      content.every(isTextPart)
    ) {
      return (content as ReadonlyArray<{ text: string }>)
        .map((part) => part.text)
        .join("");
    }

    // Anything else that is truthy keeps the previous behaviour.
    if (content) return JSON.stringify(content);
  }
  throw new Error("Unexpected response format from AI chat API");
};
const callMultimodal = async ( const callMultimodal = async (
@@ -70,8 +90,6 @@ const callMultimodal = async (
}, },
]; ];
const startTime = performance.now();
const response = await openRouterClient.chat.send({ const response = await openRouterClient.chat.send({
chatRequest: { chatRequest: {
model: MODEL, model: MODEL,
@@ -79,32 +97,8 @@ const callMultimodal = async (
}, },
}); });
const rawResponse = const rawResponse = extractContentFromChatResponse(response);
typeof response === "object" && return { rawResponse };
"choices" in response &&
response.choices?.[0]?.message
? typeof response.choices[0].message.content === "string"
? response.choices[0].message.content
: JSON.stringify(response.choices[0].message.content)
: JSON.stringify(response);
return { rawResponse, startTime };
};
/**
 * Pull a JSON value out of free-form text.
 *
 * Candidates are tried in order and the first one that parses wins:
 * 1. the whole text;
 * 2. the body of the first fenced code block (``` or ```json);
 * 3. the span from the first `[` to the last `]`.
 *
 * A candidate that fails to parse no longer aborts the search; the next
 * candidate is tried instead, so a non-JSON code fence does not mask a
 * valid array elsewhere in the text.
 *
 * @param text - Raw text that is expected to contain JSON.
 * @returns The parsed JSON value (unvalidated; callers must check its shape).
 * @throws Error with a "No JSON found in response" message (including the
 *   first 200 characters of `text`) when no candidate parses.
 */
const extractJsonFromText = (text: string): unknown => {
  const fencedBody = text.match(/```(?:json)?\s*([\s\S]*?)```/)?.[1]?.trim();
  const bracketSpan = text.match(/\[[\s\S]*\]/)?.[0];

  for (const candidate of [text, fencedBody, bracketSpan]) {
    if (candidate === undefined) continue;
    try {
      return JSON.parse(candidate);
    } catch {
      // Not valid JSON in this form; fall through to the next candidate.
    }
  }

  throw new Error(`No JSON found in response: ${text.slice(0, 200)}`);
};
export async function POST(request: Request) { export async function POST(request: Request) {
@@ -133,25 +127,19 @@ export async function POST(request: Request) {
} }
const { prompt, imageBase64 } = parsedInput.data; const { prompt, imageBase64 } = parsedInput.data;
const inputMode = imageBase64 ? "multimodal" : "text";
const systemPrompt = buildSystemPrompt(); const systemPrompt = buildSystemPrompt();
let rawResponse: string | undefined;
try { try {
const result = const result = imageBase64
inputMode === "multimodal" ? await callMultimodal(systemPrompt, prompt, imageBase64)
? await callMultimodal(systemPrompt, prompt, imageBase64!) : await callTextOnly(systemPrompt, prompt ?? "");
: await callTextOnly(systemPrompt, prompt!);
rawResponse = result.rawResponse; const rawJson = extractJsonFromText(result.rawResponse);
const rawJson = extractJsonFromText(rawResponse);
const validated = AiEventResponseSchema.safeParse(rawJson); const validated = AiEventResponseSchema.safeParse(rawJson);
if (!validated.success) { if (!validated.success) {
console.error("AI response validation failed:", { console.error("AI response validation failed:", {
issues: validated.error.flatten().fieldErrors, issues: validated.error.flatten().fieldErrors,
rawResponse,
}); });
return NextResponse.json( return NextResponse.json(
@@ -167,10 +155,7 @@ export async function POST(request: Request) {
} catch (error) { } catch (error) {
console.error("AI Event Creation Error:", error); console.error("AI Event Creation Error:", error);
return NextResponse.json( return NextResponse.json(
{ { error: "Failed to process AI response. Please try again." },
error: "Failed to parse AI output",
raw: error instanceof Error ? error.message : String(error),
},
{ status: 500 }, { status: 500 },
); );
} }