feat: multimodal AI event creation with image support #1

Merged
old4ever merged 20 commits from image-parse into main 2026-04-07 15:21:28 -04:00
Showing only changes of commit 79f98ebfd3 - Show all commits

View File

@@ -1,10 +1,11 @@
import { headers } from "next/headers";
import { NextResponse } from "next/server";
import { auth } from "@/auth";
import { headers } from "next/headers";
import { extractJsonFromText } from "@/lib/json-utils";
import { openRouterClient } from "@/lib/openrouter-client";
import { AiEventRequestSchema, AiEventResponseSchema } from "@/lib/types";
const MODEL = "openai/gpt-5.4-mini";
// Model identifier used for AI requests: taken from the AI_MODEL environment
// variable when it is defined, otherwise falls back to "openai/gpt-5.4-mini".
// Note that `??` only falls back on null/undefined, so an empty AI_MODEL is kept as "".
const MODEL = process.env.AI_MODEL ?? "openai/gpt-5.4-mini";
const buildSystemPrompt = () => `
You are an assistant that converts natural language and images into an ARRAY of calendar events.
@@ -42,7 +43,26 @@ const callTextOnly = async (systemPrompt: string, prompt: string) => {
});
const rawResponse = await result.getText();
return { rawResponse, startTime: performance.now() };
return { rawResponse };
};
/**
 * Extract the text content from an OpenRouter chat.send response.
 *
 * Reads `choices[0].message.content` from the response and normalizes it to a
 * string:
 * - a string is returned unchanged (including the empty string);
 * - an array containing text parts (`{ type: "text", text: string }`) is
 *   collapsed to the concatenation of those parts' `text`, so callers receive
 *   the model's actual output rather than a JSON dump of the parts wrapper;
 * - any other truthy value is serialized with `JSON.stringify`.
 *
 * @param response - The raw value returned by the chat API; shape is verified at runtime.
 * @returns The message content as a string.
 * @throws Error if the response has no `choices` key or the first choice has
 *   no usable (truthy or string) content.
 */
const extractContentFromChatResponse = (response: unknown): string => {
  /** Narrow an unknown value to a non-null object whose keys can be inspected. */
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  /** Recognize a text content part of a multi-part message. */
  const isTextPart = (part: unknown): part is { type: "text"; text: string } =>
    isRecord(part) && part.type === "text" && typeof part.text === "string";

  if (isRecord(response) && "choices" in response) {
    // Walk choices[0].message.content defensively: every level may be missing.
    const choices: unknown = response.choices;
    const firstChoice: unknown = isRecord(choices) ? choices[0] : undefined;
    const message: unknown = isRecord(firstChoice)
      ? firstChoice.message
      : undefined;
    const content: unknown = isRecord(message) ? message.content : undefined;

    if (typeof content === "string") return content;

    if (Array.isArray(content)) {
      // Multi-part content: the payload lives in the text parts. Stringifying
      // the wrapper array would hand callers the parts structure instead.
      const textParts = content.filter(isTextPart);
      if (textParts.length > 0) {
        return textParts.map((part) => part.text).join("");
      }
    }

    // Any other truthy shape is preserved verbatim as JSON.
    if (content) return JSON.stringify(content);
  }

  throw new Error("Unexpected response format from AI chat API");
};
const callMultimodal = async (
@@ -70,8 +90,6 @@ const callMultimodal = async (
},
];
const startTime = performance.now();
const response = await openRouterClient.chat.send({
chatRequest: {
model: MODEL,
@@ -79,32 +97,8 @@ const callMultimodal = async (
},
});
const rawResponse =
typeof response === "object" &&
"choices" in response &&
response.choices?.[0]?.message
? typeof response.choices[0].message.content === "string"
? response.choices[0].message.content
: JSON.stringify(response.choices[0].message.content)
: JSON.stringify(response);
return { rawResponse, startTime };
};
const extractJsonFromText = (text: string): unknown => {
try {
return JSON.parse(text);
} catch {
const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
if (codeBlockMatch) {
return JSON.parse(codeBlockMatch[1].trim());
}
const arrayMatch = text.match(/\[[\s\S]*\]/);
if (arrayMatch) {
return JSON.parse(arrayMatch[0]);
}
throw new Error(`No JSON found in response: ${text.slice(0, 200)}`);
}
const rawResponse = extractContentFromChatResponse(response);
return { rawResponse };
};
export async function POST(request: Request) {
@@ -133,25 +127,19 @@ export async function POST(request: Request) {
}
const { prompt, imageBase64 } = parsedInput.data;
const inputMode = imageBase64 ? "multimodal" : "text";
const systemPrompt = buildSystemPrompt();
let rawResponse: string | undefined;
try {
const result =
inputMode === "multimodal"
? await callMultimodal(systemPrompt, prompt, imageBase64!)
: await callTextOnly(systemPrompt, prompt!);
const result = imageBase64
? await callMultimodal(systemPrompt, prompt, imageBase64)
: await callTextOnly(systemPrompt, prompt ?? "");
rawResponse = result.rawResponse;
const rawJson = extractJsonFromText(rawResponse);
const rawJson = extractJsonFromText(result.rawResponse);
const validated = AiEventResponseSchema.safeParse(rawJson);
if (!validated.success) {
console.error("AI response validation failed:", {
issues: validated.error.flatten().fieldErrors,
rawResponse,
});
return NextResponse.json(
@@ -167,10 +155,7 @@ export async function POST(request: Request) {
} catch (error) {
console.error("AI Event Creation Error:", error);
return NextResponse.json(
{
error: "Failed to parse AI output",
raw: error instanceof Error ? error.message : String(error),
},
{ error: "Failed to process AI response. Please try again." },
{ status: 500 },
);
}