/**
 * Shared cookie handling utilities for marketplace scrapers.
 *
 * File: packages/core/src/utils/cookies.ts
 *
 * Provenance: introduced by the commit "refactor: add shared cookie utility to
 * core package" (9bc57d6b549302db3afc7eb74f01add102463f1e, Dmytro Stanchiev,
 * Fri, 23 Jan 2026 19:28:44 -0500, Co-Authored-By: Claude Opus 4.5). The commit
 * moves cookie parsing logic to a dedicated utility module that can be shared
 * across all scrapers, and supports both JSON array and cookie string formats
 * for all input sources (parameter, env var, file).
 *
 * The same commit re-exports this module from packages/core/src/index.ts,
 * moving the "Export shared utilities" comment above the utility exports:
 *
 *   // Export shared types
 *   export * from "./types/common";
 *   // Export shared utilities
 *   export * from "./utils/cookies";
 *   export * from "./utils/delay";
 *   export * from "./utils/format";
 *   export * from "./utils/http";
 */

/** A single cookie, as produced by the parsers in this module. */
export interface Cookie {
  name: string;
  value: string;
  /** A leading "." marks a domain cookie (also sent to subdomains); otherwise host-only. */
  domain: string;
  path: string;
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  session?: boolean;
  /** Expiry in seconds since the Unix epoch (compared against `Date.now() / 1000`). */
  expirationDate?: number;
  partitionKey?: Record<string, unknown>;
  storeId?: string;
}

export interface CookieConfig {
  /** Name used in log messages (e.g., "Facebook", "Kijiji") */
  name: string;
  /** Domain for cookies (e.g., ".facebook.com", ".kijiji.ca") */
  domain: string;
  /** Environment variable name (e.g., "FACEBOOK_COOKIE") */
  envVar: string;
  /** Path to cookie file (e.g., "./cookies/facebook.json") */
  filePath: string;
}

/** Minimum shape a JSON entry must have to be accepted as a cookie. */
type RawCookie = Partial<Cookie> & Pick<Cookie, "name" | "value">;

/** Percent-decode a cookie value, keeping the raw text when it is not valid percent-encoding. */
function decodeCookieValue(raw: string): string {
  try {
    return decodeURIComponent(raw);
  } catch {
    // decodeURIComponent throws URIError on malformed escapes (e.g. "100%");
    // one odd value must not discard every other cookie in the string.
    return raw;
  }
}

/** True when `entry` is a non-null object whose `name` and `value` are both strings. */
function isRawCookie(entry: unknown): entry is RawCookie {
  if (typeof entry !== "object" || entry === null) {
    return false;
  }
  const candidate = entry as Record<string, unknown>;
  return (
    typeof candidate.name === "string" && typeof candidate.value === "string"
  );
}

/** True when a cookie with `cookieDomain` should be sent to `host` (already lower-cased). */
function cookieDomainMatches(cookieDomain: unknown, host: string): boolean {
  // Cookies built outside the parsers may lack a domain at runtime; never send those.
  if (typeof cookieDomain !== "string") {
    return false;
  }
  const domain = cookieDomain.toLowerCase();
  if (domain.startsWith(".")) {
    // Domain cookie: the bare domain itself, or any host ending in ".<domain>".
    // Matching on the dotted form keeps ".book.com" from matching "facebook.com".
    return host === domain.slice(1) || host.endsWith(domain);
  }
  // Host-only cookie
  return domain === host;
}

/**
 * Parse cookie string format into Cookie array.
 * Supports format: "name1=value1; name2=value2"
 *
 * Pairs without "=", with an empty name, or with an empty value are skipped.
 * Values are percent-decoded when possible; a value that is not valid
 * percent-encoding is kept verbatim instead of aborting the whole parse.
 *
 * @param cookieString - Raw cookie string; blank input yields an empty array.
 * @param domain - Domain assigned to every parsed cookie.
 * @returns Parsed cookies with path "/", secure=true, httpOnly=false, sameSite="lax".
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];
  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    // Split on the first "=" only; values may themselves contain "=".
    const separator = pair.indexOf("=");
    if (separator === -1) {
      continue;
    }

    const name = pair.slice(0, separator).trim();
    const rawValue = pair.slice(separator + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    cookies.push({
      name,
      value: decodeCookieValue(rawValue),
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined,
    });
  }
  return cookies;
}

/**
 * Parse JSON array format into Cookie array.
 * Supports format: [{"name": "foo", "value": "bar", ...}]
 *
 * Entries without string `name` and `value` are dropped. A missing or
 * non-string `domain` is replaced by `defaultDomain` (or "" when none is
 * given, which matches no ordinary host), and a missing `path` becomes "/",
 * so every returned object really satisfies `Cookie`.
 *
 * @param jsonString - JSON text; anything other than an array yields [].
 * @param defaultDomain - Optional domain for entries that do not carry one.
 * @returns The valid cookies found in the array.
 * @throws SyntaxError when `jsonString` is not valid JSON.
 */
export function parseJsonCookies(
  jsonString: string,
  defaultDomain?: string,
): Cookie[] {
  const parsed: unknown = JSON.parse(jsonString);
  if (!Array.isArray(parsed)) {
    return [];
  }

  const cookies: Cookie[] = [];
  for (const entry of parsed as unknown[]) {
    if (!isRawCookie(entry)) {
      continue;
    }
    cookies.push({
      ...entry,
      domain:
        typeof entry.domain === "string" ? entry.domain : (defaultDomain ?? ""),
      path: typeof entry.path === "string" ? entry.path : "/",
    });
  }
  return cookies;
}

/**
 * Try to parse cookies from a string (tries JSON first, then cookie string format).
 *
 * Input that parses as a JSON array is treated as authoritative even if it
 * contains no valid cookies; it is not re-read as a cookie string, which
 * would only fabricate cookies out of JSON punctuation.
 *
 * @param input - JSON array text or "name=value; ..." cookie string.
 * @param defaultDomain - Domain for cookie-string cookies and for JSON entries lacking one.
 * @returns Parsed cookies, or [] when nothing valid was found.
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  const trimmed = input.trim();

  // Only text starting with "[" can be a JSON array; anything else could never
  // yield cookies from the JSON parser, so go straight to the string format.
  if (trimmed.startsWith("[")) {
    try {
      return parseJsonCookies(trimmed, defaultDomain);
    } catch {
      // JSON parse failed, try cookie string format
    }
  }

  // Try cookie string format
  return parseCookieString(input, defaultDomain);
}

/**
 * Load cookies from file (supports both JSON array and cookie string formats).
 *
 * @param filePath - Path of the cookie file, read through `Bun.file`.
 * @param defaultDomain - Domain applied to cookies that do not specify one.
 * @returns Parsed cookies, or [] when the file does not exist.
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const file = Bun.file(filePath);
  if (!(await file.exists())) {
    return [];
  }

  const content = await file.text();
  return parseCookiesAuto(content.trim(), defaultDomain);
}

/**
 * Format cookies array into Cookie header string for HTTP requests.
 *
 * Keeps cookies whose domain applies to `targetDomain` (case-insensitive;
 * domain cookies match the domain itself and its subdomains, host-only
 * cookies match exactly) and that have not expired.
 *
 * @param cookies - Candidate cookies.
 * @param targetDomain - Host the request is being sent to.
 * @returns "name1=value1; name2=value2", or "" when no cookie applies.
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  return cookies
    .filter((cookie) => cookieDomainMatches(cookie.domain, host))
    // A missing (or zero) expirationDate is treated as "never expires".
    .filter(
      (cookie) => !cookie.expirationDate || cookie.expirationDate >= nowSeconds,
    )
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}

/**
 * Load cookies with priority: URL param > ENV var > file.
 * Supports both JSON array and cookie string formats for all sources.
 *
 * A source that is present but yields no valid cookies logs a warning and the
 * next source is tried.
 *
 * @param config - Log name, default domain, env var name and file path to use.
 * @param cookiesSource - Optional cookies passed directly (highest priority).
 * @returns The cookies from the first source that provides at least one.
 * @throws Error when no source provides a valid cookie.
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Priority 1: URL/API parameter (if provided)
  if (cookiesSource) {
    const cookies = parseCookiesAuto(cookiesSource, config.domain);
    if (cookies.length > 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from parameter`,
      );
      return cookies;
    }
    console.warn(
      `${config.name} cookies parameter provided but no valid cookies extracted`,
    );
  }

  // Priority 2: Environment variable
  const envValue = process.env[config.envVar];
  if (envValue?.trim()) {
    const cookies = parseCookiesAuto(envValue, config.domain);
    if (cookies.length > 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return cookies;
    }
    console.warn(`${config.envVar} env var contains no valid cookies`);
  }

  // Priority 3: Cookie file (fallback)
  try {
    const cookies = await loadCookiesFromFile(config.filePath, config.domain);
    if (cookies.length > 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from ${config.filePath}`,
      );
      return cookies;
    }
  } catch (e) {
    // A read failure is reported but not fatal here; the error below explains
    // every way cookies can be supplied.
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // No cookies found from any source
  throw new Error(
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):\n` +
      `  1. 'cookies' parameter (highest priority), or\n` +
      `  2. ${config.envVar} environment variable, or\n` +
      `  3. ${config.filePath} file (lowest priority)\n` +
      'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  );
}

/**
 * Try to load cookies, return empty array if none found (non-throwing version).
 *
 * @param config - Same configuration as `ensureCookies`.
 * @param cookiesSource - Optional cookies passed directly.
 * @returns The loaded cookies, or [] when `ensureCookies` throws.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  try {
    return await ensureCookies(config, cookiesSource);
  } catch {
    // Deliberate best-effort: callers of this variant treat "no cookies" as a normal outcome.
    return [];
  }
}