refactor: add shared cookie utility to core package

Move cookie parsing logic to a dedicated utility module that can be
shared across all scrapers. Supports both JSON array and cookie string
formats for all input sources (parameter, env var, file).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-23 19:28:44 -05:00
parent 4a467c9f02
commit 9bc57d6b54
2 changed files with 229 additions and 1 deletions

View File

@@ -36,7 +36,8 @@ export {
} from "./scrapers/kijiji"; } from "./scrapers/kijiji";
// Export shared types // Export shared types
export * from "./types/common"; export * from "./types/common";
// Export shared utilities
export * from "./utils/cookies";
export * from "./utils/delay"; export * from "./utils/delay";
export * from "./utils/format"; export * from "./utils/format";
// Export shared utilities
export * from "./utils/http"; export * from "./utils/http";

View File

@@ -0,0 +1,227 @@
/**
* Shared cookie handling utilities for marketplace scrapers
*/
export interface Cookie {
name: string;
value: string;
domain: string;
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
export interface CookieConfig {
/** Name used in log messages (e.g., "Facebook", "Kijiji") */
name: string;
/** Domain for cookies (e.g., ".facebook.com", ".kijiji.ca") */
domain: string;
/** Environment variable name (e.g., "FACEBOOK_COOKIE") */
envVar: string;
/** Path to cookie file (e.g., "./cookies/facebook.json") */
filePath: string;
}
/**
* Parse cookie string format into Cookie array
* Supports format: "name1=value1; name2=value2"
*/
export function parseCookieString(
cookieString: string,
domain: string,
): Cookie[] {
if (!cookieString?.trim()) {
return [];
}
return cookieString
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
const [name, ...valueParts] = pair.split("=");
const trimmedName = name.trim();
const trimmedValue = valueParts.join("=").trim();
if (!trimmedName || !trimmedValue) {
return null;
}
return {
name: trimmedName,
value: decodeURIComponent(trimmedValue),
domain,
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax" as const,
expirationDate: undefined,
};
})
.filter((cookie): cookie is Cookie => cookie !== null);
}
/**
* Parse JSON array format into Cookie array
* Supports format: [{"name": "foo", "value": "bar", ...}]
*/
export function parseJsonCookies(jsonString: string): Cookie[] {
const parsed = JSON.parse(jsonString);
if (!Array.isArray(parsed)) {
return [];
}
return parsed.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string",
);
}
/**
* Try to parse cookies from a string (tries JSON first, then cookie string format)
*/
export function parseCookiesAuto(
input: string,
defaultDomain: string,
): Cookie[] {
// Try JSON array format first
try {
const cookies = parseJsonCookies(input);
if (cookies.length > 0) {
return cookies;
}
} catch {
// JSON parse failed, try cookie string format
}
// Try cookie string format
return parseCookieString(input, defaultDomain);
}
/**
* Load cookies from file (supports both JSON array and cookie string formats)
*/
export async function loadCookiesFromFile(
filePath: string,
defaultDomain: string,
): Promise<Cookie[]> {
const file = Bun.file(filePath);
if (!(await file.exists())) {
return [];
}
const content = await file.text();
return parseCookiesAuto(content.trim(), defaultDomain);
}
/**
* Format cookies array into Cookie header string for HTTP requests
*/
export function formatCookiesForHeader(
cookies: Cookie[],
targetDomain: string,
): string {
const validCookies = cookies
.filter((cookie) => {
// Check if cookie applies to this domain
if (cookie.domain.startsWith(".")) {
// Domain cookie (applies to subdomains)
return (
targetDomain.endsWith(cookie.domain.slice(1)) ||
targetDomain === cookie.domain.slice(1)
);
}
// Host-only cookie
return cookie.domain === targetDomain;
})
.filter((cookie) => {
// Check expiration
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
return false;
}
return true;
});
return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`)
.join("; ");
}
/**
* Load cookies with priority: URL param > ENV var > file
* Supports both JSON array and cookie string formats for all sources
*/
export async function ensureCookies(
config: CookieConfig,
cookiesSource?: string,
): Promise<Cookie[]> {
// Priority 1: URL/API parameter (if provided)
if (cookiesSource) {
const cookies = parseCookiesAuto(cookiesSource, config.domain);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} ${config.name} cookies from parameter`,
);
return cookies;
}
console.warn(
`${config.name} cookies parameter provided but no valid cookies extracted`,
);
}
// Priority 2: Environment variable
const envValue = process.env[config.envVar];
if (envValue?.trim()) {
const cookies = parseCookiesAuto(envValue, config.domain);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
);
return cookies;
}
console.warn(`${config.envVar} env var contains no valid cookies`);
}
// Priority 3: Cookie file (fallback)
try {
const cookies = await loadCookiesFromFile(config.filePath, config.domain);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} ${config.name} cookies from ${config.filePath}`,
);
return cookies;
}
} catch (e) {
console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
}
// No cookies found from any source
throw new Error(
`No valid ${config.name} cookies found. Provide cookies via (in priority order):\n` +
` 1. 'cookies' parameter (highest priority), or\n` +
` 2. ${config.envVar} environment variable, or\n` +
` 3. ${config.filePath} file (lowest priority)\n` +
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
}
/**
* Try to load cookies, return empty array if none found (non-throwing version)
*/
export async function loadCookiesOptional(
config: CookieConfig,
cookiesSource?: string,
): Promise<Cookie[]> {
try {
return await ensureCookies(config, cookiesSource);
} catch {
return [];
}
}