From 251fcbb7d908899981d46d3a83106bba577eb59c Mon Sep 17 00:00:00 2001 From: Dmytro Stanchiev Date: Fri, 23 Jan 2026 19:28:52 -0500 Subject: [PATCH] refactor: use shared cookie utility in facebook scraper Replace inline cookie parsing with shared utility functions. Maintains backward compatibility with existing exports. Co-Authored-By: Claude Opus 4.5 --- packages/core/src/scrapers/facebook.ts | 259 +++---------------------- 1 file changed, 22 insertions(+), 237 deletions(-) diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 7b4227c..f74ac5f 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -1,6 +1,13 @@ import cliProgress from "cli-progress"; import { parseHTML } from "linkedom"; import type { HTMLString } from "../types/common"; +import { + type Cookie, + type CookieConfig, + ensureCookies, + formatCookiesForHeader, + parseCookieString, +} from "../utils/cookies"; import { delay } from "../utils/delay"; import { formatCentsToCurrency } from "../utils/format"; import { isRecord } from "../utils/http"; @@ -13,21 +20,13 @@ import { isRecord } from "../utils/http"; * This is by design to respect Facebook's authentication requirements. */ -// ----------------------------- Types ----------------------------- - -interface Cookie { - name: string; - value: string; - domain: string; - path: string; - secure?: boolean; - httpOnly?: boolean; - sameSite?: "strict" | "lax" | "none" | "unspecified"; - session?: boolean; - expirationDate?: number; - partitionKey?: Record; - storeId?: string; -} +// Facebook cookie configuration +const FACEBOOK_COOKIE_CONFIG: CookieConfig = { + name: "Facebook", + domain: ".facebook.com", + envVar: "FACEBOOK_COOKIE", + filePath: "./cookies/facebook.json", +}; interface FacebookAdNode { node: { @@ -203,223 +202,24 @@ export interface FacebookListingDetails { // ----------------------------- Utilities ----------------------------- -/** - * Load Facebook cookies from file or string - */ -async function loadFacebookCookies( - cookiesSource?: string, - cookiePath = "./cookies/facebook.json", -): Promise { - // First try to load from provided string parameter - if (cookiesSource) { - try { - const cookies = JSON.parse(cookiesSource); - if (Array.isArray(cookies)) { - return cookies.filter( - (cookie): cookie is Cookie => - cookie && - typeof cookie.name === "string" && - typeof cookie.value === "string", - ); - } - } catch (e) { - throw new Error(`Invalid cookies JSON provided: ${e}`); - } - } - - // Try to load from specified path - try { - const cookiesPath = cookiePath; - const file = Bun.file(cookiesPath); - if (await file.exists()) { - const content = await file.text(); - const cookies = JSON.parse(content); - if (Array.isArray(cookies)) { - return cookies.filter( - (cookie): cookie is Cookie => - cookie && - typeof cookie.name === "string" && - typeof cookie.value === "string", - ); - } - } - } catch (e) { - console.warn(`Could not load cookies from ${cookiePath}: ${e}`); - } - - return []; -} - /** * Parse Facebook cookie string into Cookie array format + * @deprecated Use parseCookieString from utils/cookies instead */ export function parseFacebookCookieString(cookieString: string): Cookie[] { - if (!cookieString || !cookieString.trim()) { - return []; - } - - return cookieString - .split(";") - .map((pair) => pair.trim()) - .filter((pair) => pair.includes("=")) - .map((pair) => { - const [name, value] = pair.split("=", 2); - const trimmedName = name.trim(); - const trimmedValue = value.trim(); - - // Skip empty names or values - if (!trimmedName || !trimmedValue) { - return null; - } - - return { - name: trimmedName, - value: decodeURIComponent(trimmedValue), - domain: ".facebook.com", - path: "/", - secure: true, - httpOnly: false, - sameSite: "lax" as const, - expirationDate: undefined, // Session cookies - }; - }) - .filter((cookie): cookie is Cookie => cookie !== null); + return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain); } /** * Load Facebook cookies with priority: URL param > ENV var > file * @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority) - * @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority) + * @param _cookiePath - Deprecated, uses default path from config */ export async function ensureFacebookCookies( cookiesSource?: string, - cookiePath = "./cookies/facebook.json", + _cookiePath?: string, ): Promise { - // Priority 1: URL parameter (if provided) - if (cookiesSource) { - // Try JSON array format first - try { - const cookies = await loadFacebookCookies(cookiesSource); - if (cookies.length > 0) { - console.log( - `Loaded ${cookies.length} Facebook cookies from URL parameter (JSON format)`, - ); - return cookies; - } - } catch { - // JSON parse failed, try cookie string format as fallback - } - - // Try cookie string format (e.g., "name1=value1; name2=value2") - const cookies = parseFacebookCookieString(cookiesSource); - if (cookies.length > 0) { - console.log( - `Loaded ${cookies.length} Facebook cookies from URL parameter (string format)`, - ); - return cookies; - } - - console.warn( - "URL parameter provided but no valid cookies extracted. Expected JSON array or cookie string.", - ); - } - - // Priority 2: Environment variable - const cookieString = process.env.FACEBOOK_COOKIE; - if (cookieString?.trim()) { - const cookies = parseFacebookCookieString(cookieString); - if (cookies.length > 0) { - console.log( - `Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`, - ); - return cookies; - } - console.warn("FACEBOOK_COOKIE env var contains no valid cookies"); - // Continue to next priority - } - - // Priority 3: Cookie file (fallback) - try { - const file = Bun.file(cookiePath); - if (await file.exists()) { - const content = await file.text(); - - // Try JSON array format first - try { - const parsed = JSON.parse(content); - if (Array.isArray(parsed)) { - const cookies = parsed.filter( - (cookie): cookie is Cookie => - cookie && - typeof cookie.name === "string" && - typeof cookie.value === "string", - ); - if (cookies.length > 0) { - console.log( - `Loaded ${cookies.length} Facebook cookies from ${cookiePath} (JSON format)`, - ); - return cookies; - } - } - } catch { - // JSON parse failed, try cookie string format - } - - // Try cookie string format - const cookies = parseFacebookCookieString(content); - if (cookies.length > 0) { - console.log( - `Loaded ${cookies.length} Facebook cookies from ${cookiePath} (string format)`, - ); - return cookies; - } - - console.warn( - `Cookie file ${cookiePath} exists but no valid cookies extracted`, - ); - } - } catch (e) { - console.warn(`Could not load cookies from ${cookiePath}: ${e}`); - } - - // No cookies found from any source - throw new Error( - "No valid Facebook cookies found. Provide cookies via (in priority order):\n" + - " 1. 'cookies' URL parameter (highest priority), or\n" + - " 2. FACEBOOK_COOKIE environment variable, or\n" + - " 3. ./cookies/facebook.json file (lowest priority)\n" + - 'Format: JSON array or cookie string like "name1=value1; name2=value2"', - ); -} - -/** - * Format cookies array into Cookie header string - */ -function formatCookiesForHeader(cookies: Cookie[], domain: string): string { - const validCookies = cookies - .filter((cookie) => { - // Check if cookie applies to this domain - if (cookie.domain.startsWith(".")) { - // Domain cookie (applies to subdomains) - return ( - domain.endsWith(cookie.domain.slice(1)) || - domain === cookie.domain.slice(1) - ); - } - // Host-only cookie - return cookie.domain === domain; - }) - .filter((cookie) => { - // Check expiration - if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) { - return false; // Expired - } - return true; - }); - - return validCookies - .map((cookie) => `${cookie.name}=${cookie.value}`) - .join("; "); + return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource); } class HttpError extends Error { @@ -1110,28 +910,13 @@ export default async function fetchFacebookItems( export async function fetchFacebookItem( itemId: string, cookiesSource?: string, - cookiePath?: string, + _cookiePath?: string, ): Promise { // Load Facebook cookies - required for Facebook Marketplace access - let cookies: Cookie[]; - if (cookiesSource) { - // Use provided cookie source (backward compatibility) - cookies = await loadFacebookCookies(cookiesSource); - } else { - // Auto-load from file or parse from env var - cookies = await ensureFacebookCookies(cookiePath); - } - - if (cookies.length === 0) { - throw new Error( - "Facebook cookies are required for marketplace access. " + - "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", - ); - } + const cookies = await ensureFacebookCookies(cookiesSource); // Format cookies for HTTP header - const domain = "www.facebook.com"; - const cookiesHeader = formatCookiesForHeader(cookies, domain); + const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com"); if (!cookiesHeader) { throw new Error( "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",