refactor: use shared cookie utility in facebook scraper

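Replace the scraper's inline cookie loading, parsing, and header formatting with the shared helpers from utils/cookies.
Maintains backward compatibility with existing exports: parseFacebookCookieString, ensureFacebookCookies, and fetchFacebookItem keep their names, but the cookie-path argument is now a deprecated, ignored `_cookiePath` placeholder and the path from the Facebook cookie config is used instead.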

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-23 19:28:52 -05:00
parent 9bc57d6b54
commit 251fcbb7d9

View File

@@ -1,6 +1,13 @@
import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common";
import {
type Cookie,
type CookieConfig,
ensureCookies,
formatCookiesForHeader,
parseCookieString,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http";
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
* This is by design to respect Facebook's authentication requirements.
*/
// ----------------------------- Types -----------------------------
/**
 * Shape of a single cookie record handled by the Facebook scraper.
 *
 * Only `name` and `value` are verified when records are loaded from JSON in
 * this file; the remaining fields are taken on trust from the input.
 *
 * NOTE(review): the field set resembles a browser-extension cookie export —
 * confirm against the files actually placed in ./cookies/.
 */
interface Cookie {
/** Cookie name; written on the left of `=` when the Cookie header is built. */
name: string;
/** Cookie value; written as-is on the right of `=` in the Cookie header. */
value: string;
/** Owning domain; a leading "." is treated as "also applies to subdomains" by formatCookiesForHeader. */
domain: string;
/** URL path scope (the cookie-string parser in this file fills in "/"). */
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
/** Expiry, compared against `Date.now() / 1000` (seconds since the Unix epoch); when absent no expiry check is made. */
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
// Facebook cookie configuration handed to the shared helpers in ../utils/cookies.
// In this file, `domain` is passed to parseCookieString and the whole object to ensureCookies.
// NOTE(review): presumably `name` labels log output, `envVar` names the environment
// variable to read, and `filePath` is the on-disk fallback — confirm in utils/cookies.
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
name: "Facebook",
domain: ".facebook.com",
envVar: "FACEBOOK_COOKIE",
filePath: "./cookies/facebook.json",
};
interface FacebookAdNode {
node: {
@@ -203,223 +202,24 @@ export interface FacebookListingDetails {
// ----------------------------- Utilities -----------------------------
/**
 * Read Facebook cookies from an inline JSON string or, failing that, from a JSON file.
 *
 * @param cookiesSource - Optional JSON text expected to hold an array of cookie records.
 * @param cookiePath - File consulted when `cookiesSource` is absent or is not a JSON array.
 * @returns The array entries that carry string `name` and `value` fields, or `[]`
 *   when neither source yields a JSON array.
 * @throws Error when `cookiesSource` is provided but is not valid JSON.
 */
async function loadFacebookCookies(
  cookiesSource?: string,
  cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
  // Shared by both sources: `undefined` means "not an array, keep looking".
  const pickCookies = (parsed: unknown): Cookie[] | undefined => {
    if (!Array.isArray(parsed)) {
      return undefined;
    }
    return parsed.filter(
      (entry): entry is Cookie =>
        entry &&
        typeof entry.name === "string" &&
        typeof entry.value === "string",
    );
  };

  // Source 1: the caller-supplied JSON string. A parse failure is fatal here.
  if (cookiesSource) {
    let fromSource: Cookie[] | undefined;
    try {
      fromSource = pickCookies(JSON.parse(cookiesSource));
    } catch (e) {
      throw new Error(`Invalid cookies JSON provided: ${e}`);
    }
    if (fromSource) {
      return fromSource;
    }
  }

  // Source 2: the cookie file. Any failure is only logged (best effort).
  try {
    const cookieFile = Bun.file(cookiePath);
    if (await cookieFile.exists()) {
      const fromFile = pickCookies(JSON.parse(await cookieFile.text()));
      if (fromFile) {
        return fromFile;
      }
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
  }

  return [];
}
/**
 * Parse a `name=value; name2=value2` cookie string into Cookie records scoped to Facebook.
 *
 * NOTE(review): this span is rendered from a commit diff, so two implementations
 * appear back to back. The split/map pipeline looks like the inline parser the
 * commit replaces; the final `return parseCookieString(...)` looks like the
 * replacement that delegates to the shared utility. Read as one function body,
 * the first `return` wins and the delegation is unreachable.
 *
 * @param cookieString - Raw cookie text; empty or whitespace-only input yields `[]`.
 * @returns One Cookie per `name=value` pair whose name and value are both non-empty.
 * @deprecated Use parseCookieString from utils/cookies instead
 */
export function parseFacebookCookieString(cookieString: string): Cookie[] {
if (!cookieString || !cookieString.trim()) {
return [];
}
return cookieString
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
// NOTE(review): split's limit truncates instead of keeping the remainder, so a
// value that itself contains "=" loses everything from the second "=" onward.
const [name, value] = pair.split("=", 2);
const trimmedName = name.trim();
const trimmedValue = value.trim();
// Skip empty names or values
if (!trimmedName || !trimmedValue) {
return null;
}
return {
name: trimmedName,
// NOTE(review): decodeURIComponent throws URIError on a malformed percent
// sequence, and nothing here catches it.
value: decodeURIComponent(trimmedValue),
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax" as const,
expirationDate: undefined, // Session cookies
};
})
.filter((cookie): cookie is Cookie => cookie !== null);
// Shared-utility delegation; unreachable while the return above is present.
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
}
/**
 * Load Facebook cookies with priority: URL param > ENV var > file
 *
 * NOTE(review): this span is rendered from a commit diff. The signature shows
 * both the earlier `cookiePath` parameter and its `_cookiePath` replacement, and
 * the body shown is the inline implementation; the commit appears to reduce the
 * body to a single delegation to the shared `ensureCookies` helper.
 *
 * @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority);
 *   a plain `name=value; ...` cookie string is accepted as a fallback format.
 * @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority)
 * @param _cookiePath - Deprecated, uses default path from config
 * @returns The first non-empty cookie list found, in the priority order above.
 * @throws Error when no source yields at least one valid cookie.
 */
export async function ensureFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json",
_cookiePath?: string,
): Promise<Cookie[]> {
// Priority 1: URL parameter (if provided)
if (cookiesSource) {
// Try JSON array format first
try {
// NOTE(review): when cookiesSource is valid JSON but not an array,
// loadFacebookCookies falls through to its default cookie file, so cookies
// read from disk can be reported below as coming from the URL parameter.
const cookies = await loadFacebookCookies(cookiesSource);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from URL parameter (JSON format)`,
);
return cookies;
}
} catch {
// JSON parse failed, try cookie string format as fallback
}
// Try cookie string format (e.g., "name1=value1; name2=value2")
const cookies = parseFacebookCookieString(cookiesSource);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from URL parameter (string format)`,
);
return cookies;
}
// An unusable URL parameter is not fatal: warn and continue to the next source.
console.warn(
"URL parameter provided but no valid cookies extracted. Expected JSON array or cookie string.",
);
}
// Priority 2: Environment variable
const cookieString = process.env.FACEBOOK_COOKIE;
if (cookieString?.trim()) {
// The environment variable is only parsed as a cookie string, never as JSON.
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
);
return cookies;
}
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
// Continue to next priority
}
// Priority 3: Cookie file (fallback)
try {
const file = Bun.file(cookiePath);
if (await file.exists()) {
const content = await file.text();
// Try JSON array format first
try {
const parsed = JSON.parse(content);
if (Array.isArray(parsed)) {
// Keep only entries with string `name` and `value`; other fields are not validated.
const cookies = parsed.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string",
);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (JSON format)`,
);
return cookies;
}
}
} catch {
// JSON parse failed, try cookie string format
}
// Try cookie string format
const cookies = parseFacebookCookieString(content);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (string format)`,
);
return cookies;
}
console.warn(
`Cookie file ${cookiePath} exists but no valid cookies extracted`,
);
}
} catch (e) {
// File access problems are logged and then surface as the generic error below.
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
// No cookies found from any source
throw new Error(
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
" 1. 'cookies' URL parameter (highest priority), or\n" +
" 2. FACEBOOK_COOKIE environment variable, or\n" +
" 3. ./cookies/facebook.json file (lowest priority)\n" +
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
}
/**
 * Format cookies array into Cookie header string
 *
 * Keeps the cookies whose `domain` applies to the requested host and that have
 * not expired, then joins them as `name=value` pairs separated by "; ".
 *
 * NOTE(review): this span is rendered from a commit diff. The trailing
 * `return ensureCookies(...)` refers to `cookiesSource`, which is not a
 * parameter here; it appears to be the new body of the neighbouring
 * ensureFacebookCookies function and is unreachable as written.
 *
 * @param cookies - Cookie records to consider.
 * @param domain - Host the request targets, e.g. "www.facebook.com".
 * @returns The header value, or an empty string when no cookie qualifies.
 */
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
const validCookies = cookies
.filter((cookie) => {
// Check if cookie applies to this domain
if (cookie.domain.startsWith(".")) {
// Domain cookie (applies to subdomains)
// NOTE(review): the suffix test has no "." boundary, so a cookie for
// ".facebook.com" also matches a host such as "notfacebook.com". The
// equality test is already covered by endsWith.
return (
domain.endsWith(cookie.domain.slice(1)) ||
domain === cookie.domain.slice(1)
);
}
// Host-only cookie
return cookie.domain === domain;
})
.filter((cookie) => {
// Check expiration
// expirationDate is compared in seconds; a missing or 0 value skips the check.
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
return false; // Expired
}
return true;
});
return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`)
.join("; ");
// Unreachable here; see the NOTE(review) in the header comment.
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
}
class HttpError extends Error {
@@ -1110,28 +910,13 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem(
itemId: string,
cookiesSource?: string,
cookiePath?: string,
_cookiePath?: string,
): Promise<FacebookListingDetails | null> {
// Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[];
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
const cookies = await ensureFacebookCookies(cookiesSource);
// Format cookies for HTTP header
const domain = "www.facebook.com";
const cookiesHeader = formatCookiesForHeader(cookies, domain);
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",