refactor: use shared cookie utility in facebook scraper

Replace inline cookie parsing with shared utility functions.
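Keeps the existing exports and their signatures for backward compatibility.
Note: the `cookiePath` argument is still accepted but is now ignored; the
cookie file path always comes from the shared Facebook cookie config.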

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-23 19:28:52 -05:00
parent 9bc57d6b54
commit 251fcbb7d9

View File

@@ -1,6 +1,13 @@
import cliProgress from "cli-progress"; import cliProgress from "cli-progress";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common"; import type { HTMLString } from "../types/common";
import {
type Cookie,
type CookieConfig,
ensureCookies,
formatCookiesForHeader,
parseCookieString,
} from "../utils/cookies";
import { delay } from "../utils/delay"; import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format"; import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http"; import { isRecord } from "../utils/http";
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
* This is by design to respect Facebook's authentication requirements. * This is by design to respect Facebook's authentication requirements.
*/ */
// ----------------------------- Types ----------------------------- // Facebook cookie configuration
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
interface Cookie { name: "Facebook",
name: string; domain: ".facebook.com",
value: string; envVar: "FACEBOOK_COOKIE",
domain: string; filePath: "./cookies/facebook.json",
path: string; };
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
interface FacebookAdNode { interface FacebookAdNode {
node: { node: {
@@ -203,223 +202,24 @@ export interface FacebookListingDetails {
// ----------------------------- Utilities ----------------------------- // ----------------------------- Utilities -----------------------------
/**
 * Read Facebook cookies from an inline JSON string or, failing that, from a
 * JSON file on disk.
 *
 * @param cookiesSource - Optional JSON text; used when it parses to an array.
 * @param cookiePath - File consulted when `cookiesSource` did not produce an array.
 * @returns The array entries that have a string `name` and a string `value`,
 *   or an empty array when neither source produced an array.
 * @throws Error when `cookiesSource` is provided but is not valid JSON.
 */
async function loadFacebookCookies(
  cookiesSource?: string,
  cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
  // Keep only entries that carry a string name and a string value.
  const pickCookies = (entries: unknown[]): Cookie[] =>
    entries.filter((entry): entry is Cookie => {
      if (!entry) {
        return false;
      }
      const candidate = entry as { name?: unknown; value?: unknown };
      return (
        typeof candidate.name === "string" &&
        typeof candidate.value === "string"
      );
    });

  // Source 1: the inline JSON string. Malformed JSON is a hard error.
  if (cookiesSource) {
    let inline: unknown;
    try {
      inline = JSON.parse(cookiesSource);
    } catch (e) {
      throw new Error(`Invalid cookies JSON provided: ${e}`);
    }
    // Valid JSON that is not an array falls through to the file lookup.
    if (Array.isArray(inline)) {
      return pickCookies(inline);
    }
  }

  // Source 2: the cookie file. Any failure here is logged, not raised.
  try {
    const cookieFile = Bun.file(cookiePath);
    if (await cookieFile.exists()) {
      const stored: unknown = JSON.parse(await cookieFile.text());
      if (Array.isArray(stored)) {
        return pickCookies(stored);
      }
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
  }

  return [];
}
/** /**
* Parse Facebook cookie string into Cookie array format * Parse Facebook cookie string into Cookie array format
* @deprecated Use parseCookieString from utils/cookies instead
*/ */
export function parseFacebookCookieString(cookieString: string): Cookie[] { export function parseFacebookCookieString(cookieString: string): Cookie[] {
if (!cookieString || !cookieString.trim()) { return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
return [];
}
return cookieString
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
const [name, value] = pair.split("=", 2);
const trimmedName = name.trim();
const trimmedValue = value.trim();
// Skip empty names or values
if (!trimmedName || !trimmedValue) {
return null;
}
return {
name: trimmedName,
value: decodeURIComponent(trimmedValue),
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax" as const,
expirationDate: undefined, // Session cookies
};
})
.filter((cookie): cookie is Cookie => cookie !== null);
} }
/** /**
* Load Facebook cookies with priority: URL param > ENV var > file * Load Facebook cookies with priority: URL param > ENV var > file
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority) * @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
* @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority) * @param _cookiePath - Deprecated, uses default path from config
*/ */
export async function ensureFacebookCookies( export async function ensureFacebookCookies(
cookiesSource?: string, cookiesSource?: string,
cookiePath = "./cookies/facebook.json", _cookiePath?: string,
): Promise<Cookie[]> { ): Promise<Cookie[]> {
// Priority 1: URL parameter (if provided) return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
if (cookiesSource) {
// Try JSON array format first
try {
const cookies = await loadFacebookCookies(cookiesSource);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from URL parameter (JSON format)`,
);
return cookies;
}
} catch {
// JSON parse failed, try cookie string format as fallback
}
// Try cookie string format (e.g., "name1=value1; name2=value2")
const cookies = parseFacebookCookieString(cookiesSource);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from URL parameter (string format)`,
);
return cookies;
}
console.warn(
"URL parameter provided but no valid cookies extracted. Expected JSON array or cookie string.",
);
}
// Priority 2: Environment variable
const cookieString = process.env.FACEBOOK_COOKIE;
if (cookieString?.trim()) {
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
);
return cookies;
}
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
// Continue to next priority
}
// Priority 3: Cookie file (fallback)
try {
const file = Bun.file(cookiePath);
if (await file.exists()) {
const content = await file.text();
// Try JSON array format first
try {
const parsed = JSON.parse(content);
if (Array.isArray(parsed)) {
const cookies = parsed.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string",
);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (JSON format)`,
);
return cookies;
}
}
} catch {
// JSON parse failed, try cookie string format
}
// Try cookie string format
const cookies = parseFacebookCookieString(content);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (string format)`,
);
return cookies;
}
console.warn(
`Cookie file ${cookiePath} exists but no valid cookies extracted`,
);
}
} catch (e) {
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
// No cookies found from any source
throw new Error(
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
" 1. 'cookies' URL parameter (highest priority), or\n" +
" 2. FACEBOOK_COOKIE environment variable, or\n" +
" 3. ./cookies/facebook.json file (lowest priority)\n" +
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
}
/**
 * Build the value of a `Cookie` request header for one host.
 *
 * A cookie is included when both hold:
 *  - its domain applies to `domain`: a leading-dot domain (".example.com")
 *    matches the bare domain itself and any subdomain of it; any other
 *    domain must equal `domain` exactly;
 *  - it has no `expirationDate`, or that date (seconds since the epoch) is
 *    not in the past.
 *
 * @param cookies - Candidate cookies.
 * @param domain - Host the request is sent to, e.g. "www.facebook.com".
 * @returns `name=value` pairs joined by "; ", or "" when nothing qualifies.
 */
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookieDomain: unknown): boolean => {
    // Entries loaded from JSON are only validated for name/value, so the
    // domain may be missing; treat such a cookie as not applicable rather
    // than crashing on `.startsWith`.
    if (typeof cookieDomain !== "string") {
      return false;
    }
    if (!cookieDomain.startsWith(".")) {
      // Host-only cookie
      return cookieDomain === domain;
    }
    // Domain cookie: match the bare domain or a true subdomain. Comparing
    // against the dotted form enforces a label boundary, so ".facebook.com"
    // does not match "notfacebook.com".
    return domain === cookieDomain.slice(1) || domain.endsWith(cookieDomain);
  };

  return cookies
    .filter((cookie) => appliesToHost(cookie.domain))
    .filter(
      (cookie) => !(cookie.expirationDate && cookie.expirationDate < nowSeconds),
    )
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
class HttpError extends Error { class HttpError extends Error {
@@ -1110,28 +910,13 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem( export async function fetchFacebookItem(
itemId: string, itemId: string,
cookiesSource?: string, cookiesSource?: string,
cookiePath?: string, _cookiePath?: string,
): Promise<FacebookListingDetails | null> { ): Promise<FacebookListingDetails | null> {
// Load Facebook cookies - required for Facebook Marketplace access // Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[]; const cookies = await ensureFacebookCookies(cookiesSource);
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
// Format cookies for HTTP header // Format cookies for HTTP header
const domain = "www.facebook.com"; const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
const cookiesHeader = formatCookiesForHeader(cookies, domain);
if (!cookiesHeader) { if (!cookiesHeader) {
throw new Error( throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",