refactor: use shared cookie utility in facebook scraper
Replace inline cookie parsing with shared utility functions. Maintains backward compatibility with existing exports.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,13 @@
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import {
|
||||
type Cookie,
|
||||
type CookieConfig,
|
||||
ensureCookies,
|
||||
formatCookiesForHeader,
|
||||
parseCookieString,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import { isRecord } from "../utils/http";
|
||||
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
|
||||
* This is by design to respect Facebook's authentication requirements.
|
||||
*/
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
interface Cookie {
|
||||
name: string;
|
||||
value: string;
|
||||
domain: string;
|
||||
path: string;
|
||||
secure?: boolean;
|
||||
httpOnly?: boolean;
|
||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
||||
session?: boolean;
|
||||
expirationDate?: number;
|
||||
partitionKey?: Record<string, unknown>;
|
||||
storeId?: string;
|
||||
}
|
||||
// Facebook cookie configuration
|
||||
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
|
||||
name: "Facebook",
|
||||
domain: ".facebook.com",
|
||||
envVar: "FACEBOOK_COOKIE",
|
||||
filePath: "./cookies/facebook.json",
|
||||
};
|
||||
|
||||
interface FacebookAdNode {
|
||||
node: {
|
||||
@@ -203,223 +202,24 @@ export interface FacebookListingDetails {
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/**
|
||||
* Load Facebook cookies from file or string
|
||||
*/
|
||||
async function loadFacebookCookies(
|
||||
cookiesSource?: string,
|
||||
cookiePath = "./cookies/facebook.json",
|
||||
): Promise<Cookie[]> {
|
||||
// First try to load from provided string parameter
|
||||
if (cookiesSource) {
|
||||
try {
|
||||
const cookies = JSON.parse(cookiesSource);
|
||||
if (Array.isArray(cookies)) {
|
||||
return cookies.filter(
|
||||
(cookie): cookie is Cookie =>
|
||||
cookie &&
|
||||
typeof cookie.name === "string" &&
|
||||
typeof cookie.value === "string",
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
throw new Error(`Invalid cookies JSON provided: ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Try to load from specified path
|
||||
try {
|
||||
const cookiesPath = cookiePath;
|
||||
const file = Bun.file(cookiesPath);
|
||||
if (await file.exists()) {
|
||||
const content = await file.text();
|
||||
const cookies = JSON.parse(content);
|
||||
if (Array.isArray(cookies)) {
|
||||
return cookies.filter(
|
||||
(cookie): cookie is Cookie =>
|
||||
cookie &&
|
||||
typeof cookie.name === "string" &&
|
||||
typeof cookie.value === "string",
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse Facebook cookie string into Cookie array format
|
||||
* @deprecated Use parseCookieString from utils/cookies instead
|
||||
*/
|
||||
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
||||
if (!cookieString || !cookieString.trim()) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return cookieString
|
||||
.split(";")
|
||||
.map((pair) => pair.trim())
|
||||
.filter((pair) => pair.includes("="))
|
||||
.map((pair) => {
|
||||
const [name, value] = pair.split("=", 2);
|
||||
const trimmedName = name.trim();
|
||||
const trimmedValue = value.trim();
|
||||
|
||||
// Skip empty names or values
|
||||
if (!trimmedName || !trimmedValue) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
name: trimmedName,
|
||||
value: decodeURIComponent(trimmedValue),
|
||||
domain: ".facebook.com",
|
||||
path: "/",
|
||||
secure: true,
|
||||
httpOnly: false,
|
||||
sameSite: "lax" as const,
|
||||
expirationDate: undefined, // Session cookies
|
||||
};
|
||||
})
|
||||
.filter((cookie): cookie is Cookie => cookie !== null);
|
||||
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load Facebook cookies with priority: URL param > ENV var > file
|
||||
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
||||
* @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority)
|
||||
* @param _cookiePath - Deprecated, uses default path from config
|
||||
*/
|
||||
export async function ensureFacebookCookies(
|
||||
cookiesSource?: string,
|
||||
cookiePath = "./cookies/facebook.json",
|
||||
_cookiePath?: string,
|
||||
): Promise<Cookie[]> {
|
||||
// Priority 1: URL parameter (if provided)
|
||||
if (cookiesSource) {
|
||||
// Try JSON array format first
|
||||
try {
|
||||
const cookies = await loadFacebookCookies(cookiesSource);
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${cookies.length} Facebook cookies from URL parameter (JSON format)`,
|
||||
);
|
||||
return cookies;
|
||||
}
|
||||
} catch {
|
||||
// JSON parse failed, try cookie string format as fallback
|
||||
}
|
||||
|
||||
// Try cookie string format (e.g., "name1=value1; name2=value2")
|
||||
const cookies = parseFacebookCookieString(cookiesSource);
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${cookies.length} Facebook cookies from URL parameter (string format)`,
|
||||
);
|
||||
return cookies;
|
||||
}
|
||||
|
||||
console.warn(
|
||||
"URL parameter provided but no valid cookies extracted. Expected JSON array or cookie string.",
|
||||
);
|
||||
}
|
||||
|
||||
// Priority 2: Environment variable
|
||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
||||
if (cookieString?.trim()) {
|
||||
const cookies = parseFacebookCookieString(cookieString);
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
|
||||
);
|
||||
return cookies;
|
||||
}
|
||||
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
|
||||
// Continue to next priority
|
||||
}
|
||||
|
||||
// Priority 3: Cookie file (fallback)
|
||||
try {
|
||||
const file = Bun.file(cookiePath);
|
||||
if (await file.exists()) {
|
||||
const content = await file.text();
|
||||
|
||||
// Try JSON array format first
|
||||
try {
|
||||
const parsed = JSON.parse(content);
|
||||
if (Array.isArray(parsed)) {
|
||||
const cookies = parsed.filter(
|
||||
(cookie): cookie is Cookie =>
|
||||
cookie &&
|
||||
typeof cookie.name === "string" &&
|
||||
typeof cookie.value === "string",
|
||||
);
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (JSON format)`,
|
||||
);
|
||||
return cookies;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// JSON parse failed, try cookie string format
|
||||
}
|
||||
|
||||
// Try cookie string format
|
||||
const cookies = parseFacebookCookieString(content);
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (string format)`,
|
||||
);
|
||||
return cookies;
|
||||
}
|
||||
|
||||
console.warn(
|
||||
`Cookie file ${cookiePath} exists but no valid cookies extracted`,
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
||||
}
|
||||
|
||||
// No cookies found from any source
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
|
||||
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
||||
" 2. FACEBOOK_COOKIE environment variable, or\n" +
|
||||
" 3. ./cookies/facebook.json file (lowest priority)\n" +
|
||||
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Format cookies array into Cookie header string
|
||||
*/
|
||||
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
|
||||
const validCookies = cookies
|
||||
.filter((cookie) => {
|
||||
// Check if cookie applies to this domain
|
||||
if (cookie.domain.startsWith(".")) {
|
||||
// Domain cookie (applies to subdomains)
|
||||
return (
|
||||
domain.endsWith(cookie.domain.slice(1)) ||
|
||||
domain === cookie.domain.slice(1)
|
||||
);
|
||||
}
|
||||
// Host-only cookie
|
||||
return cookie.domain === domain;
|
||||
})
|
||||
.filter((cookie) => {
|
||||
// Check expiration
|
||||
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
|
||||
return false; // Expired
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
return validCookies
|
||||
.map((cookie) => `${cookie.name}=${cookie.value}`)
|
||||
.join("; ");
|
||||
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
|
||||
}
|
||||
|
||||
class HttpError extends Error {
|
||||
@@ -1110,28 +910,13 @@ export default async function fetchFacebookItems(
|
||||
export async function fetchFacebookItem(
|
||||
itemId: string,
|
||||
cookiesSource?: string,
|
||||
cookiePath?: string,
|
||||
_cookiePath?: string,
|
||||
): Promise<FacebookListingDetails | null> {
|
||||
// Load Facebook cookies - required for Facebook Marketplace access
|
||||
let cookies: Cookie[];
|
||||
if (cookiesSource) {
|
||||
// Use provided cookie source (backward compatibility)
|
||||
cookies = await loadFacebookCookies(cookiesSource);
|
||||
} else {
|
||||
// Auto-load from file or parse from env var
|
||||
cookies = await ensureFacebookCookies(cookiePath);
|
||||
}
|
||||
|
||||
if (cookies.length === 0) {
|
||||
throw new Error(
|
||||
"Facebook cookies are required for marketplace access. " +
|
||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
|
||||
);
|
||||
}
|
||||
const cookies = await ensureFacebookCookies(cookiesSource);
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const domain = "www.facebook.com";
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
|
||||
Reference in New Issue
Block a user