refactor: use shared cookie utility in facebook scraper
Replace inline cookie parsing with shared utility functions.
Maintains backward compatibility with existing exports.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,13 @@
|
|||||||
import cliProgress from "cli-progress";
|
import cliProgress from "cli-progress";
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import type { HTMLString } from "../types/common";
|
import type { HTMLString } from "../types/common";
|
||||||
|
import {
|
||||||
|
type Cookie,
|
||||||
|
type CookieConfig,
|
||||||
|
ensureCookies,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
parseCookieString,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
import { formatCentsToCurrency } from "../utils/format";
|
||||||
import { isRecord } from "../utils/http";
|
import { isRecord } from "../utils/http";
|
||||||
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
|
|||||||
* This is by design to respect Facebook's authentication requirements.
|
* This is by design to respect Facebook's authentication requirements.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// Facebook cookie configuration
|
||||||
|
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
|
||||||
interface Cookie {
|
name: "Facebook",
|
||||||
name: string;
|
domain: ".facebook.com",
|
||||||
value: string;
|
envVar: "FACEBOOK_COOKIE",
|
||||||
domain: string;
|
filePath: "./cookies/facebook.json",
|
||||||
path: string;
|
};
|
||||||
secure?: boolean;
|
|
||||||
httpOnly?: boolean;
|
|
||||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
|
||||||
session?: boolean;
|
|
||||||
expirationDate?: number;
|
|
||||||
partitionKey?: Record<string, unknown>;
|
|
||||||
storeId?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface FacebookAdNode {
|
interface FacebookAdNode {
|
||||||
node: {
|
node: {
|
||||||
@@ -203,223 +202,24 @@ export interface FacebookListingDetails {
|
|||||||
|
|
||||||
// ----------------------------- Utilities -----------------------------
|
// ----------------------------- Utilities -----------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Load Facebook cookies from file or string
|
|
||||||
*/
|
|
||||||
async function loadFacebookCookies(
|
|
||||||
cookiesSource?: string,
|
|
||||||
cookiePath = "./cookies/facebook.json",
|
|
||||||
): Promise<Cookie[]> {
|
|
||||||
// First try to load from provided string parameter
|
|
||||||
if (cookiesSource) {
|
|
||||||
try {
|
|
||||||
const cookies = JSON.parse(cookiesSource);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
throw new Error(`Invalid cookies JSON provided: ${e}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to load from specified path
|
|
||||||
try {
|
|
||||||
const cookiesPath = cookiePath;
|
|
||||||
const file = Bun.file(cookiesPath);
|
|
||||||
if (await file.exists()) {
|
|
||||||
const content = await file.text();
|
|
||||||
const cookies = JSON.parse(content);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse Facebook cookie string into Cookie array format
|
* Parse Facebook cookie string into Cookie array format
|
||||||
|
* @deprecated Use parseCookieString from utils/cookies instead
|
||||||
*/
|
*/
|
||||||
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
||||||
if (!cookieString || !cookieString.trim()) {
|
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
return cookieString
|
|
||||||
.split(";")
|
|
||||||
.map((pair) => pair.trim())
|
|
||||||
.filter((pair) => pair.includes("="))
|
|
||||||
.map((pair) => {
|
|
||||||
const [name, value] = pair.split("=", 2);
|
|
||||||
const trimmedName = name.trim();
|
|
||||||
const trimmedValue = value.trim();
|
|
||||||
|
|
||||||
// Skip empty names or values
|
|
||||||
if (!trimmedName || !trimmedValue) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
name: trimmedName,
|
|
||||||
value: decodeURIComponent(trimmedValue),
|
|
||||||
domain: ".facebook.com",
|
|
||||||
path: "/",
|
|
||||||
secure: true,
|
|
||||||
httpOnly: false,
|
|
||||||
sameSite: "lax" as const,
|
|
||||||
expirationDate: undefined, // Session cookies
|
|
||||||
};
|
|
||||||
})
|
|
||||||
.filter((cookie): cookie is Cookie => cookie !== null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load Facebook cookies with priority: URL param > ENV var > file
|
* Load Facebook cookies with priority: URL param > ENV var > file
|
||||||
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
||||||
* @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority)
|
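* @param _cookiePath - Deprecated and ignored: it is not forwarded to ensureCookies, which receives only FACEBOOK_COOKIE_CONFIG (filePath "./cookies/facebook.json")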
|
||||||
*/
|
*/
|
||||||
export async function ensureFacebookCookies(
|
export async function ensureFacebookCookies(
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath = "./cookies/facebook.json",
|
_cookiePath?: string,
|
||||||
): Promise<Cookie[]> {
|
): Promise<Cookie[]> {
|
||||||
// Priority 1: URL parameter (if provided)
|
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
|
||||||
if (cookiesSource) {
|
|
||||||
// Try JSON array format first
|
|
||||||
try {
|
|
||||||
const cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
if (cookies.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${cookies.length} Facebook cookies from URL parameter (JSON format)`,
|
|
||||||
);
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// JSON parse failed, try cookie string format as fallback
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try cookie string format (e.g., "name1=value1; name2=value2")
|
|
||||||
const cookies = parseFacebookCookieString(cookiesSource);
|
|
||||||
if (cookies.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${cookies.length} Facebook cookies from URL parameter (string format)`,
|
|
||||||
);
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.warn(
|
|
||||||
"URL parameter provided but no valid cookies extracted. Expected JSON array or cookie string.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Priority 2: Environment variable
|
|
||||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
|
||||||
if (cookieString?.trim()) {
|
|
||||||
const cookies = parseFacebookCookieString(cookieString);
|
|
||||||
if (cookies.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
|
|
||||||
);
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
|
|
||||||
// Continue to next priority
|
|
||||||
}
|
|
||||||
|
|
||||||
// Priority 3: Cookie file (fallback)
|
|
||||||
try {
|
|
||||||
const file = Bun.file(cookiePath);
|
|
||||||
if (await file.exists()) {
|
|
||||||
const content = await file.text();
|
|
||||||
|
|
||||||
// Try JSON array format first
|
|
||||||
try {
|
|
||||||
const parsed = JSON.parse(content);
|
|
||||||
if (Array.isArray(parsed)) {
|
|
||||||
const cookies = parsed.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string",
|
|
||||||
);
|
|
||||||
if (cookies.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (JSON format)`,
|
|
||||||
);
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// JSON parse failed, try cookie string format
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try cookie string format
|
|
||||||
const cookies = parseFacebookCookieString(content);
|
|
||||||
if (cookies.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${cookies.length} Facebook cookies from ${cookiePath} (string format)`,
|
|
||||||
);
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.warn(
|
|
||||||
`Cookie file ${cookiePath} exists but no valid cookies extracted`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// No cookies found from any source
|
|
||||||
throw new Error(
|
|
||||||
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
|
|
||||||
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
|
||||||
" 2. FACEBOOK_COOKIE environment variable, or\n" +
|
|
||||||
" 3. ./cookies/facebook.json file (lowest priority)\n" +
|
|
||||||
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Format cookies array into Cookie header string
|
|
||||||
*/
|
|
||||||
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
|
|
||||||
const validCookies = cookies
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check if cookie applies to this domain
|
|
||||||
if (cookie.domain.startsWith(".")) {
|
|
||||||
// Domain cookie (applies to subdomains)
|
|
||||||
return (
|
|
||||||
domain.endsWith(cookie.domain.slice(1)) ||
|
|
||||||
domain === cookie.domain.slice(1)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Host-only cookie
|
|
||||||
return cookie.domain === domain;
|
|
||||||
})
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check expiration
|
|
||||||
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
|
|
||||||
return false; // Expired
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
return validCookies
|
|
||||||
.map((cookie) => `${cookie.name}=${cookie.value}`)
|
|
||||||
.join("; ");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class HttpError extends Error {
|
class HttpError extends Error {
|
||||||
@@ -1110,28 +910,13 @@ export default async function fetchFacebookItems(
|
|||||||
export async function fetchFacebookItem(
|
export async function fetchFacebookItem(
|
||||||
itemId: string,
|
itemId: string,
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath?: string,
|
_cookiePath?: string,
|
||||||
): Promise<FacebookListingDetails | null> {
|
): Promise<FacebookListingDetails | null> {
|
||||||
// Load Facebook cookies - required for Facebook Marketplace access
|
// Load Facebook cookies - required for Facebook Marketplace access
|
||||||
let cookies: Cookie[];
|
const cookies = await ensureFacebookCookies(cookiesSource);
|
||||||
if (cookiesSource) {
|
|
||||||
// Use provided cookie source (backward compatibility)
|
|
||||||
cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
} else {
|
|
||||||
// Auto-load from file or parse from env var
|
|
||||||
cookies = await ensureFacebookCookies(cookiePath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"Facebook cookies are required for marketplace access. " +
|
|
||||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format cookies for HTTP header
|
// Format cookies for HTTP header
|
||||||
const domain = "www.facebook.com";
|
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
|
||||||
if (!cookiesHeader) {
|
if (!cookiesHeader) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||||
|
|||||||
Reference in New Issue
Block a user