From 0a246a29bf7ab750e37e21dbe278476472c17f97 Mon Sep 17 00:00:00 2001 From: Dmytro Stanchiev Date: Sat, 2 May 2026 18:58:53 -0400 Subject: [PATCH] feat(facebook): add session warming and challenge detection Facebook Marketplace no longer requires authentication cookies. Session warming sends proper browser headers. Checkpoint and login-wall challenges are detected and handled gracefully. Added marketplace_product_details_page.target extraction path for current item page structure. --- packages/core/src/scrapers/facebook.ts | 134 ++++++++++++++++++------- 1 file changed, 99 insertions(+), 35 deletions(-) diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index f5bc728..f74bf4b 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -10,8 +10,14 @@ import { type CookieConfig, ensureCookies, formatCookiesForHeader, + loadCookiesOptional, parseCookieString, } from "../utils/cookies"; +import { + buildFacebookHeaders, + detectFacebookChallenge, + warmFacebookSession, +} from "../utils/facebook-challenge"; import { formatCentsToCurrency } from "../utils/format"; import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http"; import { logger } from "../utils/logger"; @@ -20,9 +26,10 @@ import { classifyUnstableListings } from "../utils/unstable"; /** * Facebook Marketplace Scraper * - * Note: Facebook Marketplace requires authentication cookies for full access. - * This implementation will return limited or no results without proper authentication. - * This is by design to respect Facebook's authentication requirements. + * Facebook Marketplace returns search results without authentication when + * proper browser headers are sent. Prices and seller details are hidden on + * search results but are available on individual item pages even without + * auth cookies. For full-price search results, provide FACEBOOK_COOKIE. */ // Facebook cookie configuration @@ -263,20 +270,14 @@ function logExtractionMetrics(success: boolean, itemId?: string) { // ----------------------------- HTTP Client ----------------------------- function createFacebookHeaders(cookies: string): Record { - return { - accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", - "cache-control": "no-cache", - "upgrade-insecure-requests": "1", - "sec-fetch-dest": "document", - "sec-fetch-mode": "navigate", - "sec-fetch-site": "none", - "sec-fetch-user": "?1", - "user-agent": - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - cookie: cookies, - }; + const jar: Record = {}; + if (cookies) { + for (const pair of cookies.split(";")) { + const [name, ...rest] = pair.trim().split("="); + if (name && rest.length > 0) jar[name.trim()] = rest.join("=").trim(); + } + } + return buildFacebookHeaders(jar); } // ----------------------------- Parsing ----------------------------- @@ -286,13 +287,29 @@ export type FacebookResponseKind = | "item" | "auth_gated" | "unavailable" + | "checkpoint" | "unknown"; export function classifyFacebookResponse( htmlString: HTMLString, responseUrl: string, + status = 200, ) { + const challengeType = detectFacebookChallenge( + status, + htmlString, + responseUrl, + ); + if (challengeType === "checkpoint") { + return { + kind: "checkpoint" as const, + authGated: false, + unavailable: false, + }; + } + const authGated = + challengeType === "login_wall" || responseUrl.includes("/login/") || htmlString.includes("You must log in") || htmlString.includes("log in to continue"); @@ -764,6 +781,22 @@ export function extractFacebookItemData( return bestMatch.item; } + // Try marketplace_product_details_page.target path (current item page structure) + for (const candidate of candidates) { + const detailsPage = findKeyInObject( + candidate, + "marketplace_product_details_page", + ) as Record | undefined; + const target = detailsPage?.target as Record | undefined; + if ( + target && + typeof target.id === "string" && + typeof target.marketplace_listing_title === "string" + ) { + return target as unknown as FacebookMarketplaceItem; + } + } + if (htmlString.includes("XCometMarketplacePermalinkController")) { return extractFacebookItemHtmlFallback(htmlString); } @@ -771,6 +804,25 @@ export function extractFacebookItemData( return null; } +function findKeyInObject(obj: unknown, targetKey: string): unknown { + if (obj == null) return undefined; + if (Array.isArray(obj)) { + for (const item of obj) { + const found = findKeyInObject(item, targetKey); + if (found !== undefined) return found; + } + return undefined; + } + if (typeof obj !== "object") return undefined; + const record = obj as Record; + if (targetKey in record) return record[targetKey]; + for (const [, value] of Object.entries(record)) { + const found = findKeyInObject(value, targetKey); + if (found !== undefined) return found; + } + return undefined; +} + /** Parse Facebook marketplace search results into ListingDetails[] */ @@ -1027,16 +1079,18 @@ export default async function fetchFacebookItems( }; }; - const cookies = await ensureFacebookCookies(); + const warmupCookies = await warmFacebookSession(); + const warmupHeader = Object.entries(warmupCookies) + .map(([k, v]) => `${k}=${v}`) + .join("; "); + + const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG); - // Format cookies for HTTP header const domain = "www.facebook.com"; - const cookiesHeader = formatCookiesForHeader(cookies, domain); - if (!cookiesHeader) { - throw new Error( - "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", - ); - } + const userCookiesHeader = formatCookiesForHeader(userCookies, domain); + const cookiesHeader = [warmupHeader, userCookiesHeader] + .filter(Boolean) + .join("; "); const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond)); @@ -1047,7 +1101,9 @@ export default async function fetchFacebookItems( const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`; logger.log(`Fetching Facebook marketplace: ${searchUrl}`); - logger.log(`Using ${cookies.length} cookies for authentication`); + if (userCookies.length > 0) { + logger.log(`Using ${userCookies.length} cookies for authentication`); + } let searchHtml: string; let searchResponseUrl = searchUrl; @@ -1100,6 +1156,13 @@ export default async function fetchFacebookItems( return finalizeResults([]); } + if (classification.kind === "checkpoint") { + logger.warn( + "Facebook marketplace returned a checkpoint challenge. This may require manual verification.", + ); + return finalizeResults([]); + } + if (classification.unavailable) { logger.warn("Facebook marketplace search returned an unavailable route."); return finalizeResults([]); @@ -1149,15 +1212,8 @@ export default async function fetchFacebookItems( export async function fetchFacebookItem( itemId: string, ): Promise { - const cookies = await ensureFacebookCookies(); - - // Format cookies for HTTP header - const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com"); - if (!cookiesHeader) { - throw new Error( - "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", - ); - } + const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG); + const cookiesHeader = formatCookiesForHeader(userCookies, "www.facebook.com"); const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`; @@ -1230,6 +1286,14 @@ export async function fetchFacebookItem( const classification = classifyFacebookResponse(itemHtml, itemResponseUrl); + if (classification.kind === "checkpoint") { + logExtractionMetrics(false, itemId); + logger.warn( + `Checkpoint challenge detected for item ${itemId}. Facebook may be limiting access.`, + ); + return null; + } + if (classification.authGated) { logExtractionMetrics(false, itemId); logger.warn(