diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index d331471..6eca4cb 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -369,6 +369,45 @@ async function fetchHtml( // ----------------------------- Parsing ----------------------------- +export type FacebookResponseKind = + | "search" + | "item" + | "auth_gated" + | "unavailable" + | "unknown"; + +export function classifyFacebookResponse( + htmlString: HTMLString, + responseUrl: string, +) { + const authGated = + responseUrl.includes("/login/") || + htmlString.includes("You must log in") || + htmlString.includes("log in to continue"); + + if (authGated) { + return { kind: "auth_gated" as const, authGated: true, unavailable: false }; + } + + const unavailable = + responseUrl.includes("unavailable_product=1") || + htmlString.includes("This listing is no longer available") || + htmlString.includes("listing has been removed"); + if (unavailable) { + return { kind: "unavailable" as const, authGated: false, unavailable: true }; + } + + if (htmlString.includes("XCometMarketplaceSearchController")) { + return { kind: "search" as const, authGated: false, unavailable: false }; + } + + if (htmlString.includes("XCometMarketplacePermalinkController")) { + return { kind: "item" as const, authGated: false, unavailable: false }; + } + + return { kind: "unknown" as const, authGated: false, unavailable: false }; +} + /** Extract marketplace search data from Facebook page script tags */ @@ -970,25 +1009,19 @@ export async function fetchFacebookItem( const itemData = extractFacebookItemData(itemHtml); if (!itemData) { logExtractionMetrics(false, itemId); - // Enhanced checking for specific failure scenarios - if ( - itemHtml.includes("This listing is no longer available") || - itemHtml.includes("listing has been removed") || - itemHtml.includes("This item has been sold") - ) { + + const classification = classifyFacebookResponse(itemHtml, itemUrl); + + if (classification.authGated) { console.warn( - `Item ${itemId} appears to be sold or removed from marketplace.`, + `Authentication failed for item ${itemId}. Cookies may be expired.`, ); return null; } - if ( - itemHtml.includes("log in to Facebook") || - itemHtml.includes("You must log in") || - itemHtml.includes("authentication required") - ) { + if (classification.unavailable || itemHtml.includes("This item has been sold")) { console.warn( - `Authentication failed for item ${itemId}. Cookies may be expired.`, + `Item ${itemId} appears to be sold or removed from marketplace.`, ); return null; } diff --git a/packages/core/test/facebook-core.test.ts b/packages/core/test/facebook-core.test.ts index ea8a222..82bdd20 100644 --- a/packages/core/test/facebook-core.test.ts +++ b/packages/core/test/facebook-core.test.ts @@ -1,5 +1,6 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { + classifyFacebookResponse, ensureFacebookCookies, extractFacebookItemData, extractFacebookMarketplaceData, @@ -571,6 +572,126 @@ describe("Facebook Marketplace Scraper Core Tests", () => { const result = extractFacebookMarketplaceData(html); expect(result).toBeNull(); }); + + test("classifies Comet search responses", () => { + const html = ` + +