feat(facebook): add session warming and challenge detection
Facebook Marketplace no longer requires authentication cookies. Session warming sends proper browser headers. Checkpoint and login-wall challenges are detected and handled gracefully. Added marketplace_product_details_page.target extraction path for current item page structure.
This commit is contained in:
@@ -10,8 +10,14 @@ import {
|
||||
type CookieConfig,
|
||||
ensureCookies,
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
parseCookieString,
|
||||
} from "../utils/cookies";
|
||||
import {
|
||||
buildFacebookHeaders,
|
||||
detectFacebookChallenge,
|
||||
warmFacebookSession,
|
||||
} from "../utils/facebook-challenge";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http";
|
||||
import { logger } from "../utils/logger";
|
||||
@@ -20,9 +26,10 @@ import { classifyUnstableListings } from "../utils/unstable";
|
||||
/**
|
||||
* Facebook Marketplace Scraper
|
||||
*
|
||||
* Facebook Marketplace returns search results without authentication when
* proper browser headers are sent, so authentication cookies are no longer
* required for basic access. Prices and seller details are hidden on search
* results but are available on individual item pages even without auth
* cookies. For full-price search results, provide FACEBOOK_COOKIE.
|
||||
*/
|
||||
|
||||
// Facebook cookie configuration
|
||||
@@ -263,20 +270,14 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
|
||||
// ----------------------------- HTTP Client -----------------------------
|
||||
|
||||
/**
 * Build the request headers for a Facebook page fetch from a raw cookie string.
 *
 * The string is split on `;` into `name=value` pairs, which are collected
 * into a name -> value jar and passed to `buildFacebookHeaders` (imported
 * from ../utils/facebook-challenge), whose result is returned unchanged.
 *
 * @param cookies Raw `Cookie` header value, e.g. `"a=1; b=2"`. An empty
 *   string yields an empty jar.
 * @returns The header map produced by `buildFacebookHeaders` for that jar.
 */
function createFacebookHeaders(cookies: string): Record<string, string> {
  const jar: Record<string, string> = {};

  if (cookies) {
    for (const rawPair of cookies.split(";")) {
      const pair = rawPair.trim();
      // Split on the FIRST "=" only, so values that themselves contain "="
      // (e.g. base64 padding) are kept intact.
      const separatorIndex = pair.indexOf("=");
      // Skip fragments with no "=" at all or with an empty name ("=value").
      if (separatorIndex <= 0) continue;

      const name = pair.slice(0, separatorIndex).trim();
      jar[name] = pair.slice(separatorIndex + 1).trim();
    }
  }

  return buildFacebookHeaders(jar);
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
@@ -286,13 +287,29 @@ export type FacebookResponseKind =
|
||||
| "item"
|
||||
| "auth_gated"
|
||||
| "unavailable"
|
||||
| "checkpoint"
|
||||
| "unknown";
|
||||
|
||||
export function classifyFacebookResponse(
|
||||
htmlString: HTMLString,
|
||||
responseUrl: string,
|
||||
status = 200,
|
||||
) {
|
||||
const challengeType = detectFacebookChallenge(
|
||||
status,
|
||||
htmlString,
|
||||
responseUrl,
|
||||
);
|
||||
if (challengeType === "checkpoint") {
|
||||
return {
|
||||
kind: "checkpoint" as const,
|
||||
authGated: false,
|
||||
unavailable: false,
|
||||
};
|
||||
}
|
||||
|
||||
const authGated =
|
||||
challengeType === "login_wall" ||
|
||||
responseUrl.includes("/login/") ||
|
||||
htmlString.includes("You must log in") ||
|
||||
htmlString.includes("log in to continue");
|
||||
@@ -764,6 +781,22 @@ export function extractFacebookItemData(
|
||||
return bestMatch.item;
|
||||
}
|
||||
|
||||
// Try marketplace_product_details_page.target path (current item page structure)
|
||||
for (const candidate of candidates) {
|
||||
const detailsPage = findKeyInObject(
|
||||
candidate,
|
||||
"marketplace_product_details_page",
|
||||
) as Record<string, unknown> | undefined;
|
||||
const target = detailsPage?.target as Record<string, unknown> | undefined;
|
||||
if (
|
||||
target &&
|
||||
typeof target.id === "string" &&
|
||||
typeof target.marketplace_listing_title === "string"
|
||||
) {
|
||||
return target as unknown as FacebookMarketplaceItem;
|
||||
}
|
||||
}
|
||||
|
||||
if (htmlString.includes("XCometMarketplacePermalinkController")) {
|
||||
return extractFacebookItemHtmlFallback(htmlString);
|
||||
}
|
||||
@@ -771,6 +804,25 @@ export function extractFacebookItemData(
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Depth-first search for the first property named `targetKey` anywhere inside
 * a nested structure of objects and arrays.
 *
 * At each object the key is checked on that object first; only if it is
 * absent are the object's values searched, in enumeration order. Arrays are
 * searched element by element and are never key-checked themselves.
 *
 * Only OWN properties count as a match, so inherited names such as
 * `"toString"` or `"constructor"` are not reported as found.
 *
 * @param obj Value to search; non-object values and null/undefined yield
 *   `undefined`.
 * @param targetKey Property name to look for.
 * @returns The value stored under the first matching key, or `undefined`
 *   when no object in the structure has that key. If the key is present on
 *   an object, its value is returned as-is and that object's children are
 *   not searched.
 */
function findKeyInObject(obj: unknown, targetKey: string): unknown {
  if (obj == null || typeof obj !== "object") return undefined;

  if (Array.isArray(obj)) {
    for (const element of obj) {
      const found = findKeyInObject(element, targetKey);
      if (found !== undefined) return found;
    }
    return undefined;
  }

  const record = obj as Record<string, unknown>;
  // Own-property check: the `in` operator would also match prototype members.
  if (Object.prototype.hasOwnProperty.call(record, targetKey)) {
    return record[targetKey];
  }

  for (const value of Object.values(record)) {
    const found = findKeyInObject(value, targetKey);
    if (found !== undefined) return found;
  }
  return undefined;
}
|
||||
|
||||
/**
|
||||
Parse Facebook marketplace search results into ListingDetails[]
|
||||
*/
|
||||
@@ -1027,16 +1079,18 @@ export default async function fetchFacebookItems(
|
||||
};
|
||||
};
|
||||
|
||||
const cookies = await ensureFacebookCookies();
|
||||
const warmupCookies = await warmFacebookSession();
|
||||
const warmupHeader = Object.entries(warmupCookies)
|
||||
.map(([k, v]) => `${k}=${v}`)
|
||||
.join("; ");
|
||||
|
||||
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const domain = "www.facebook.com";
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
);
|
||||
}
|
||||
const userCookiesHeader = formatCookiesForHeader(userCookies, domain);
|
||||
const cookiesHeader = [warmupHeader, userCookiesHeader]
|
||||
.filter(Boolean)
|
||||
.join("; ");
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
|
||||
|
||||
@@ -1047,7 +1101,9 @@ export default async function fetchFacebookItems(
|
||||
const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;
|
||||
|
||||
logger.log(`Fetching Facebook marketplace: ${searchUrl}`);
|
||||
logger.log(`Using ${cookies.length} cookies for authentication`);
|
||||
if (userCookies.length > 0) {
|
||||
logger.log(`Using ${userCookies.length} cookies for authentication`);
|
||||
}
|
||||
|
||||
let searchHtml: string;
|
||||
let searchResponseUrl = searchUrl;
|
||||
@@ -1100,6 +1156,13 @@ export default async function fetchFacebookItems(
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
if (classification.kind === "checkpoint") {
|
||||
logger.warn(
|
||||
"Facebook marketplace returned a checkpoint challenge. This may require manual verification.",
|
||||
);
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
if (classification.unavailable) {
|
||||
logger.warn("Facebook marketplace search returned an unavailable route.");
|
||||
return finalizeResults([]);
|
||||
@@ -1149,15 +1212,8 @@ export default async function fetchFacebookItems(
|
||||
export async function fetchFacebookItem(
|
||||
itemId: string,
|
||||
): Promise<FacebookListingDetails | null> {
|
||||
const cookies = await ensureFacebookCookies();
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
);
|
||||
}
|
||||
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
|
||||
const cookiesHeader = formatCookiesForHeader(userCookies, "www.facebook.com");
|
||||
|
||||
const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`;
|
||||
|
||||
@@ -1230,6 +1286,14 @@ export async function fetchFacebookItem(
|
||||
|
||||
const classification = classifyFacebookResponse(itemHtml, itemResponseUrl);
|
||||
|
||||
if (classification.kind === "checkpoint") {
|
||||
logExtractionMetrics(false, itemId);
|
||||
logger.warn(
|
||||
`Checkpoint challenge detected for item ${itemId}. Facebook may be limiting access.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.authGated) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
logger.warn(
|
||||
|
||||
Reference in New Issue
Block a user