refactor: handle Facebook route-aware failure states
This commit is contained in:
@@ -283,7 +283,7 @@ async function fetchHtml(
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
cookies?: string;
|
||||
},
|
||||
): Promise<HTMLString> {
|
||||
): Promise<{ html: HTMLString; responseUrl: string }> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||
|
||||
@@ -354,7 +354,7 @@ async function fetchHtml(
|
||||
const html = await res.text();
|
||||
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
|
||||
await delay(DELAY_MS);
|
||||
return html;
|
||||
return { html, responseUrl: res.url || url };
|
||||
} catch (err) {
|
||||
if (attempt >= maxRetries) throw err;
|
||||
await delay((attempt + 1) * retryBaseMs);
|
||||
@@ -394,6 +394,10 @@ export function classifyFacebookResponse(
|
||||
return { kind: "unavailable" as const, authGated: false, unavailable: true };
|
||||
}
|
||||
|
||||
if (responseUrl.includes("/marketplace/item/")) {
|
||||
return { kind: "item" as const, authGated: false, unavailable: false };
|
||||
}
|
||||
|
||||
if (htmlString.includes("XCometMarketplaceSearchController")) {
|
||||
return { kind: "search" as const, authGated: false, unavailable: false };
|
||||
}
|
||||
@@ -1085,8 +1089,9 @@ export default async function fetchFacebookItems(
|
||||
console.log(`Using ${cookies.length} cookies for authentication`);
|
||||
|
||||
let searchHtml: string;
|
||||
let searchResponseUrl = searchUrl;
|
||||
try {
|
||||
searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
|
||||
const response = await fetchHtml(searchUrl, DELAY_MS, {
|
||||
maxRetries: 3,
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
@@ -1097,6 +1102,8 @@ export default async function fetchFacebookItems(
|
||||
},
|
||||
cookies: cookiesHeader,
|
||||
});
|
||||
searchHtml = response.html;
|
||||
searchResponseUrl = response.responseUrl;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.warn(
|
||||
@@ -1112,6 +1119,24 @@ export default async function fetchFacebookItems(
|
||||
throw err;
|
||||
}
|
||||
|
||||
const classification = classifyFacebookResponse(searchHtml, searchResponseUrl);
|
||||
if (classification.authGated) {
|
||||
console.warn("Facebook marketplace search redirected to login. Cookies may be expired.");
|
||||
return [];
|
||||
}
|
||||
|
||||
if (classification.unavailable) {
|
||||
console.warn("Facebook marketplace search returned an unavailable route.");
|
||||
return [];
|
||||
}
|
||||
|
||||
if (classification.kind !== "search") {
|
||||
console.warn(
|
||||
`Facebook marketplace search returned unexpected route kind: ${classification.kind}.`,
|
||||
);
|
||||
return [];
|
||||
}
|
||||
|
||||
const ads = extractFacebookMarketplaceData(searchHtml);
|
||||
if (!ads || ads.length === 0) {
|
||||
console.warn("No ads parsed from Facebook marketplace page.");
|
||||
@@ -1163,8 +1188,9 @@ export async function fetchFacebookItem(
|
||||
console.log(`Fetching Facebook marketplace item: ${itemUrl}`);
|
||||
|
||||
let itemHtml: string;
|
||||
let itemResponseUrl = itemUrl;
|
||||
try {
|
||||
itemHtml = await fetchHtml(itemUrl, 1000, {
|
||||
const response = await fetchHtml(itemUrl, 1000, {
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
@@ -1174,6 +1200,8 @@ export async function fetchFacebookItem(
|
||||
},
|
||||
cookies: cookiesHeader,
|
||||
});
|
||||
itemHtml = response.html;
|
||||
itemResponseUrl = response.responseUrl;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.warn(
|
||||
@@ -1214,26 +1242,32 @@ export async function fetchFacebookItem(
|
||||
throw err;
|
||||
}
|
||||
|
||||
const classification = classifyFacebookResponse(itemHtml, itemResponseUrl);
|
||||
|
||||
if (classification.authGated) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.unavailable || itemHtml.includes("This item has been sold")) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.kind !== "item") {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(
|
||||
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
const itemData = extractFacebookItemData(itemHtml);
|
||||
if (!itemData) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
|
||||
const classification = classifyFacebookResponse(itemHtml, itemUrl);
|
||||
|
||||
if (classification.authGated) {
|
||||
console.warn(
|
||||
`Authentication failed for item ${itemId}. Cookies may be expired.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.unavailable || itemHtml.includes("This item has been sold")) {
|
||||
console.warn(
|
||||
`Item ${itemId} appears to be sold or removed from marketplace.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
console.warn(
|
||||
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user