fix: respect filtered result sets in unstable mode

This commit is contained in:
2026-04-23 05:03:26 -04:00
parent 881c2ddf8c
commit 0a0723a560
4 changed files with 53 additions and 10 deletions

View File

@@ -291,6 +291,7 @@ async function fetchHtml(
): Promise<{ html: HTMLString; responseUrl: string }> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
let lastRateLimitError: HttpError | null = null;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
@@ -326,12 +327,20 @@ async function fetchHtml(
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
lastRateLimitError = new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
if (attempt >= maxRetries) {
throw lastRateLimitError;
}
await delay(waitMs);
continue;
}
@@ -369,7 +378,7 @@ async function fetchHtml(
}
}
throw new Error("Exhausted retries without response");
throw lastRateLimitError ?? new Error("Exhausted retries without response");
}
// ----------------------------- Parsing -----------------------------

View File

@@ -959,10 +959,10 @@ export default async function fetchKijijiItems(
);
console.log(
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
`\nParsed ${filteredListings.length} detailed listings.`,
);
return unstableMode.hideUnstableResults
? finalizeResults(allListings)
? finalizeResults(filteredListings)
: finalizeResults(filteredListings);
}

View File

@@ -263,6 +263,30 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
// Should eventually succeed after retry
});
test("should handle exhausted rate limiting retries as a 429", async () => {
  // Counts how many times the stubbed fetch is invoked.
  let fetchCalls = 0;

  // Produces a fresh non-OK 429 response on every call; the only header it
  // answers is X-RateLimit-Reset, which it reports as "0".
  const buildRateLimitedResponse = () => ({
    ok: false,
    status: 429,
    headers: {
      get: (name: string) => (name === "X-RateLimit-Reset" ? "0" : null),
    },
    text: async () => "Rate limited",
  });

  // Every request is rate limited, so the scraper can never succeed.
  global.fetch = mock(async () => {
    fetchCalls += 1;
    return buildRateLimitedResponse();
  });

  const item = await fetchFacebookItem("429-loop");

  // The lookup resolves to null rather than rejecting, after exactly four
  // fetch calls (presumably the first attempt plus the default retry budget
  // of three — confirm against the scraper's retry options).
  expect(item).toBeNull();
  expect(fetchCalls).toBe(4);
});
test("should handle sold items", async () => {
const mockData = {
require: [

View File

@@ -302,7 +302,7 @@ describe("fetchKijijiItems", () => {
]);
});
test("classifies the full parsed Kijiji set in unstable mode", async () => {
test("classifies the filtered Kijiji result set in unstable mode", async () => {
const searchHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
@@ -399,11 +399,11 @@ describe("fetchKijijiItems", () => {
expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }),
],
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
unstableResults: [],
});
});
test("uses URL price filters so out-of-range listings do not influence Kijiji classification", async () => {
test("keeps out-of-range Kijiji listings out of both buckets and median input", async () => {
const searchHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
@@ -413,7 +413,8 @@ describe("fetchKijijiItems", () => {
__APOLLO_STATE__: {
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
"Listing:3": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
"Listing:3": { url: "/v-out-of-range-high/k0l0", title: "Out Of Range High" },
"Listing:4": { url: "/v-out-of-range-low/k0l0", title: "Out Of Range Low" },
},
},
},
@@ -474,10 +475,19 @@ describe("fetchKijijiItems", () => {
});
}
if (url.endsWith("/v-unstable/k0l0")) {
if (url.endsWith("/v-out-of-range-high/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
text: () => Promise.resolve(listingHtml("Out Of Range High", 20000, "v-out-of-range-high/k0l0")),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-out-of-range-low/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Out Of Range Low", 7000, "v-out-of-range-low/k0l0")),
headers: { get: () => null },
url,
});
@@ -500,7 +510,7 @@ describe("fetchKijijiItems", () => {
expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }),
],
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
unstableResults: [],
});
});