fix: respect filtered result sets in unstable mode

This commit is contained in:
2026-04-23 05:03:26 -04:00
parent 881c2ddf8c
commit 0a0723a560
4 changed files with 53 additions and 10 deletions

View File

@@ -291,6 +291,7 @@ async function fetchHtml(
): Promise<{ html: HTMLString; responseUrl: string }> { ): Promise<{ html: HTMLString; responseUrl: string }> {
const maxRetries = opts?.maxRetries ?? 3; const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500; const retryBaseMs = opts?.retryBaseMs ?? 500;
let lastRateLimitError: HttpError | null = null;
for (let attempt = 0; attempt <= maxRetries; attempt++) { for (let attempt = 0; attempt <= maxRetries; attempt++) {
try { try {
@@ -326,12 +327,20 @@ async function fetchHtml(
if (!res.ok) { if (!res.ok) {
// Respect 429 reset if provided // Respect 429 reset if provided
if (res.status === 429) { if (res.status === 429) {
lastRateLimitError = new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
const resetSeconds = rateLimitReset const resetSeconds = rateLimitReset
? Number(rateLimitReset) ? Number(rateLimitReset)
: Number.NaN; : Number.NaN;
const waitMs = Number.isFinite(resetSeconds) const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000) ? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs; : (attempt + 1) * retryBaseMs;
if (attempt >= maxRetries) {
throw lastRateLimitError;
}
await delay(waitMs); await delay(waitMs);
continue; continue;
} }
@@ -369,7 +378,7 @@ async function fetchHtml(
} }
} }
throw new Error("Exhausted retries without response"); throw lastRateLimitError ?? new Error("Exhausted retries without response");
} }
// ----------------------------- Parsing ----------------------------- // ----------------------------- Parsing -----------------------------

View File

@@ -959,10 +959,10 @@ export default async function fetchKijijiItems(
); );
console.log( console.log(
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`, `\nParsed ${filteredListings.length} detailed listings.`,
); );
return unstableMode.hideUnstableResults return unstableMode.hideUnstableResults
? finalizeResults(allListings) ? finalizeResults(filteredListings)
: finalizeResults(filteredListings); : finalizeResults(filteredListings);
} }

View File

@@ -263,6 +263,30 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
// Should eventually succeed after retry // Should eventually succeed after retry
}); });
test("should handle exhausted rate limiting retries as a 429", async () => {
let attempts = 0;
global.fetch = mock(() => {
attempts++;
return Promise.resolve({
ok: false,
status: 429,
headers: {
get: (header: string) => {
if (header === "X-RateLimit-Reset") return "0";
return null;
},
},
text: () => Promise.resolve("Rate limited"),
});
});
const result = await fetchFacebookItem("429-loop");
expect(result).toBeNull();
expect(attempts).toBe(4);
});
test("should handle sold items", async () => { test("should handle sold items", async () => {
const mockData = { const mockData = {
require: [ require: [

View File

@@ -302,7 +302,7 @@ describe("fetchKijijiItems", () => {
]); ]);
}); });
test("classifies the full parsed Kijiji set in unstable mode", async () => { test("classifies the filtered Kijiji result set in unstable mode", async () => {
const searchHtml = ` const searchHtml = `
<html> <html>
<script id="__NEXT_DATA__" type="application/json"> <script id="__NEXT_DATA__" type="application/json">
@@ -399,11 +399,11 @@ describe("fetchKijijiItems", () => {
expect.objectContaining({ title: "Stable Listing One" }), expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }), expect.objectContaining({ title: "Stable Listing Two" }),
], ],
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })], unstableResults: [],
}); });
}); });
test("uses URL price filters so out-of-range listings do not influence Kijiji classification", async () => { test("keeps out-of-range Kijiji listings out of both buckets and median input", async () => {
const searchHtml = ` const searchHtml = `
<html> <html>
<script id="__NEXT_DATA__" type="application/json"> <script id="__NEXT_DATA__" type="application/json">
@@ -413,7 +413,8 @@ describe("fetchKijijiItems", () => {
__APOLLO_STATE__: { __APOLLO_STATE__: {
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" }, "Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" }, "Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
"Listing:3": { url: "/v-unstable/k0l0", title: "Unstable Listing" }, "Listing:3": { url: "/v-out-of-range-high/k0l0", title: "Out Of Range High" },
"Listing:4": { url: "/v-out-of-range-low/k0l0", title: "Out Of Range Low" },
}, },
}, },
}, },
@@ -474,10 +475,19 @@ describe("fetchKijijiItems", () => {
}); });
} }
if (url.endsWith("/v-unstable/k0l0")) { if (url.endsWith("/v-out-of-range-high/k0l0")) {
return Promise.resolve({ return Promise.resolve({
ok: true, ok: true,
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")), text: () => Promise.resolve(listingHtml("Out Of Range High", 20000, "v-out-of-range-high/k0l0")),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-out-of-range-low/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Out Of Range Low", 7000, "v-out-of-range-low/k0l0")),
headers: { get: () => null }, headers: { get: () => null },
url, url,
}); });
@@ -500,7 +510,7 @@ describe("fetchKijijiItems", () => {
expect.objectContaining({ title: "Stable Listing One" }), expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }), expect.objectContaining({ title: "Stable Listing Two" }),
], ],
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })], unstableResults: [],
}); });
}); });