fix: respect filtered result sets in unstable mode
This commit is contained in:
@@ -291,6 +291,7 @@ async function fetchHtml(
|
||||
): Promise<{ html: HTMLString; responseUrl: string }> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||
let lastRateLimitError: HttpError | null = null;
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
@@ -326,12 +327,20 @@ async function fetchHtml(
|
||||
if (!res.ok) {
|
||||
// Respect 429 reset if provided
|
||||
if (res.status === 429) {
|
||||
lastRateLimitError = new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url,
|
||||
);
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: (attempt + 1) * retryBaseMs;
|
||||
if (attempt >= maxRetries) {
|
||||
throw lastRateLimitError;
|
||||
}
|
||||
await delay(waitMs);
|
||||
continue;
|
||||
}
|
||||
@@ -369,7 +378,7 @@ async function fetchHtml(
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error("Exhausted retries without response");
|
||||
throw lastRateLimitError ?? new Error("Exhausted retries without response");
|
||||
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
|
||||
@@ -959,10 +959,10 @@ export default async function fetchKijijiItems(
|
||||
);
|
||||
|
||||
console.log(
|
||||
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
|
||||
`\nParsed ${filteredListings.length} detailed listings.`,
|
||||
);
|
||||
return unstableMode.hideUnstableResults
|
||||
? finalizeResults(allListings)
|
||||
? finalizeResults(filteredListings)
|
||||
: finalizeResults(filteredListings);
|
||||
}
|
||||
|
||||
|
||||
@@ -263,6 +263,30 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
// Should eventually succeed after retry
|
||||
});
|
||||
|
||||
// Regression test: when every fetch response is an HTTP 429, awaiting
// fetchFacebookItem must yield null (not a rejection), and the mocked fetch
// must have been called exactly 4 times.
test("should handle exhausted rate limiting retries as a 429", async () => {
|
||||
// Number of times the mocked fetch below has been invoked.
let attempts = 0;
|
||||
|
||||
// Replace global fetch with a stub that always answers with a non-ok 429.
// NOTE(review): no restore of global.fetch is visible in this test; presumably
// a surrounding hook resets it — verify.
global.fetch = mock(() => {
|
||||
attempts++;
|
||||
return Promise.resolve({
|
||||
ok: false,
|
||||
status: 429,
|
||||
headers: {
|
||||
get: (header: string) => {
|
||||
// Only X-RateLimit-Reset is answered; every other header lookup yields null.
// NOTE(review): the "0" value presumably keeps the retry wait at zero so the
// test does not sleep — confirm against the Facebook fetch implementation.
if (header === "X-RateLimit-Reset") return "0";
|
||||
return null;
|
||||
},
|
||||
},
|
||||
text: () => Promise.resolve("Rate limited"),
|
||||
});
|
||||
});
|
||||
|
||||
const result = await fetchFacebookItem("429-loop");
|
||||
|
||||
// A permanently rate-limited item is expected to surface as null.
expect(result).toBeNull();
|
||||
// NOTE(review): 4 calls presumably means 1 initial attempt + 3 retries —
// confirm against the scraper's default retry count.
expect(attempts).toBe(4);
|
||||
});
|
||||
|
||||
test("should handle sold items", async () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
|
||||
@@ -302,7 +302,7 @@ describe("fetchKijijiItems", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
test("classifies the full parsed Kijiji set in unstable mode", async () => {
|
||||
test("classifies the filtered Kijiji result set in unstable mode", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
@@ -399,11 +399,11 @@ describe("fetchKijijiItems", () => {
|
||||
expect.objectContaining({ title: "Stable Listing One" }),
|
||||
expect.objectContaining({ title: "Stable Listing Two" }),
|
||||
],
|
||||
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
|
||||
unstableResults: [],
|
||||
});
|
||||
});
|
||||
|
||||
test("uses URL price filters so out-of-range listings do not influence Kijiji classification", async () => {
|
||||
test("keeps out-of-range Kijiji listings out of both buckets and median input", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
@@ -413,7 +413,8 @@ describe("fetchKijijiItems", () => {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
|
||||
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
|
||||
"Listing:3": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
|
||||
"Listing:3": { url: "/v-out-of-range-high/k0l0", title: "Out Of Range High" },
|
||||
"Listing:4": { url: "/v-out-of-range-low/k0l0", title: "Out Of Range Low" },
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -474,10 +475,19 @@ describe("fetchKijijiItems", () => {
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-unstable/k0l0")) {
|
||||
if (url.endsWith("/v-out-of-range-high/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
|
||||
text: () => Promise.resolve(listingHtml("Out Of Range High", 20000, "v-out-of-range-high/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-out-of-range-low/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Out Of Range Low", 7000, "v-out-of-range-low/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
@@ -500,7 +510,7 @@ describe("fetchKijijiItems", () => {
|
||||
expect.objectContaining({ title: "Stable Listing One" }),
|
||||
expect.objectContaining({ title: "Stable Listing Two" }),
|
||||
],
|
||||
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
|
||||
unstableResults: [],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user