fix: respect filtered result sets in unstable mode
This commit is contained in:
@@ -291,6 +291,7 @@ async function fetchHtml(
|
|||||||
): Promise<{ html: HTMLString; responseUrl: string }> {
|
): Promise<{ html: HTMLString; responseUrl: string }> {
|
||||||
const maxRetries = opts?.maxRetries ?? 3;
|
const maxRetries = opts?.maxRetries ?? 3;
|
||||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||||
|
let lastRateLimitError: HttpError | null = null;
|
||||||
|
|
||||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||||
try {
|
try {
|
||||||
@@ -326,12 +327,20 @@ async function fetchHtml(
|
|||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
// Respect 429 reset if provided
|
// Respect 429 reset if provided
|
||||||
if (res.status === 429) {
|
if (res.status === 429) {
|
||||||
|
lastRateLimitError = new HttpError(
|
||||||
|
`Request failed with status ${res.status}`,
|
||||||
|
res.status,
|
||||||
|
url,
|
||||||
|
);
|
||||||
const resetSeconds = rateLimitReset
|
const resetSeconds = rateLimitReset
|
||||||
? Number(rateLimitReset)
|
? Number(rateLimitReset)
|
||||||
: Number.NaN;
|
: Number.NaN;
|
||||||
const waitMs = Number.isFinite(resetSeconds)
|
const waitMs = Number.isFinite(resetSeconds)
|
||||||
? Math.max(0, resetSeconds * 1000)
|
? Math.max(0, resetSeconds * 1000)
|
||||||
: (attempt + 1) * retryBaseMs;
|
: (attempt + 1) * retryBaseMs;
|
||||||
|
if (attempt >= maxRetries) {
|
||||||
|
throw lastRateLimitError;
|
||||||
|
}
|
||||||
await delay(waitMs);
|
await delay(waitMs);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -369,7 +378,7 @@ async function fetchHtml(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new Error("Exhausted retries without response");
|
throw lastRateLimitError ?? new Error("Exhausted retries without response");
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------- Parsing -----------------------------
|
// ----------------------------- Parsing -----------------------------
|
||||||
|
|||||||
@@ -959,10 +959,10 @@ export default async function fetchKijijiItems(
|
|||||||
);
|
);
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
|
`\nParsed ${filteredListings.length} detailed listings.`,
|
||||||
);
|
);
|
||||||
return unstableMode.hideUnstableResults
|
return unstableMode.hideUnstableResults
|
||||||
? finalizeResults(allListings)
|
? finalizeResults(filteredListings)
|
||||||
: finalizeResults(filteredListings);
|
: finalizeResults(filteredListings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -263,6 +263,30 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
// Should eventually succeed after retry
|
// Should eventually succeed after retry
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("should handle exhausted rate limiting retries as a 429", async () => {
|
||||||
|
let attempts = 0;
|
||||||
|
|
||||||
|
global.fetch = mock(() => {
|
||||||
|
attempts++;
|
||||||
|
return Promise.resolve({
|
||||||
|
ok: false,
|
||||||
|
status: 429,
|
||||||
|
headers: {
|
||||||
|
get: (header: string) => {
|
||||||
|
if (header === "X-RateLimit-Reset") return "0";
|
||||||
|
return null;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
text: () => Promise.resolve("Rate limited"),
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await fetchFacebookItem("429-loop");
|
||||||
|
|
||||||
|
expect(result).toBeNull();
|
||||||
|
expect(attempts).toBe(4);
|
||||||
|
});
|
||||||
|
|
||||||
test("should handle sold items", async () => {
|
test("should handle sold items", async () => {
|
||||||
const mockData = {
|
const mockData = {
|
||||||
require: [
|
require: [
|
||||||
|
|||||||
@@ -302,7 +302,7 @@ describe("fetchKijijiItems", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("classifies the full parsed Kijiji set in unstable mode", async () => {
|
test("classifies the filtered Kijiji result set in unstable mode", async () => {
|
||||||
const searchHtml = `
|
const searchHtml = `
|
||||||
<html>
|
<html>
|
||||||
<script id="__NEXT_DATA__" type="application/json">
|
<script id="__NEXT_DATA__" type="application/json">
|
||||||
@@ -399,11 +399,11 @@ describe("fetchKijijiItems", () => {
|
|||||||
expect.objectContaining({ title: "Stable Listing One" }),
|
expect.objectContaining({ title: "Stable Listing One" }),
|
||||||
expect.objectContaining({ title: "Stable Listing Two" }),
|
expect.objectContaining({ title: "Stable Listing Two" }),
|
||||||
],
|
],
|
||||||
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
|
unstableResults: [],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
test("uses URL price filters so out-of-range listings do not influence Kijiji classification", async () => {
|
test("keeps out-of-range Kijiji listings out of both buckets and median input", async () => {
|
||||||
const searchHtml = `
|
const searchHtml = `
|
||||||
<html>
|
<html>
|
||||||
<script id="__NEXT_DATA__" type="application/json">
|
<script id="__NEXT_DATA__" type="application/json">
|
||||||
@@ -413,7 +413,8 @@ describe("fetchKijijiItems", () => {
|
|||||||
__APOLLO_STATE__: {
|
__APOLLO_STATE__: {
|
||||||
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
|
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
|
||||||
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
|
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
|
||||||
"Listing:3": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
|
"Listing:3": { url: "/v-out-of-range-high/k0l0", title: "Out Of Range High" },
|
||||||
|
"Listing:4": { url: "/v-out-of-range-low/k0l0", title: "Out Of Range Low" },
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -474,10 +475,19 @@ describe("fetchKijijiItems", () => {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (url.endsWith("/v-unstable/k0l0")) {
|
if (url.endsWith("/v-out-of-range-high/k0l0")) {
|
||||||
return Promise.resolve({
|
return Promise.resolve({
|
||||||
ok: true,
|
ok: true,
|
||||||
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
|
text: () => Promise.resolve(listingHtml("Out Of Range High", 20000, "v-out-of-range-high/k0l0")),
|
||||||
|
headers: { get: () => null },
|
||||||
|
url,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (url.endsWith("/v-out-of-range-low/k0l0")) {
|
||||||
|
return Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () => Promise.resolve(listingHtml("Out Of Range Low", 7000, "v-out-of-range-low/k0l0")),
|
||||||
headers: { get: () => null },
|
headers: { get: () => null },
|
||||||
url,
|
url,
|
||||||
});
|
});
|
||||||
@@ -500,7 +510,7 @@ describe("fetchKijijiItems", () => {
|
|||||||
expect.objectContaining({ title: "Stable Listing One" }),
|
expect.objectContaining({ title: "Stable Listing One" }),
|
||||||
expect.objectContaining({ title: "Stable Listing Two" }),
|
expect.objectContaining({ title: "Stable Listing Two" }),
|
||||||
],
|
],
|
||||||
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
|
unstableResults: [],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user