fix: preserve free results and request pacing
This commit is contained in:
@@ -1205,7 +1205,8 @@ export default async function fetchFacebookItems(
|
|||||||
|
|
||||||
// Filter to only priced items (already done in parseFacebookAds)
|
// Keep only items with a numeric price, including free (0-cent) listings (already done in parseFacebookAds)
|
||||||
const pricedItems = items.filter(
|
const pricedItems = items.filter(
|
||||||
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
|
(item) =>
|
||||||
|
typeof item.listingPrice?.cents === "number" && item.listingPrice.cents >= 0,
|
||||||
);
|
);
|
||||||
|
|
||||||
progressBar.update(totalProgress);
|
progressBar.update(totalProgress);
|
||||||
|
|||||||
@@ -889,15 +889,15 @@ export default async function fetchKijijiItems(
|
|||||||
progressBar?.start(totalProgress, currentProgress);
|
progressBar?.start(totalProgress, currentProgress);
|
||||||
|
|
||||||
// Process in batches for controlled concurrency
|
// Process in batches for controlled concurrency
|
||||||
const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond * 2)); // 2x rate for faster processing
|
const CONCURRENT_REQUESTS = 1;
|
||||||
const results: (DetailedListing | null)[] = [];
|
const results: (DetailedListing | null)[] = [];
|
||||||
|
|
||||||
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
||||||
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
|
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
|
||||||
const batchPromises = batch.map(async (link) => {
|
const batchPromises = batch.map(async (link) => {
|
||||||
try {
|
try {
|
||||||
const html = await fetchHtml(link, 0, {
|
const html = await fetchHtml(link, DELAY_MS, {
|
||||||
// No per-request delay, batch handles rate limit
|
// Per-request delay keeps detail fetches within REQUESTS_PER_SECOND.
|
||||||
onRateInfo: (remaining, reset) => {
|
onRateInfo: (remaining, reset) => {
|
||||||
if (remaining && reset) {
|
if (remaining && reset) {
|
||||||
console.log(
|
console.log(
|
||||||
@@ -936,12 +936,6 @@ export default async function fetchKijijiItems(
|
|||||||
const batchResults = await Promise.all(batchPromises);
|
const batchResults = await Promise.all(batchPromises);
|
||||||
results.push(...batchResults);
|
results.push(...batchResults);
|
||||||
|
|
||||||
// Wait between batches to respect rate limit
|
|
||||||
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
|
||||||
await new Promise((resolve) =>
|
|
||||||
setTimeout(resolve, DELAY_MS * batch.length),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
allListings.push(
|
allListings.push(
|
||||||
|
|||||||
@@ -571,6 +571,56 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
expect(results).toHaveLength(1);
|
expect(results).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("preserves free listings through the public fetch entrypoint", async () => {
|
||||||
|
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
|
||||||
|
payload: {
|
||||||
|
resultGroups: [
|
||||||
|
{
|
||||||
|
edges: [
|
||||||
|
{
|
||||||
|
node: {
|
||||||
|
listing: {
|
||||||
|
id: "free-1",
|
||||||
|
marketplace_listing_title: "Free Chair",
|
||||||
|
listing_price: {
|
||||||
|
amount: "0.00",
|
||||||
|
formatted_amount: "FREE",
|
||||||
|
currency: "CAD",
|
||||||
|
},
|
||||||
|
is_live: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
})}</script></body></html>`;
|
||||||
|
|
||||||
|
global.fetch = mock(() =>
|
||||||
|
Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () => Promise.resolve(mockSearchHtml),
|
||||||
|
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||||
|
headers: {
|
||||||
|
get: () => null,
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const results = await fetchFacebookItems("chair", 1, "toronto", 25);
|
||||||
|
|
||||||
|
expect(results).toEqual([
|
||||||
|
expect.objectContaining({
|
||||||
|
title: "Free Chair",
|
||||||
|
listingPrice: expect.objectContaining({
|
||||||
|
cents: 0,
|
||||||
|
amountFormatted: "FREE",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
|
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
|
||||||
payload: {
|
payload: {
|
||||||
|
|||||||
@@ -321,6 +321,113 @@ describe("fetchKijijiItems", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("respects REQUESTS_PER_SECOND without concurrent detail fetch bursts", async () => {
|
||||||
|
const searchHtml = `
|
||||||
|
<html>
|
||||||
|
<script id="__NEXT_DATA__" type="application/json">
|
||||||
|
${JSON.stringify({
|
||||||
|
props: {
|
||||||
|
pageProps: {
|
||||||
|
__APOLLO_STATE__: {
|
||||||
|
"Listing:1": { url: "/v-one/k0l0", title: "One" },
|
||||||
|
"Listing:2": { url: "/v-two/k0l0", title: "Two" },
|
||||||
|
"Listing:3": { url: "/v-three/k0l0", title: "Three" },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})}
|
||||||
|
</script>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
|
const listingHtml = (title: string, slug: string) => `
|
||||||
|
<html>
|
||||||
|
<script id="__NEXT_DATA__" type="application/json">
|
||||||
|
${JSON.stringify({
|
||||||
|
props: {
|
||||||
|
pageProps: {
|
||||||
|
__APOLLO_STATE__: {
|
||||||
|
"Listing:detail": {
|
||||||
|
url: `/${slug}`,
|
||||||
|
title,
|
||||||
|
price: { amount: 10000, currency: "CAD", type: "FIXED" },
|
||||||
|
type: "OFFER",
|
||||||
|
status: "ACTIVE",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})}
|
||||||
|
</script>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
|
let activeDetailRequests = 0;
|
||||||
|
let maxActiveDetailRequests = 0;
|
||||||
|
|
||||||
|
global.fetch = mock(async (input: string | URL | Request) => {
|
||||||
|
const url = typeof input === "string" ? input : input.toString();
|
||||||
|
|
||||||
|
if (url.includes("/k0c0l1700272")) {
|
||||||
|
return {
|
||||||
|
ok: true,
|
||||||
|
text: () => Promise.resolve(searchHtml),
|
||||||
|
headers: { get: () => null },
|
||||||
|
url,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
activeDetailRequests++;
|
||||||
|
maxActiveDetailRequests = Math.max(
|
||||||
|
maxActiveDetailRequests,
|
||||||
|
activeDetailRequests,
|
||||||
|
);
|
||||||
|
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 5));
|
||||||
|
|
||||||
|
activeDetailRequests--;
|
||||||
|
|
||||||
|
if (url.endsWith("/v-one/k0l0")) {
|
||||||
|
return {
|
||||||
|
ok: true,
|
||||||
|
text: () => Promise.resolve(listingHtml("One", "v-one/k0l0")),
|
||||||
|
headers: { get: () => null },
|
||||||
|
url,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (url.endsWith("/v-two/k0l0")) {
|
||||||
|
return {
|
||||||
|
ok: true,
|
||||||
|
text: () => Promise.resolve(listingHtml("Two", "v-two/k0l0")),
|
||||||
|
headers: { get: () => null },
|
||||||
|
url,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (url.endsWith("/v-three/k0l0")) {
|
||||||
|
return {
|
||||||
|
ok: true,
|
||||||
|
text: () => Promise.resolve(listingHtml("Three", "v-three/k0l0")),
|
||||||
|
headers: { get: () => null },
|
||||||
|
url,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error(`Unexpected URL: ${url}`);
|
||||||
|
}) as typeof fetch;
|
||||||
|
|
||||||
|
const results = await fetchKijijiItems(
|
||||||
|
"phone",
|
||||||
|
1,
|
||||||
|
"https://www.kijiji.ca",
|
||||||
|
{ maxPages: 1 },
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(results).toHaveLength(3);
|
||||||
|
expect(maxActiveDetailRequests).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
test("classifies the filtered Kijiji result set in unstable mode", async () => {
|
test("classifies the filtered Kijiji result set in unstable mode", async () => {
|
||||||
const searchHtml = `
|
const searchHtml = `
|
||||||
<html>
|
<html>
|
||||||
|
|||||||
Reference in New Issue
Block a user