diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 8128608..0465362 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -1205,7 +1205,8 @@ export default async function fetchFacebookItems( // Filter to only priced items (already done in parseFacebookAds) const pricedItems = items.filter( - (item) => item.listingPrice?.cents && item.listingPrice.cents > 0, + (item) => + typeof item.listingPrice?.cents === "number" && item.listingPrice.cents >= 0, ); progressBar.update(totalProgress); diff --git a/packages/core/src/scrapers/kijiji.ts b/packages/core/src/scrapers/kijiji.ts index 18d8702..b51bfe5 100644 --- a/packages/core/src/scrapers/kijiji.ts +++ b/packages/core/src/scrapers/kijiji.ts @@ -889,15 +889,15 @@ export default async function fetchKijijiItems( progressBar?.start(totalProgress, currentProgress); // Process in batches for controlled concurrency - const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond * 2)); // 2x rate for faster processing + const CONCURRENT_REQUESTS = 1; const results: (DetailedListing | null)[] = []; for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) { const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS); const batchPromises = batch.map(async (link) => { try { - const html = await fetchHtml(link, 0, { - // No per-request delay, batch handles rate limit + const html = await fetchHtml(link, DELAY_MS, { + // Per-request delay keeps detail fetches within REQUESTS_PER_SECOND. onRateInfo: (remaining, reset) => { if (remaining && reset) { console.log( @@ -936,12 +936,6 @@ export default async function fetchKijijiItems( const batchResults = await Promise.all(batchPromises); results.push(...batchResults); - // Wait between batches to respect rate limit - if (i + CONCURRENT_REQUESTS < newListingLinks.length) { - await new Promise((resolve) => - setTimeout(resolve, DELAY_MS * batch.length), - ); - } } allListings.push( diff --git a/packages/core/test/facebook-core.test.ts b/packages/core/test/facebook-core.test.ts index 64e5eeb..c672816 100644 --- a/packages/core/test/facebook-core.test.ts +++ b/packages/core/test/facebook-core.test.ts @@ -571,6 +571,56 @@ describe("Facebook Marketplace Scraper Core Tests", () => { expect(results).toHaveLength(1); }); + test("preserves free listings through the public fetch entrypoint", async () => { + const mockSearchHtml = ``; + + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => Promise.resolve(mockSearchHtml), + url: "https://www.facebook.com/marketplace/toronto/search?query=chair", + headers: { + get: () => null, + }, + }), + ); + + const results = await fetchFacebookItems("chair", 1, "toronto", 25); + + expect(results).toEqual([ + expect.objectContaining({ + title: "Free Chair", + listingPrice: expect.objectContaining({ + cents: 0, + amountFormatted: "FREE", + }), + }), + ]); + }); + test("returns results and unstableResults when unstable mode is enabled", async () => { const mockSearchHtml = ` + + `; + + const listingHtml = (title: string, slug: string) => ` + + + + `; + + let activeDetailRequests = 0; + let maxActiveDetailRequests = 0; + + global.fetch = mock(async (input: string | URL | Request) => { + const url = typeof input === "string" ? input : input.toString(); + + if (url.includes("/k0c0l1700272")) { + return { + ok: true, + text: () => Promise.resolve(searchHtml), + headers: { get: () => null }, + url, + }; + } + + activeDetailRequests++; + maxActiveDetailRequests = Math.max( + maxActiveDetailRequests, + activeDetailRequests, + ); + + await new Promise((resolve) => setTimeout(resolve, 5)); + + activeDetailRequests--; + + if (url.endsWith("/v-one/k0l0")) { + return { + ok: true, + text: () => Promise.resolve(listingHtml("One", "v-one/k0l0")), + headers: { get: () => null }, + url, + }; + } + + if (url.endsWith("/v-two/k0l0")) { + return { + ok: true, + text: () => Promise.resolve(listingHtml("Two", "v-two/k0l0")), + headers: { get: () => null }, + url, + }; + } + + if (url.endsWith("/v-three/k0l0")) { + return { + ok: true, + text: () => Promise.resolve(listingHtml("Three", "v-three/k0l0")), + headers: { get: () => null }, + url, + }; + } + + throw new Error(`Unexpected URL: ${url}`); + }) as typeof fetch; + + const results = await fetchKijijiItems( + "phone", + 1, + "https://www.kijiji.ca", + { maxPages: 1 }, + ); + + expect(results).toHaveLength(3); + expect(maxActiveDetailRequests).toBe(1); + }); + test("classifies the filtered Kijiji result set in unstable mode", async () => { const searchHtml = `