diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 2178c68..1750622 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -984,13 +984,13 @@ export function parseFacebookItem( const url = `https://www.facebook.com/marketplace/item/${item.id}`; // Extract price information - let cents = 0; + let cents: number | undefined; let currency = "CAD"; // Default - let amountFormatted = item.formatted_price?.text || "FREE"; + let amountFormatted = item.formatted_price?.text; if (item.listing_price) { currency = item.listing_price.currency || "CAD"; - if (item.listing_price.amount && item.listing_price.amount !== "0.00") { + if (item.listing_price.amount != null) { const amount = Number.parseFloat(item.listing_price.amount); if (!Number.isNaN(amount)) { cents = Math.round(amount * 100); @@ -1037,6 +1037,13 @@ export function parseFacebookItem( listingType = "vehicle"; } + if (cents == null || !amountFormatted) { + if (!listingStatus || listingStatus === "ACTIVE") return null; + + cents = 0; + amountFormatted = item.formatted_price?.text || "PRICE_UNAVAILABLE"; + } + const listingDetails: FacebookListingDetails = { url, title, diff --git a/packages/core/src/scrapers/kijiji.ts b/packages/core/src/scrapers/kijiji.ts index b51bfe5..9c342ae 100644 --- a/packages/core/src/scrapers/kijiji.ts +++ b/packages/core/src/scrapers/kijiji.ts @@ -889,15 +889,19 @@ export default async function fetchKijijiItems( progressBar?.start(totalProgress, currentProgress); // Process in batches for controlled concurrency - const CONCURRENT_REQUESTS = 1; + const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond)); const results: (DetailedListing | null)[] = []; for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) { const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS); - const batchPromises = batch.map(async (link) => { + const batchPromises = batch.map(async (link, batchIndex) => { try { - const html = await fetchHtml(link, DELAY_MS, { - // Per-request delay keeps detail fetches within REQUESTS_PER_SECOND. + if (batchIndex > 0) { + await new Promise((resolve) => setTimeout(resolve, DELAY_MS * batchIndex)); + } + + const html = await fetchHtml(link, 0, { + // Staggered starts keep request pacing within REQUESTS_PER_SECOND. onRateInfo: (remaining, reset) => { if (remaining && reset) { console.log( @@ -936,6 +940,10 @@ export default async function fetchKijijiItems( const batchResults = await Promise.all(batchPromises); results.push(...batchResults); + if (i + CONCURRENT_REQUESTS < newListingLinks.length) { + await new Promise((resolve) => setTimeout(resolve, DELAY_MS)); + } + } allListings.push( diff --git a/packages/core/test/facebook-core.test.ts b/packages/core/test/facebook-core.test.ts index 2a82e7b..0e5bfd4 100644 --- a/packages/core/test/facebook-core.test.ts +++ b/packages/core/test/facebook-core.test.ts @@ -1532,10 +1532,21 @@ describe("Facebook Marketplace Scraper Core Tests", () => { }; const result = parseFacebookItem(item); - expect(result).not.toBeNull(); - expect(result?.title).toBe("Minimal Item"); - expect(result?.description).toBeUndefined(); - expect(result?.seller).toBeUndefined(); + expect(result).toBeNull(); + }); + + test("returns null when item price data is present but unparseable", () => { + const item = { + id: "456b", + __typename: "GroupCommerceProductItem" as const, + marketplace_listing_title: "Broken Price Item", + formatted_price: { text: "price unavailable" }, + listing_price: { amount: "not-a-number", currency: "CAD" }, + }; + + const result = parseFacebookItem(item); + + expect(result).toBeNull(); }); test("should identify vehicle listings", () => { diff --git a/packages/core/test/kijiji-core.test.ts b/packages/core/test/kijiji-core.test.ts index 772a141..7f08158 100644 --- a/packages/core/test/kijiji-core.test.ts +++ b/packages/core/test/kijiji-core.test.ts @@ -428,6 +428,104 @@ describe("fetchKijijiItems", () => { expect(maxActiveDetailRequests).toBe(1); }); + test("allows bounded concurrency to scale with REQUESTS_PER_SECOND", async () => { + const searchHtml = ` + + + + `; + + const listingHtml = (title: string, slug: string) => ` + + + + `; + + let activeDetailRequests = 0; + let maxActiveDetailRequests = 0; + + global.fetch = mock(async (input: string | URL | Request) => { + const url = typeof input === "string" ? input : input.toString(); + + if (url.includes("/k0c0l1700272")) { + return { + ok: true, + text: () => Promise.resolve(searchHtml), + headers: { get: () => null }, + url, + }; + } + + activeDetailRequests++; + maxActiveDetailRequests = Math.max( + maxActiveDetailRequests, + activeDetailRequests, + ); + + await new Promise((resolve) => setTimeout(resolve, 300)); + + activeDetailRequests--; + + if (url.endsWith("/v-one/k0l0")) { + return { + ok: true, + text: () => Promise.resolve(listingHtml("One", "v-one/k0l0")), + headers: { get: () => null }, + url, + }; + } + + if (url.endsWith("/v-two/k0l0")) { + return { + ok: true, + text: () => Promise.resolve(listingHtml("Two", "v-two/k0l0")), + headers: { get: () => null }, + url, + }; + } + + throw new Error(`Unexpected URL: ${url}`); + }) as typeof fetch; + + const results = await fetchKijijiItems( + "phone", + 4, + "https://www.kijiji.ca", + { maxPages: 1 }, + ); + + expect(results).toHaveLength(2); + expect(maxActiveDetailRequests).toBeGreaterThan(1); + expect(maxActiveDetailRequests).toBeLessThanOrEqual(4); + }); + test("classifies the filtered Kijiji result set in unstable mode", async () => { const searchHtml = `