From 08edfa8097df8ab851f5e8d95e9f7a9f778e43d0 Mon Sep 17 00:00:00 2001 From: Dmytro Stanchiev Date: Wed, 22 Apr 2026 23:36:00 -0400 Subject: [PATCH] fix: align scraper unstable mode behavior --- packages/core/src/scrapers/ebay.ts | 7 ++++-- packages/core/src/scrapers/facebook.ts | 10 +++++++-- packages/core/src/scrapers/kijiji.ts | 6 +++-- packages/core/test/ebay-core.test.ts | 28 +++++++++++++++++++++--- packages/core/test/facebook-core.test.ts | 21 ++++++++++-------- 5 files changed, 54 insertions(+), 18 deletions(-) diff --git a/packages/core/src/scrapers/ebay.ts b/packages/core/src/scrapers/ebay.ts index 79feb2b..1294408 100644 --- a/packages/core/src/scrapers/ebay.ts +++ b/packages/core/src/scrapers/ebay.ts @@ -1,5 +1,6 @@ import { parseHTML } from "linkedom"; import type { + HTMLString, UnstableListingBuckets, UnstableListingModeOptions, } from "../types/common"; @@ -114,7 +115,7 @@ function parseEbayListings( if (!href.startsWith("http")) { href = href.startsWith("//") ? `https:${href}` - : `https://www.ebay.com${href}`; + : `https://www.ebay.ca${href}`; } // Find the container - go up several levels to find the item container @@ -397,6 +398,8 @@ export default async function fetchEbayItems( } = {}, unstableMode: UnstableListingModeOptions = {}, ) { + const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1; + const finalizeResults = ( listings: EbayListingDetails[], ): EbayListingDetails[] | UnstableListingBuckets => { @@ -436,7 +439,7 @@ export default async function fetchEbayItems( const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`; - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond)); console.log(`Fetching eBay search: ${searchUrl}`); diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 01051c1..f09ba38 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -1086,6 +1086,8 @@ export default async function fetchFacebookItems( MAX_ITEMS = 25, unstableMode: UnstableListingModeOptions = {}, ) { + const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1; + const finalizeResults = ( listings: FacebookListingDetails[], ): FacebookListingDetails[] | UnstableListingBuckets => { @@ -1093,7 +1095,11 @@ export default async function fetchFacebookItems( return listings.slice(0, MAX_ITEMS); } - return classifyUnstableListings(listings.slice(0, MAX_ITEMS)); + const classified = classifyUnstableListings(listings); + return { + results: classified.results.slice(0, MAX_ITEMS), + unstableResults: classified.unstableResults, + }; }; const cookies = await ensureFacebookCookies(); @@ -1107,7 +1113,7 @@ export default async function fetchFacebookItems( ); } - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond)); // Encode search query for URL const encodedQuery = encodeURIComponent(SEARCH_QUERY); diff --git a/packages/core/src/scrapers/kijiji.ts b/packages/core/src/scrapers/kijiji.ts index ff11163..038c1bb 100644 --- a/packages/core/src/scrapers/kijiji.ts +++ b/packages/core/src/scrapers/kijiji.ts @@ -725,6 +725,8 @@ export default async function fetchKijijiItems( listingOptions: ListingFetchOptions = {}, unstableMode: UnstableListingModeOptions = {}, ) { + const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1; + const finalizeResults = ( listings: DetailedListing[], ): DetailedListing[] | UnstableListingBuckets => { @@ -735,7 +737,7 @@ export default async function fetchKijijiItems( return classifyUnstableListings(listings); }; - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond)); // Load Kijiji cookies (optional - helps bypass bot detection) const cookies = await loadCookiesOptional( @@ -824,7 +826,7 @@ export default async function fetchKijijiItems( progressBar?.start(totalProgress, currentProgress); // Process in batches for controlled concurrency - const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing + const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond * 2)); // 2x rate for faster processing const results: (DetailedListing | null)[] = []; for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) { diff --git a/packages/core/test/ebay-core.test.ts b/packages/core/test/ebay-core.test.ts index 7712f46..9823db2 100644 --- a/packages/core/test/ebay-core.test.ts +++ b/packages/core/test/ebay-core.test.ts @@ -38,9 +38,7 @@ describe("eBay Scraper Cookie Handling", () => { const warnMock = mock(() => {}); console.warn = warnMock; - await fetchEbayItems("laptop", 1000, { - cookies: "s=from-request", - }); + await fetchEbayItems("laptop", 1000); expect(global.fetch).toHaveBeenCalledTimes(1); @@ -53,6 +51,30 @@ describe("eBay Scraper Cookie Handling", () => { ); }); + test("keeps relative item links on the ebay.ca host", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve(` + +
  • + +

    Stable Laptop Bundle

    + CA $100.00 +
  • + + `), + }), + ) as typeof fetch; + + const results = await fetchEbayItems("laptop", 1000); + + expect(results).toEqual([ + expect.objectContaining({ url: "https://www.ebay.ca/itm/123" }), + ]); + }); + test("returns results and unstableResults when unstable mode is enabled", async () => { global.fetch = mock(() => Promise.resolve({ diff --git a/packages/core/test/facebook-core.test.ts b/packages/core/test/facebook-core.test.ts index 09bc729..e685dd9 100644 --- a/packages/core/test/facebook-core.test.ts +++ b/packages/core/test/facebook-core.test.ts @@ -521,7 +521,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => { }); }); - test("unstable mode keeps MAX_ITEMS as the classification boundary", async () => { + test("unstable mode classifies before the final MAX_ITEMS limit", async () => { const mockSearchHtml = `