diff --git a/packages/core/src/scrapers/ebay.ts b/packages/core/src/scrapers/ebay.ts index 0fd55bf..771144c 100644 --- a/packages/core/src/scrapers/ebay.ts +++ b/packages/core/src/scrapers/ebay.ts @@ -40,6 +40,16 @@ export interface EbayListingDetails { const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£€¥])/u; +function canonicalizeEbayItemUrl(url: string): string { + try { + const parsed = new URL(url, "https://www.ebay.ca"); + const match = parsed.pathname.match(/\/itm\/[^/?#]+/); + return match ? `${parsed.origin}${match[0]}` : `${parsed.origin}${parsed.pathname}`; + } catch { + return url; + } +} + // ----------------------------- Utilities ----------------------------- /** @@ -121,7 +131,8 @@ function parseEbayListings( : `https://www.ebay.ca${href}`; } - if (seenUrls.has(href)) continue; + const canonicalUrl = canonicalizeEbayItemUrl(href); + if (seenUrls.has(canonicalUrl)) continue; // Find the container - go up several levels to find the item container // Modern eBay uses complex nested structures (often 5-10 levels deep) @@ -334,7 +345,7 @@ function parseEbayListings( }; results.push(listing); - seenUrls.add(href); + seenUrls.add(canonicalUrl); } catch (err) { console.warn(`Error parsing eBay listing: ${err}`); } diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 180aa48..bc33a5f 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -1289,13 +1289,15 @@ export async function fetchFacebookItem( return null; } - if (classification.unavailable) { + const itemData = extractFacebookItemData(itemHtml); + + if (classification.unavailable && !itemData) { logExtractionMetrics(false, itemId); console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`); return null; } - if (classification.kind !== "item") { + if (classification.kind !== "item" && !itemData) { logExtractionMetrics(false, itemId); console.warn( `Item ${itemId} returned unexpected route kind: ${classification.kind}.`, @@ -1303,7 +1305,6 @@ export async function fetchFacebookItem( return null; } - const itemData = extractFacebookItemData(itemHtml); if (!itemData) { logExtractionMetrics(false, itemId); diff --git a/packages/core/src/scrapers/kijiji.ts b/packages/core/src/scrapers/kijiji.ts index 33cfe7b..ea99730 100644 --- a/packages/core/src/scrapers/kijiji.ts +++ b/packages/core/src/scrapers/kijiji.ts @@ -292,10 +292,14 @@ export function buildSearchUrl( ? SORT_MAPPINGS[options.sortBy] : "relevancyDesc"; const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC"; + const priceMinParam = + typeof options.priceMin === "number" ? `&priceMin=${options.priceMin}` : ""; + const priceMaxParam = + typeof options.priceMax === "number" ? `&priceMax=${options.priceMax}` : ""; const pageParam = options.page && options.page > 1 ? `&page=${options.page}` : ""; - url += `?sort=${sortValue}&view=list&order=${sortOrder}${pageParam}`; + url += `?sort=${sortValue}&view=list&order=${sortOrder}${priceMinParam}${priceMaxParam}${pageParam}`; return url; } @@ -954,26 +958,12 @@ export default async function fetchKijijiItems( matchesPriceFilters(listing, finalSearchOptions), ); - const finalListings = unstableMode.hideUnstableResults - ? (() => { - const classified = classifyUnstableListings(allListings); - return { - results: classified.results.filter((listing) => - matchesPriceFilters(listing, finalSearchOptions), - ), - unstableResults: classified.unstableResults.filter((listing) => - matchesPriceFilters(listing, finalSearchOptions), - ), - }; - })() - : filteredListings; - console.log( `\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`, ); return unstableMode.hideUnstableResults - ? finalListings - : finalizeResults(finalListings); + ? finalizeResults(allListings) + : finalizeResults(filteredListings); } // Re-export error classes for convenience diff --git a/packages/core/test/ebay-core.test.ts b/packages/core/test/ebay-core.test.ts index a47f87c..f7bd951 100644 --- a/packages/core/test/ebay-core.test.ts +++ b/packages/core/test/ebay-core.test.ts @@ -101,6 +101,36 @@ describe("eBay Scraper Cookie Handling", () => { ); }); + test("deduplicates tracking variants of the same item URL", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve(` + +
  • + +

    Stable Laptop Bundle

    + CA $100.00 +
  • +
  • + +

    Stable Laptop Bundle

    + CA $100.00 +
  • + + `), + }), + ) as typeof fetch; + + const results = await fetchEbayItems("laptop", 1000); + + expect(results).toHaveLength(1); + expect(results[0]).toEqual( + expect.objectContaining({ url: "https://www.ebay.ca/itm/123?_trkparms=foo" }), + ); + }); + test("treats bare dollar prices as CAD on ebay.ca", async () => { global.fetch = mock(() => Promise.resolve({ diff --git a/packages/core/test/facebook-core.test.ts b/packages/core/test/facebook-core.test.ts index 22396cb..dd80578 100644 --- a/packages/core/test/facebook-core.test.ts +++ b/packages/core/test/facebook-core.test.ts @@ -358,6 +358,53 @@ describe("Facebook Marketplace Scraper Core Tests", () => { ); }); + test("should parse structured data even when an unavailable banner is present", async () => { + const unavailableStructuredHtml = ` + +
    This listing is no longer available.
    + + + + `; + + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => Promise.resolve(unavailableStructuredHtml), + url: "https://www.facebook.com/marketplace/item/458/", + headers: { + get: () => null, + }, + }), + ); + + const result = await fetchFacebookItem("458"); + + expect(result).toEqual( + expect.objectContaining({ + title: "Recovered Item", + listingStatus: "ACTIVE", + }), + ); + }); + test("should handle successful item extraction", async () => { const mockData = { require: [ diff --git a/packages/core/test/kijiji-core.test.ts b/packages/core/test/kijiji-core.test.ts index 16b39e0..67a2fce 100644 --- a/packages/core/test/kijiji-core.test.ts +++ b/packages/core/test/kijiji-core.test.ts @@ -138,6 +138,16 @@ describe("URL Construction", () => { expect(priceUrl).toContain("order=DESC"); }); + test("includes price filters in the generated search URL", () => { + const url = buildSearchUrl("iphone", { + priceMin: 8000, + priceMax: 10000, + }); + + expect(url).toContain("priceMin=8000"); + expect(url).toContain("priceMax=10000"); + }); + test("should handle string location/category inputs", () => { const url = buildSearchUrl("iphone", { location: "toronto", @@ -292,7 +302,7 @@ describe("fetchKijijiItems", () => { ]); }); - test("applies price filters to unstable-mode buckets", async () => { + test("classifies the full parsed Kijiji set in unstable mode", async () => { const searchHtml = `