diff --git a/packages/core/src/scrapers/ebay.ts b/packages/core/src/scrapers/ebay.ts index 0fd55bf..771144c 100644 --- a/packages/core/src/scrapers/ebay.ts +++ b/packages/core/src/scrapers/ebay.ts @@ -40,6 +40,16 @@ export interface EbayListingDetails { const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£€¥])/u; +function canonicalizeEbayItemUrl(url: string): string { + try { + const parsed = new URL(url, "https://www.ebay.ca"); + const match = parsed.pathname.match(/\/itm\/[^/?#]+/); + return match ? `${parsed.origin}${match[0]}` : `${parsed.origin}${parsed.pathname}`; + } catch { + return url; + } +} + // ----------------------------- Utilities ----------------------------- /** @@ -121,7 +131,8 @@ function parseEbayListings( : `https://www.ebay.ca${href}`; } - if (seenUrls.has(href)) continue; + const canonicalUrl = canonicalizeEbayItemUrl(href); + if (seenUrls.has(canonicalUrl)) continue; // Find the container - go up several levels to find the item container // Modern eBay uses complex nested structures (often 5-10 levels deep) @@ -334,7 +345,7 @@ function parseEbayListings( }; results.push(listing); - seenUrls.add(href); + seenUrls.add(canonicalUrl); } catch (err) { console.warn(`Error parsing eBay listing: ${err}`); } diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 180aa48..bc33a5f 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -1289,13 +1289,15 @@ export async function fetchFacebookItem( return null; } - if (classification.unavailable) { + const itemData = extractFacebookItemData(itemHtml); + + if (classification.unavailable && !itemData) { logExtractionMetrics(false, itemId); console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`); return null; } - if (classification.kind !== "item") { + if (classification.kind !== "item" && !itemData) { logExtractionMetrics(false, itemId); console.warn( `Item ${itemId} returned unexpected route kind: ${classification.kind}.`, @@ -1303,7 +1305,6 @@ export async function fetchFacebookItem( return null; } - const itemData = extractFacebookItemData(itemHtml); if (!itemData) { logExtractionMetrics(false, itemId); diff --git a/packages/core/src/scrapers/kijiji.ts b/packages/core/src/scrapers/kijiji.ts index 33cfe7b..ea99730 100644 --- a/packages/core/src/scrapers/kijiji.ts +++ b/packages/core/src/scrapers/kijiji.ts @@ -292,10 +292,14 @@ export function buildSearchUrl( ? SORT_MAPPINGS[options.sortBy] : "relevancyDesc"; const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC"; + const priceMinParam = + typeof options.priceMin === "number" ? `&priceMin=${options.priceMin}` : ""; + const priceMaxParam = + typeof options.priceMax === "number" ? `&priceMax=${options.priceMax}` : ""; const pageParam = options.page && options.page > 1 ? `&page=${options.page}` : ""; - url += `?sort=${sortValue}&view=list&order=${sortOrder}${pageParam}`; + url += `?sort=${sortValue}&view=list&order=${sortOrder}${priceMinParam}${priceMaxParam}${pageParam}`; return url; } @@ -954,26 +958,12 @@ export default async function fetchKijijiItems( matchesPriceFilters(listing, finalSearchOptions), ); - const finalListings = unstableMode.hideUnstableResults - ? (() => { - const classified = classifyUnstableListings(allListings); - return { - results: classified.results.filter((listing) => - matchesPriceFilters(listing, finalSearchOptions), - ), - unstableResults: classified.unstableResults.filter((listing) => - matchesPriceFilters(listing, finalSearchOptions), - ), - }; - })() - : filteredListings; - console.log( `\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`, ); return unstableMode.hideUnstableResults - ? finalListings - : finalizeResults(finalListings); + ? finalizeResults(allListings) + : finalizeResults(filteredListings); } // Re-export error classes for convenience diff --git a/packages/core/test/ebay-core.test.ts b/packages/core/test/ebay-core.test.ts index a47f87c..f7bd951 100644 --- a/packages/core/test/ebay-core.test.ts +++ b/packages/core/test/ebay-core.test.ts @@ -101,6 +101,36 @@ describe("eBay Scraper Cookie Handling", () => { ); }); + test("deduplicates tracking variants of the same item URL", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve(` +
+