fix: finalize scraper unstable mode integration

This commit is contained in:
2026-04-23 00:20:21 -04:00
parent 55faee7dd5
commit 881c2ddf8c
6 changed files with 118 additions and 39 deletions

View File

@@ -40,6 +40,16 @@ export interface EbayListingDetails {
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£¥])/u;
/**
 * Reduce an eBay item URL to a stable canonical form suitable for use as a
 * de-duplication key.
 *
 * - Relative URLs are resolved against `https://www.ebay.ca`.
 * - Query string and fragment are always dropped.
 * - `/itm/<id>` paths are reduced to `<origin>/itm/<id>`.
 * - Legacy slug paths (`/itm/<title-slug>/<numeric-id>`) are reduced to
 *   `<origin>/itm/<numeric-id>` so that two different listings sharing the
 *   same title slug do not collapse into one key.
 * - URLs without an `/itm/` segment fall back to `<origin><pathname>`.
 *
 * @param url - Absolute or site-relative URL of a listing.
 * @returns The canonical URL, or the input unchanged if it cannot be parsed.
 */
function canonicalizeEbayItemUrl(url: string): string {
  try {
    const parsed = new URL(url, "https://www.ebay.ca");
    const { origin, pathname } = parsed;

    // Slug form: the first segment after /itm/ is NOT purely numeric and is
    // followed by a purely numeric segment, which is the real item id.
    const slugForm = pathname.match(
      /\/itm\/(?!\d+(?:\/|$))[^/]+\/(\d+)(?=\/|$)/,
    );
    if (slugForm) {
      return `${origin}/itm/${slugForm[1]}`;
    }

    // Plain form: keep only the first segment after /itm/.
    const plainForm = pathname.match(/\/itm\/[^/?#]+/);
    return plainForm ? `${origin}${plainForm[0]}` : `${origin}${pathname}`;
  } catch {
    // Unparseable input: hand it back untouched so callers can still use it
    // as an (un-normalized) key.
    return url;
  }
}
// ----------------------------- Utilities -----------------------------
/**
@@ -121,7 +131,8 @@ function parseEbayListings(
: `https://www.ebay.ca${href}`;
}
if (seenUrls.has(href)) continue;
const canonicalUrl = canonicalizeEbayItemUrl(href);
if (seenUrls.has(canonicalUrl)) continue;
// Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures (often 5-10 levels deep)
@@ -334,7 +345,7 @@ function parseEbayListings(
};
results.push(listing);
seenUrls.add(href);
seenUrls.add(canonicalUrl);
} catch (err) {
console.warn(`Error parsing eBay listing: ${err}`);
}

View File

@@ -1289,13 +1289,15 @@ export async function fetchFacebookItem(
return null;
}
if (classification.unavailable) {
const itemData = extractFacebookItemData(itemHtml);
if (classification.unavailable && !itemData) {
logExtractionMetrics(false, itemId);
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`);
return null;
}
if (classification.kind !== "item") {
if (classification.kind !== "item" && !itemData) {
logExtractionMetrics(false, itemId);
console.warn(
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
@@ -1303,7 +1305,6 @@ export async function fetchFacebookItem(
return null;
}
const itemData = extractFacebookItemData(itemHtml);
if (!itemData) {
logExtractionMetrics(false, itemId);

View File

@@ -292,10 +292,14 @@ export function buildSearchUrl(
? SORT_MAPPINGS[options.sortBy]
: "relevancyDesc";
const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC";
const priceMinParam =
typeof options.priceMin === "number" ? `&priceMin=${options.priceMin}` : "";
const priceMaxParam =
typeof options.priceMax === "number" ? `&priceMax=${options.priceMax}` : "";
const pageParam =
options.page && options.page > 1 ? `&page=${options.page}` : "";
url += `?sort=${sortValue}&view=list&order=${sortOrder}${pageParam}`;
url += `?sort=${sortValue}&view=list&order=${sortOrder}${priceMinParam}${priceMaxParam}${pageParam}`;
return url;
}
@@ -954,26 +958,12 @@ export default async function fetchKijijiItems(
matchesPriceFilters(listing, finalSearchOptions),
);
const finalListings = unstableMode.hideUnstableResults
? (() => {
const classified = classifyUnstableListings(allListings);
return {
results: classified.results.filter((listing) =>
matchesPriceFilters(listing, finalSearchOptions),
),
unstableResults: classified.unstableResults.filter((listing) =>
matchesPriceFilters(listing, finalSearchOptions),
),
};
})()
: filteredListings;
console.log(
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
);
return unstableMode.hideUnstableResults
? finalListings
: finalizeResults(finalListings);
? finalizeResults(allListings)
: finalizeResults(filteredListings);
}
// Re-export error classes for convenience