fix: finalize scraper unstable mode integration
This commit is contained in:
@@ -40,6 +40,16 @@ export interface EbayListingDetails {
|
||||
|
||||
// Matches text that *starts* (after optional whitespace) with a currency marker:
// either "CA" or "C" followed by optional whitespace and "$" (e.g. "C $", "CA$"),
// or one of the bare symbols $, £, €, ¥. Only the prefix is tested; whatever
// follows the symbol is not validated by this pattern.
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£€¥])/u;
|
||||
|
||||
/**
 * Reduce an eBay item URL to a stable key suitable for de-duplication.
 *
 * The query string and fragment are always dropped. Relative inputs are
 * resolved against `https://www.ebay.ca`.
 *
 * - `/itm/<id>`            -> `<origin>/itm/<id>`
 * - `/itm/<slug>/<id>`     -> `<origin>/itm/<id>` (numeric id wins over the
 *   title slug, so two different listings that share a slug stay distinct)
 * - `/itm/<segment>`       -> `<origin>/itm/<segment>`
 * - any other path         -> `<origin><pathname>`
 *
 * @param url Absolute or site-relative URL.
 * @returns The canonical URL, or `url` unchanged when it cannot be parsed.
 */
function canonicalizeEbayItemUrl(url: string): string {
  try {
    const parsed = new URL(url, "https://www.ebay.ca");

    // Group 1: first path segment after /itm/. Group 2: an optional
    // all-digit segment immediately after it (must end at "/" or end of path).
    const match = parsed.pathname.match(/\/itm\/([^/?#]+)(?:\/(\d+)(?=\/|$))?/);
    if (!match) {
      return `${parsed.origin}${parsed.pathname}`;
    }

    const firstSegment = match[1] ?? "";
    const trailingId = match[2];

    // Only fall through to the trailing id when the first segment is a
    // non-numeric slug; `/itm/<id>/...` keeps the leading id as before.
    const firstIsNumeric = /^\d+$/.test(firstSegment);
    const itemKey =
      trailingId !== undefined && !firstIsNumeric ? trailingId : firstSegment;

    return `${parsed.origin}/itm/${itemKey}`;
  } catch {
    // Unparseable input: hand it back untouched rather than failing the caller.
    return url;
  }
}
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/**
|
||||
@@ -121,7 +131,8 @@ function parseEbayListings(
|
||||
: `https://www.ebay.ca${href}`;
|
||||
}
|
||||
|
||||
if (seenUrls.has(href)) continue;
|
||||
const canonicalUrl = canonicalizeEbayItemUrl(href);
|
||||
if (seenUrls.has(canonicalUrl)) continue;
|
||||
|
||||
// Find the container - go up several levels to find the item container
|
||||
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||
@@ -334,7 +345,7 @@ function parseEbayListings(
|
||||
};
|
||||
|
||||
results.push(listing);
|
||||
seenUrls.add(href);
|
||||
seenUrls.add(canonicalUrl);
|
||||
} catch (err) {
|
||||
console.warn(`Error parsing eBay listing: ${err}`);
|
||||
}
|
||||
|
||||
@@ -1289,13 +1289,15 @@ export async function fetchFacebookItem(
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.unavailable) {
|
||||
const itemData = extractFacebookItemData(itemHtml);
|
||||
|
||||
if (classification.unavailable && !itemData) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.kind !== "item") {
|
||||
if (classification.kind !== "item" && !itemData) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(
|
||||
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
|
||||
@@ -1303,7 +1305,6 @@ export async function fetchFacebookItem(
|
||||
return null;
|
||||
}
|
||||
|
||||
const itemData = extractFacebookItemData(itemHtml);
|
||||
if (!itemData) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
|
||||
|
||||
@@ -292,10 +292,14 @@ export function buildSearchUrl(
|
||||
? SORT_MAPPINGS[options.sortBy]
|
||||
: "relevancyDesc";
|
||||
const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC";
|
||||
const priceMinParam =
|
||||
typeof options.priceMin === "number" ? `&priceMin=${options.priceMin}` : "";
|
||||
const priceMaxParam =
|
||||
typeof options.priceMax === "number" ? `&priceMax=${options.priceMax}` : "";
|
||||
const pageParam =
|
||||
options.page && options.page > 1 ? `&page=${options.page}` : "";
|
||||
|
||||
url += `?sort=${sortValue}&view=list&order=${sortOrder}${pageParam}`;
|
||||
url += `?sort=${sortValue}&view=list&order=${sortOrder}${priceMinParam}${priceMaxParam}${pageParam}`;
|
||||
|
||||
return url;
|
||||
}
|
||||
@@ -954,26 +958,12 @@ export default async function fetchKijijiItems(
|
||||
matchesPriceFilters(listing, finalSearchOptions),
|
||||
);
|
||||
|
||||
const finalListings = unstableMode.hideUnstableResults
|
||||
? (() => {
|
||||
const classified = classifyUnstableListings(allListings);
|
||||
return {
|
||||
results: classified.results.filter((listing) =>
|
||||
matchesPriceFilters(listing, finalSearchOptions),
|
||||
),
|
||||
unstableResults: classified.unstableResults.filter((listing) =>
|
||||
matchesPriceFilters(listing, finalSearchOptions),
|
||||
),
|
||||
};
|
||||
})()
|
||||
: filteredListings;
|
||||
|
||||
console.log(
|
||||
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
|
||||
);
|
||||
return unstableMode.hideUnstableResults
|
||||
? finalListings
|
||||
: finalizeResults(finalListings);
|
||||
? finalizeResults(allListings)
|
||||
: finalizeResults(filteredListings);
|
||||
}
|
||||
|
||||
// Re-export error classes for convenience
|
||||
|
||||
Reference in New Issue
Block a user