fix: tighten scraper parsing behavior
This commit is contained in:
@@ -62,7 +62,7 @@ function parseEbayPrice(
|
||||
const cents = Math.round(dollars * 100);
|
||||
|
||||
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
|
||||
let currency = "USD"; // Default
|
||||
let currency = "CAD"; // Default for ebay.ca
|
||||
|
||||
if (
|
||||
cleaned.toUpperCase().includes("CAD") ||
|
||||
@@ -70,7 +70,7 @@ function parseEbayPrice(
|
||||
cleaned.includes("C $")
|
||||
) {
|
||||
currency = "CAD";
|
||||
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
|
||||
} else if (cleaned.toUpperCase().includes("USD")) {
|
||||
currency = "USD";
|
||||
}
|
||||
|
||||
@@ -101,6 +101,7 @@ function parseEbayListings(
|
||||
): EbayListingDetails[] {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const results: EbayListingDetails[] = [];
|
||||
const seenUrls = new Set<string>();
|
||||
|
||||
// Find all listing links by looking for eBay item URLs (/itm/)
|
||||
const linkElements = document.querySelectorAll('a[href*="itm/"]');
|
||||
@@ -118,6 +119,8 @@ function parseEbayListings(
|
||||
: `https://www.ebay.ca${href}`;
|
||||
}
|
||||
|
||||
if (seenUrls.has(href)) continue;
|
||||
|
||||
// Find the container - go up several levels to find the item container
|
||||
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||
let container: Element | null = linkElement;
|
||||
@@ -329,6 +332,7 @@ function parseEbayListings(
|
||||
};
|
||||
|
||||
results.push(listing);
|
||||
seenUrls.add(href);
|
||||
} catch (err) {
|
||||
console.warn(`Error parsing eBay listing: ${err}`);
|
||||
}
|
||||
|
||||
@@ -202,6 +202,14 @@ const SORT_MAPPINGS: Record<string, string> = {
|
||||
distance: "DISTANCE",
|
||||
};
|
||||
|
||||
/**
 * Inverts a slug -> numeric id table into an id -> slug table, replacing every
 * run of whitespace inside a slug with a single hyphen.
 *
 * If several slugs share the same id, the entry that appears last in the
 * source table wins (standard `Object.fromEntries` overwrite behavior).
 */
function invertSlugTable(
  table: Readonly<Record<string, number>>,
): Record<number, string> {
  const pairs = Object.entries(table).map(
    ([name, id]): [number, string] => [id, name.replace(/\s+/g, "-")],
  );
  return Object.fromEntries(pairs) as Record<number, string>;
}

/** Location id -> hyphenated slug, derived from LOCATION_MAPPINGS. */
const LOCATION_SLUGS = invertSlugTable(LOCATION_MAPPINGS);

/** Category id -> hyphenated slug, derived from CATEGORY_MAPPINGS. */
const CATEGORY_SLUGS = invertSlugTable(CATEGORY_MAPPINGS);

// ----------------------------- Utilities -----------------------------

// NOTE(review): presumably the separator characters recognized by the
// text/slug helpers that follow — confirm against their usage.
const SEPS = new Set([
  " ",
  "–",
  "—",
  "/",
  ":",
  ";",
  ",",
  ".",
  "-",
]);
|
||||
@@ -241,8 +249,8 @@ export function buildSearchUrl(
|
||||
const locationId = resolveLocationId(options.location);
|
||||
const categoryId = resolveCategoryId(options.category);
|
||||
|
||||
const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell";
|
||||
const locationSlug = locationId === 0 ? "canada" : "canada";
|
||||
const categorySlug = CATEGORY_SLUGS[categoryId] ?? "buy-sell";
|
||||
const locationSlug = LOCATION_SLUGS[locationId] ?? "canada";
|
||||
|
||||
let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`;
|
||||
|
||||
@@ -893,8 +901,28 @@ export default async function fetchKijijiItems(
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\nParsed ${allListings.length} detailed listings.`);
|
||||
return finalizeResults(allListings);
|
||||
const filteredListings = allListings.filter((listing) => {
|
||||
const cents = listing.listingPrice?.cents;
|
||||
|
||||
if (typeof cents !== "number") return false;
|
||||
if (
|
||||
typeof finalSearchOptions.priceMin === "number" &&
|
||||
cents < finalSearchOptions.priceMin
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
if (
|
||||
typeof finalSearchOptions.priceMax === "number" &&
|
||||
cents > finalSearchOptions.priceMax
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
});
|
||||
|
||||
console.log(`\nParsed ${filteredListings.length} detailed listings.`);
|
||||
return finalizeResults(filteredListings);
|
||||
}
|
||||
|
||||
// Re-export error classes for convenience
|
||||
|
||||
Reference in New Issue
Block a user