fix: tighten scraper parsing behavior

This commit is contained in:
2026-04-22 23:41:08 -04:00
parent 08edfa8097
commit 6f9d4db419
4 changed files with 197 additions and 8 deletions

View File

@@ -62,7 +62,7 @@ function parseEbayPrice(
const cents = Math.round(dollars * 100);
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
let currency = "USD"; // Default
let currency = "CAD"; // Default for ebay.ca
if (
cleaned.toUpperCase().includes("CAD") ||
@@ -70,7 +70,7 @@ function parseEbayPrice(
cleaned.includes("C $")
) {
currency = "CAD";
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
} else if (cleaned.toUpperCase().includes("USD")) {
currency = "USD";
}
@@ -101,6 +101,7 @@ function parseEbayListings(
): EbayListingDetails[] {
const { document } = parseHTML(htmlString);
const results: EbayListingDetails[] = [];
const seenUrls = new Set<string>();
// Find all listing links by looking for eBay item URLs (/itm/)
const linkElements = document.querySelectorAll('a[href*="itm/"]');
@@ -118,6 +119,8 @@ function parseEbayListings(
: `https://www.ebay.ca${href}`;
}
if (seenUrls.has(href)) continue;
// Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures (often 5-10 levels deep)
let container: Element | null = linkElement;
@@ -329,6 +332,7 @@ function parseEbayListings(
};
results.push(listing);
seenUrls.add(href);
} catch (err) {
console.warn(`Error parsing eBay listing: ${err}`);
}

View File

@@ -202,6 +202,14 @@ const SORT_MAPPINGS: Record<string, string> = {
distance: "DISTANCE",
};
// Reverse lookup derived from LOCATION_MAPPINGS: each mapped id becomes a key
// whose value is the original mapping key with every run of whitespace
// replaced by a single "-".
// If more than one mapping key points at the same id, the entry that comes
// later in Object.entries() order overwrites the earlier one.
// Object keys are strings at runtime; the cast only lets callers index the
// table with the numeric id.
const LOCATION_SLUGS = Object.fromEntries(
Object.entries(LOCATION_MAPPINGS).map(([slug, id]) => [id, slug.replace(/\s+/g, "-")]),
) as Record<number, string>;
// Reverse lookup derived from CATEGORY_MAPPINGS: each mapped id becomes a key
// whose value is the original mapping key with every run of whitespace
// replaced by a single "-".
// If more than one mapping key points at the same id, the entry that comes
// later in Object.entries() order overwrites the earlier one.
// Object keys are strings at runtime; the cast only lets callers index the
// table with the numeric id.
const CATEGORY_SLUGS = Object.fromEntries(
Object.entries(CATEGORY_MAPPINGS).map(([slug, id]) => [id, slug.replace(/\s+/g, "-")]),
) as Record<number, string>;
// ----------------------------- Utilities -----------------------------
const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]);
@@ -241,8 +249,8 @@ export function buildSearchUrl(
const locationId = resolveLocationId(options.location);
const categoryId = resolveCategoryId(options.category);
const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell";
const locationSlug = locationId === 0 ? "canada" : "canada";
const categorySlug = CATEGORY_SLUGS[categoryId] ?? "buy-sell";
const locationSlug = LOCATION_SLUGS[locationId] ?? "canada";
let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`;
@@ -893,8 +901,28 @@ export default async function fetchKijijiItems(
}
}
console.log(`\nParsed ${allListings.length} detailed listings.`);
return finalizeResults(allListings);
// Keep only listings whose listingPrice.cents is a number and lies within the
// optional priceMin / priceMax bounds (both bounds are inclusive).
// NOTE(review): the bounds are compared directly against a value in cents —
// confirm finalSearchOptions.priceMin / priceMax are also expressed in cents.
const filteredListings = allListings.filter((listing) => {
const cents = listing.listingPrice?.cents;
// Listings without a numeric price are dropped unconditionally, i.e. even
// when neither priceMin nor priceMax is set.
if (typeof cents !== "number") return false;
// Lower bound applies only when priceMin is a number.
if (
typeof finalSearchOptions.priceMin === "number" &&
cents < finalSearchOptions.priceMin
) {
return false;
}
// Upper bound applies only when priceMax is a number.
if (
typeof finalSearchOptions.priceMax === "number" &&
cents > finalSearchOptions.priceMax
) {
return false;
}
return true;
});
console.log(`\nParsed ${filteredListings.length} detailed listings.`);
return finalizeResults(filteredListings);
}
// Re-export error classes for convenience