fix: cover scraper pricing edge cases

This commit is contained in:
2026-04-22 23:54:07 -04:00
parent b5e14e686a
commit 55faee7dd5
6 changed files with 233 additions and 29 deletions

View File

@@ -38,6 +38,8 @@ export interface EbayListingDetails {
address?: string | null;
}
// Matches text that begins, after optional whitespace, with either a "CA"/"C"
// prefix followed by "$" (whitespace before the "$" is optional) or a bare
// "$", "£", or "¥". Only the start of the text is checked.
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£¥])/u;
// ----------------------------- Utilities -----------------------------
/**
@@ -253,7 +255,7 @@ function parseEbayListings(
const text = el.textContent?.trim();
if (
text &&
/^\s*[$£¥]/u.test(text) &&
EBAY_PRICE_TEXT_RE.test(text) &&
text.length < 50 &&
!/\d{4}/.test(text)
) {

View File

@@ -890,7 +890,7 @@ export function parseFacebookAds(
if (priceObj.formatted_amount) {
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (match) {
const dollars = Number.parseFloat(match[0].replace(",", ""));
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
if (!Number.isNaN(dollars)) {
cents = Math.round(dollars * 100);
} else {

View File

@@ -214,14 +214,21 @@ const CATEGORY_SLUGS = Object.fromEntries(
// Separator strings collected in a Set; where SEPS is consulted is not visible in this hunk.
// NOTE(review): the second entry is an empty string — confirm it is intentional and not a
// character that was lost in encoding.
const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]);
/**
 * Normalize a name into a hyphenated, lower-case lookup key.
 *
 * Surrounding whitespace is removed first so that padded input such as
 * `" Nova Scotia "` yields `"nova-scotia"` rather than `"-nova-scotia-"`,
 * a form that differs from the key produced for the unpadded name. The
 * remainder is lower-cased and every run of whitespace and/or hyphens is
 * collapsed into a single hyphen.
 *
 * @param value - Raw name to normalize.
 * @returns The normalized key; an empty or whitespace-only input gives `""`.
 */
function normalizeLookupKey(value: string): string {
  return value.trim().toLowerCase().replace(/[\s-]+/g, "-");
}
/**
* Resolve a location to its numeric ID.
*
* A number is returned unchanged. A string is normalized and looked up in
* LOCATION_MAPPINGS; an unmatched string, or any other input (including
* undefined), resolves to 0, which the inline comments label as Canada.
*
* NOTE(review): this hunk shows two lookup implementations back to back — a
* direct table index using a whitespace-only normalization that returns
* immediately, followed by a scan that goes through normalizeLookupKey. They
* appear to be the removed and added lines of the diff (the +/- markers are
* not present here); confirm which pair is live in the actual file.
*/
export function resolveLocationId(location?: number | string): number {
if (typeof location === "number") return location;
if (typeof location === "string") {
// Lower-case the name, turn each whitespace run into a hyphen, and index the table directly.
const normalized = location.toLowerCase().replace(/\s+/g, "-");
return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0)
// Normalize both the input and every table key with normalizeLookupKey and
// take the first entry whose normalized key equals the normalized input.
const normalized = normalizeLookupKey(location);
const mapping = Object.entries(LOCATION_MAPPINGS).find(
([key]) => normalizeLookupKey(key) === normalized,
);
return mapping?.[1] ?? 0; // Default to Canada (0)
}
return 0; // Default to Canada
}
@@ -232,12 +239,38 @@ export function resolveLocationId(location?: number | string): number {
export function resolveCategoryId(category?: number | string): number {
// A number is returned unchanged; a string is normalized and looked up in
// CATEGORY_MAPPINGS; everything else (and any unmatched string) resolves to 0.
// NOTE(review): two lookup implementations appear back to back below — they look
// like the removed and added lines of the diff (the +/- markers are not present
// here); confirm which pair is live in the actual file.
if (typeof category === "number") return category;
if (typeof category === "string") {
// Lower-case the name, turn each whitespace run into a hyphen, and index the table directly.
const normalized = category.toLowerCase().replace(/\s+/g, "-");
return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories
// Normalize both the input and every table key with normalizeLookupKey and
// take the first entry whose normalized key equals the normalized input.
const normalized = normalizeLookupKey(category);
const mapping = Object.entries(CATEGORY_MAPPINGS).find(
([key]) => normalizeLookupKey(key) === normalized,
);
return mapping?.[1] ?? 0; // Default to all categories
}
return 0; // Default to all categories
}
/**
 * Decide whether a listing survives the configured price bounds.
 *
 * A listing whose `listingPrice.cents` is not a number is always rejected.
 * Each bound is applied only when it is itself a number, and both bounds
 * are inclusive.
 *
 * @param listing - Listing whose `listingPrice.cents` is compared.
 * @param searchOptions - Options supplying `priceMin` and `priceMax`.
 * @returns `true` when the price is present and inside every active bound.
 */
function matchesPriceFilters(
  listing: DetailedListing,
  searchOptions: Required<SearchOptions>,
): boolean {
  const priceInCents = listing.listingPrice?.cents;
  if (typeof priceInCents !== "number") return false;

  const { priceMin, priceMax } = searchOptions;
  const belowMin = typeof priceMin === "number" && priceInCents < priceMin;
  const aboveMax = typeof priceMax === "number" && priceInCents > priceMax;
  return !belowMin && !aboveMax;
}
/**
* Build search URL with enhanced parameters
*/
@@ -917,32 +950,30 @@ export default async function fetchKijijiItems(
}
}
const filteredListings = allListings.filter((listing) => {
const cents = listing.listingPrice?.cents;
const filteredListings = allListings.filter((listing) =>
matchesPriceFilters(listing, finalSearchOptions),
);
if (typeof cents !== "number") return false;
if (
typeof finalSearchOptions.priceMin === "number" &&
cents < finalSearchOptions.priceMin
) {
return false;
}
if (
typeof finalSearchOptions.priceMax === "number" &&
cents > finalSearchOptions.priceMax
) {
return false;
}
return true;
});
const finalListings = unstableMode.hideUnstableResults
? (() => {
const classified = classifyUnstableListings(allListings);
return {
results: classified.results.filter((listing) =>
matchesPriceFilters(listing, finalSearchOptions),
),
unstableResults: classified.unstableResults.filter((listing) =>
matchesPriceFilters(listing, finalSearchOptions),
),
};
})()
: filteredListings;
console.log(
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
);
return finalizeResults(
unstableMode.hideUnstableResults ? allListings : filteredListings,
);
return unstableMode.hideUnstableResults
? finalListings
: finalizeResults(finalListings);
}
// Re-export error classes for convenience

View File

@@ -127,6 +127,38 @@ describe("eBay Scraper Cookie Handling", () => {
]);
});
// Fixture: a single search-result item whose price element contains a struck-through
// "CA $150.00" followed by a plain "CA $100.00". The assertion expects the scraper to
// report the second, non-struck price.
test("prefers the discounted Canadian-formatted price", async () => {
// Replace global fetch with a mock that always resolves to the fixture HTML.
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">
<s>CA $150.00</s>
<span>CA $100.00</span>
</span>
</li>
</body></html>
`),
}),
) as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
// Exactly one listing is expected, carrying the "CA $100.00" text and 10000 cents.
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({
amountFormatted: "CA $100.00",
cents: 10000,
}),
}),
]);
});
test("returns results and unstableResults when unstable mode is enabled", async () => {
global.fetch = mock(() =>
Promise.resolve({

View File

@@ -1508,6 +1508,33 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Valid Ad");
});
// Feeds parseFacebookAds one ad whose formatted price has two thousands separators
// ("$1,234,567.89") and expects 123456789 cents.
// NOTE(review): the fixture also supplies amount_with_offset_in_currency: "123456789",
// which equals the expected cents value. Confirm the parser actually takes the
// formatted_amount path for this input; otherwise the test could pass without
// exercising the multi-comma handling.
test("parses formatted fallback prices with multiple commas", () => {
const ads = [
{
node: {
listing: {
id: "big-price",
marketplace_listing_title: "Luxury Home",
listing_price: {
amount_with_offset_in_currency: "123456789",
formatted_amount: "$1,234,567.89",
currency: "CAD",
},
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
// Exactly one parsed listing is expected, with the full price in cents.
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ cents: 123456789 }),
}),
]);
});
});
});

View File

@@ -49,6 +49,7 @@ describe("Location and Category Resolution", () => {
expect(resolveLocationId("ontario")).toBe(9004);
expect(resolveLocationId("toronto")).toBe(1700273);
expect(resolveLocationId("gta")).toBe(1700272);
expect(resolveLocationId("Nova Scotia")).toBe(9002);
});
test("should handle case insensitive matching", () => {
@@ -291,7 +292,7 @@ describe("fetchKijijiItems", () => {
]);
});
test("classifies unstable mode using all parsed listings before price filtering", async () => {
test("applies price filters to unstable-mode buckets", async () => {
const searchHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
@@ -388,7 +389,118 @@ describe("fetchKijijiItems", () => {
expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }),
],
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
unstableResults: [],
});
});
// Scenario: the search page links to four listings whose detail pages carry the
// amounts 10000, 11000, 20000 and 7000. With priceMin 8000 / priceMax 15000 and
// hideUnstableResults enabled, only the first two are expected in `results`, and
// `unstableResults` is expected to be empty.
// NOTE(review): how price.amount maps to listingPrice.cents, and which listings the
// classifier treats as unstable, are not visible here — the titles are the only hint.
test("unstable mode keeps out-of-range stable listings out of final results", async () => {
const searchHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
"Listing:3": { url: "/v-out-of-range/k0l0", title: "Out Of Range Stable" },
"Listing:4": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
},
},
},
})}
</script>
</html>
`;
// Builds a detail page embedding one listing with the given title, amount and URL slug.
const listingHtml = (title: string, amount: number, slug: string) => `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:detail": {
url: `/${slug}`,
title,
price: { amount, currency: "CAD", type: "FIXED" },
type: "OFFER",
status: "ACTIVE",
},
},
},
},
})}
</script>
</html>
`;
// Route each request by URL: the search page is recognized by its "/k0c0l1700272"
// segment, each detail page by its path suffix; any other URL fails the test.
global.fetch = mock((input: string | URL | Request) => {
// NOTE(review): a Request object would stringify to "[object Request]" here, so this
// routing relies on fetch being called with a string or URL.
const url = typeof input === "string" ? input : input.toString();
if (url.includes("/k0c0l1700272")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(searchHtml),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-stable-one/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0")),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-stable-two/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0")),
headers: { get: () => null },
url,
});
}
// Amount 20000 is above priceMax (15000).
if (url.endsWith("/v-out-of-range/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Out Of Range Stable", 20000, "v-out-of-range/k0l0")),
headers: { get: () => null },
url,
});
}
// Amount 7000 is below priceMin (8000).
if (url.endsWith("/v-unstable/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
headers: { get: () => null },
url,
});
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
const results = await fetchKijijiItems(
"phone",
1000,
"https://www.kijiji.ca",
{ maxPages: 1, priceMin: 8000, priceMax: 15000 },
{},
{ hideUnstableResults: true },
);
// Both buckets are price-filtered: the 20000 and 7000 listings appear in neither.
expect(results).toEqual({
results: [
expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }),
],
unstableResults: [],
});
});