fix: cover scraper pricing edge cases

This commit is contained in:
2026-04-22 23:54:07 -04:00
parent b5e14e686a
commit 55faee7dd5
6 changed files with 233 additions and 29 deletions

View File

@@ -38,6 +38,8 @@ export interface EbayListingDetails {
address?: string | null; address?: string | null;
} }
// Matches text that starts (after optional whitespace) with a currency marker:
// either "CA" or "C" followed by optional whitespace and "$", or a bare "$", "£" or "¥".
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£¥])/u;
// ----------------------------- Utilities ----------------------------- // ----------------------------- Utilities -----------------------------
/** /**
@@ -253,7 +255,7 @@ function parseEbayListings(
const text = el.textContent?.trim(); const text = el.textContent?.trim();
if ( if (
text && text &&
/^\s*[$£¥]/u.test(text) && EBAY_PRICE_TEXT_RE.test(text) &&
text.length < 50 && text.length < 50 &&
!/\d{4}/.test(text) !/\d{4}/.test(text)
) { ) {

View File

@@ -890,7 +890,7 @@ export function parseFacebookAds(
if (priceObj.formatted_amount) { if (priceObj.formatted_amount) {
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/); const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (match) { if (match) {
const dollars = Number.parseFloat(match[0].replace(",", "")); const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
if (!Number.isNaN(dollars)) { if (!Number.isNaN(dollars)) {
cents = Math.round(dollars * 100); cents = Math.round(dollars * 100);
} else { } else {

View File

@@ -214,14 +214,21 @@ const CATEGORY_SLUGS = Object.fromEntries(
const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]); const SEPS = new Set([" ", "", "—", "/", ":", ";", ",", ".", "-"]);
/**
 * Normalize a location/category name into a lookup key.
 *
 * Strips surrounding whitespace, lowercases the value, and collapses every run
 * of whitespace and/or hyphens into a single "-", so "Nova Scotia",
 * "nova-scotia" and "  NOVA   SCOTIA " all produce "nova-scotia".
 *
 * @param value - Name to normalize.
 * @returns The normalized key.
 */
function normalizeLookupKey(value: string): string {
  // Trim first: otherwise padded input gains a leading/trailing "-" and can
  // never equal a key that was written without padding.
  return value.trim().toLowerCase().replace(/[\s-]+/g, "-");
}
/** /**
* Resolve location ID from name or return numeric ID * Resolve location ID from name or return numeric ID
*/ */
export function resolveLocationId(location?: number | string): number { export function resolveLocationId(location?: number | string): number {
if (typeof location === "number") return location; if (typeof location === "number") return location;
if (typeof location === "string") { if (typeof location === "string") {
// The input and every LOCATION_MAPPINGS key are passed through normalizeLookupKey
// before comparison; the first entry whose normalized key matches supplies the ID.
// NOTE(review): this re-normalizes and scans the whole table on every call —
// consider building a normalized-key map once if lookups become frequent.
const normalized = location.toLowerCase().replace(/\s+/g, "-"); const normalized = normalizeLookupKey(location);
return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) const mapping = Object.entries(LOCATION_MAPPINGS).find(
([key]) => normalizeLookupKey(key) === normalized,
);
return mapping?.[1] ?? 0; // Default to Canada (0)
} }
return 0; // Default to Canada return 0; // Default to Canada
} }
@@ -232,12 +239,38 @@ export function resolveLocationId(location?: number | string): number {
export function resolveCategoryId(category?: number | string): number { export function resolveCategoryId(category?: number | string): number {
if (typeof category === "number") return category; if (typeof category === "number") return category;
if (typeof category === "string") { if (typeof category === "string") {
// The input and every CATEGORY_MAPPINGS key are passed through normalizeLookupKey
// before comparison; the first entry whose normalized key matches supplies the ID.
// NOTE(review): this re-normalizes and scans the whole table on every call —
// consider building a normalized-key map once if lookups become frequent.
const normalized = category.toLowerCase().replace(/\s+/g, "-"); const normalized = normalizeLookupKey(category);
return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories const mapping = Object.entries(CATEGORY_MAPPINGS).find(
([key]) => normalizeLookupKey(key) === normalized,
);
return mapping?.[1] ?? 0; // Default to all categories
} }
return 0; // Default to all categories return 0; // Default to all categories
} }
/**
 * Decide whether a listing survives the price-range filters.
 *
 * A listing without a usable numeric `listingPrice.cents` never matches.
 * Otherwise `priceMin` and `priceMax` are each applied only when they are
 * numbers, and both bounds are inclusive.
 *
 * @param listing - Listing whose `listingPrice?.cents` is compared.
 * @param searchOptions - Options carrying the `priceMin` / `priceMax` bounds.
 * @returns `true` when the listing has a price inside the configured range.
 */
function matchesPriceFilters(
  listing: DetailedListing,
  searchOptions: Required<SearchOptions>,
): boolean {
  const cents = listing.listingPrice?.cents;
  // NaN is typeof "number" and fails every `<` / `>` comparison, so without
  // this guard an unparseable price would slip past both bounds.
  if (typeof cents !== "number" || Number.isNaN(cents)) return false;

  const { priceMin, priceMax } = searchOptions;
  if (typeof priceMin === "number" && cents < priceMin) return false;
  if (typeof priceMax === "number" && cents > priceMax) return false;
  return true;
}
/** /**
* Build search URL with enhanced parameters * Build search URL with enhanced parameters
*/ */
@@ -917,32 +950,30 @@ export default async function fetchKijijiItems(
} }
} }
const filteredListings = allListings.filter((listing) => { const filteredListings = allListings.filter((listing) =>
const cents = listing.listingPrice?.cents; matchesPriceFilters(listing, finalSearchOptions),
);
if (typeof cents !== "number") return false; const finalListings = unstableMode.hideUnstableResults
if ( ? (() => {
typeof finalSearchOptions.priceMin === "number" && const classified = classifyUnstableListings(allListings);
cents < finalSearchOptions.priceMin return {
) { results: classified.results.filter((listing) =>
return false; matchesPriceFilters(listing, finalSearchOptions),
} ),
if ( unstableResults: classified.unstableResults.filter((listing) =>
typeof finalSearchOptions.priceMax === "number" && matchesPriceFilters(listing, finalSearchOptions),
cents > finalSearchOptions.priceMax ),
) { };
return false; })()
} : filteredListings;
return true;
});
console.log( console.log(
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`, `\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
); );
return finalizeResults( return unstableMode.hideUnstableResults
unstableMode.hideUnstableResults ? allListings : filteredListings, ? finalListings
); : finalizeResults(finalListings);
} }
// Re-export error classes for convenience // Re-export error classes for convenience

View File

@@ -127,6 +127,38 @@ describe("eBay Scraper Cookie Handling", () => {
]); ]);
}); });
// The price cell contains a struck-through "CA $150.00" followed by
// "CA $100.00"; the scraper is expected to report the second value.
// NOTE(review): global.fetch is reassigned here and not restored inside the
// test — presumably a shared hook resets it; confirm.
test("prefers the discounted Canadian-formatted price", async () => {
// Stub fetch so the request resolves to a results page with one listing.
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">
<s>CA $150.00</s>
<span>CA $100.00</span>
</span>
</li>
</body></html>
`),
}),
) as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
// Exactly one result, priced from the non-struck-through span.
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({
amountFormatted: "CA $100.00",
cents: 10000,
}),
}),
]);
});
test("returns results and unstableResults when unstable mode is enabled", async () => { test("returns results and unstableResults when unstable mode is enabled", async () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({

View File

@@ -1508,6 +1508,33 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("Valid Ad"); expect(results[0].title).toBe("Valid Ad");
}); });
// "$1,234,567.89" has two thousands separators; the expected result is
// 123456789 cents, which requires every comma to be stripped before parsing.
// NOTE(review): amount_with_offset_in_currency is also "123456789". If
// parseFacebookAds reads that field before formatted_amount, this test passes
// without exercising the formatted path — confirm which field is used here.
test("parses formatted fallback prices with multiple commas", () => {
const ads = [
{
node: {
listing: {
id: "big-price",
marketplace_listing_title: "Luxury Home",
listing_price: {
amount_with_offset_in_currency: "123456789",
formatted_amount: "$1,234,567.89",
currency: "CAD",
},
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ cents: 123456789 }),
}),
]);
});
}); });
}); });

View File

@@ -49,6 +49,7 @@ describe("Location and Category Resolution", () => {
expect(resolveLocationId("ontario")).toBe(9004); expect(resolveLocationId("ontario")).toBe(9004);
expect(resolveLocationId("toronto")).toBe(1700273); expect(resolveLocationId("toronto")).toBe(1700273);
expect(resolveLocationId("gta")).toBe(1700272); expect(resolveLocationId("gta")).toBe(1700272);
expect(resolveLocationId("Nova Scotia")).toBe(9002);
}); });
test("should handle case insensitive matching", () => { test("should handle case insensitive matching", () => {
@@ -291,7 +292,7 @@ describe("fetchKijijiItems", () => {
]); ]);
}); });
test("classifies unstable mode using all parsed listings before price filtering", async () => { test("applies price filters to unstable-mode buckets", async () => {
const searchHtml = ` const searchHtml = `
<html> <html>
<script id="__NEXT_DATA__" type="application/json"> <script id="__NEXT_DATA__" type="application/json">
@@ -388,7 +389,118 @@ describe("fetchKijijiItems", () => {
expect.objectContaining({ title: "Stable Listing One" }), expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }), expect.objectContaining({ title: "Stable Listing Two" }),
], ],
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })], unstableResults: [],
});
});
test("unstable mode keeps out-of-range stable listings out of final results", async () => {
const searchHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
"Listing:3": { url: "/v-out-of-range/k0l0", title: "Out Of Range Stable" },
"Listing:4": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
},
},
},
})}
</script>
</html>
`;
const listingHtml = (title: string, amount: number, slug: string) => `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:detail": {
url: `/${slug}`,
title,
price: { amount, currency: "CAD", type: "FIXED" },
type: "OFFER",
status: "ACTIVE",
},
},
},
},
})}
</script>
</html>
`;
global.fetch = mock((input: string | URL | Request) => {
const url = typeof input === "string" ? input : input.toString();
if (url.includes("/k0c0l1700272")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(searchHtml),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-stable-one/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0")),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-stable-two/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0")),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-out-of-range/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Out Of Range Stable", 20000, "v-out-of-range/k0l0")),
headers: { get: () => null },
url,
});
}
if (url.endsWith("/v-unstable/k0l0")) {
return Promise.resolve({
ok: true,
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
headers: { get: () => null },
url,
});
}
throw new Error(`Unexpected URL: ${url}`);
}) as typeof fetch;
const results = await fetchKijijiItems(
"phone",
1000,
"https://www.kijiji.ca",
{ maxPages: 1, priceMin: 8000, priceMax: 15000 },
{},
{ hideUnstableResults: true },
);
expect(results).toEqual({
results: [
expect.objectContaining({ title: "Stable Listing One" }),
expect.objectContaining({ title: "Stable Listing Two" }),
],
unstableResults: [],
}); });
}); });