fix: cover scraper pricing edge cases
This commit is contained in:
@@ -38,6 +38,8 @@ export interface EbayListingDetails {
|
||||
address?: string | null;
|
||||
}
|
||||
|
||||
// Matches text that begins like a price: optional whitespace followed by
// either a "CA"/"C" prefix, optional whitespace and "$" (e.g. "CA $100.00",
// "C$100"), or one of the bare currency symbols $, £, € or ¥.
// Only the start of the string is checked; nothing after the symbol is validated.
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£€¥])/u;
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/**
|
||||
@@ -253,7 +255,7 @@ function parseEbayListings(
|
||||
const text = el.textContent?.trim();
|
||||
if (
|
||||
text &&
|
||||
/^\s*[$£€¥]/u.test(text) &&
|
||||
EBAY_PRICE_TEXT_RE.test(text) &&
|
||||
text.length < 50 &&
|
||||
!/\d{4}/.test(text)
|
||||
) {
|
||||
|
||||
@@ -890,7 +890,7 @@ export function parseFacebookAds(
|
||||
if (priceObj.formatted_amount) {
|
||||
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
|
||||
if (match) {
|
||||
const dollars = Number.parseFloat(match[0].replace(",", ""));
|
||||
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
|
||||
if (!Number.isNaN(dollars)) {
|
||||
cents = Math.round(dollars * 100);
|
||||
} else {
|
||||
|
||||
@@ -214,14 +214,21 @@ const CATEGORY_SLUGS = Object.fromEntries(
|
||||
|
||||
const SEPS = new Set([" ", "–", "—", "/", ":", ";", ",", ".", "-"]);
|
||||
|
||||
/**
 * Normalize a free-form name into a lookup key.
 *
 * Surrounding whitespace is removed, the text is lower-cased, and every run
 * of whitespace and/or hyphens collapses into a single "-". As a result
 * "Nova Scotia", " nova  scotia " and "nova-scotia" all yield "nova-scotia".
 *
 * @param value - Raw name supplied by a caller or taken from a mapping key.
 * @returns The normalized key.
 */
function normalizeLookupKey(value: string): string {
  // Trim first: otherwise padding would become a leading/trailing "-" and
  // the key could never equal a normalized mapping key.
  return value.trim().toLowerCase().replace(/[\s-]+/g, "-");
}
|
||||
|
||||
/**
 * Resolve location ID from name or return numeric ID.
 *
 * Numeric input is returned unchanged. String input is normalized with
 * `normalizeLookupKey` and compared against every `LOCATION_MAPPINGS` key
 * normalized the same way, so differences in letter case, spacing and
 * hyphenation between the input and the mapping keys do not prevent a match.
 *
 * @param location - Numeric location ID or a location name.
 * @returns The mapped location ID, or 0 when the name is unknown or no
 *   location is given.
 */
export function resolveLocationId(location?: number | string): number {
  if (typeof location === "number") return location;
  if (typeof location === "string") {
    const normalized = normalizeLookupKey(location);
    // Normalize both sides so keys containing spaces or hyphens still match.
    const mapping = Object.entries(LOCATION_MAPPINGS).find(
      ([key]) => normalizeLookupKey(key) === normalized,
    );
    return mapping?.[1] ?? 0; // Default to Canada (0)
  }
  return 0; // Default to Canada
}
|
||||
@@ -232,12 +239,38 @@ export function resolveLocationId(location?: number | string): number {
|
||||
export function resolveCategoryId(category?: number | string): number {
  // Numeric input is already an ID and is returned unchanged.
  if (typeof category === "number") return category;
  if (typeof category === "string") {
    // Compare the normalized input against each CATEGORY_MAPPINGS key
    // normalized the same way, so letter case, spacing and hyphenation
    // differences do not prevent a match.
    const normalized = normalizeLookupKey(category);
    const mapping = Object.entries(CATEGORY_MAPPINGS).find(
      ([key]) => normalizeLookupKey(key) === normalized,
    );
    return mapping?.[1] ?? 0; // Default to all categories
  }
  // No category supplied.
  return 0; // Default to all categories
}
|
||||
|
||||
/**
 * Decide whether a listing passes the price bounds of a search.
 *
 * A listing whose `listingPrice.cents` is not a number is always rejected.
 * Otherwise it is rejected when a numeric `priceMin` exceeds its price or a
 * numeric `priceMax` is below it; a bound that is not a number is ignored.
 *
 * @param listing - Listing whose `listingPrice?.cents` is inspected.
 * @param searchOptions - Options providing `priceMin` and `priceMax`.
 * @returns `true` when the listing has a numeric price inside the bounds.
 */
function matchesPriceFilters(
  listing: DetailedListing,
  searchOptions: Required<SearchOptions>,
): boolean {
  const price = listing.listingPrice?.cents;
  if (typeof price !== "number") return false;

  const { priceMin, priceMax } = searchOptions;
  const belowMinimum = typeof priceMin === "number" && price < priceMin;
  const aboveMaximum = typeof priceMax === "number" && price > priceMax;

  return !(belowMinimum || aboveMaximum);
}
|
||||
|
||||
/**
|
||||
* Build search URL with enhanced parameters
|
||||
*/
|
||||
@@ -917,32 +950,30 @@ export default async function fetchKijijiItems(
|
||||
}
|
||||
}
|
||||
|
||||
const filteredListings = allListings.filter((listing) => {
|
||||
const cents = listing.listingPrice?.cents;
|
||||
const filteredListings = allListings.filter((listing) =>
|
||||
matchesPriceFilters(listing, finalSearchOptions),
|
||||
);
|
||||
|
||||
if (typeof cents !== "number") return false;
|
||||
if (
|
||||
typeof finalSearchOptions.priceMin === "number" &&
|
||||
cents < finalSearchOptions.priceMin
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
if (
|
||||
typeof finalSearchOptions.priceMax === "number" &&
|
||||
cents > finalSearchOptions.priceMax
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
});
|
||||
const finalListings = unstableMode.hideUnstableResults
|
||||
? (() => {
|
||||
const classified = classifyUnstableListings(allListings);
|
||||
return {
|
||||
results: classified.results.filter((listing) =>
|
||||
matchesPriceFilters(listing, finalSearchOptions),
|
||||
),
|
||||
unstableResults: classified.unstableResults.filter((listing) =>
|
||||
matchesPriceFilters(listing, finalSearchOptions),
|
||||
),
|
||||
};
|
||||
})()
|
||||
: filteredListings;
|
||||
|
||||
console.log(
|
||||
`\nParsed ${unstableMode.hideUnstableResults ? allListings.length : filteredListings.length} detailed listings.`,
|
||||
);
|
||||
return finalizeResults(
|
||||
unstableMode.hideUnstableResults ? allListings : filteredListings,
|
||||
);
|
||||
return unstableMode.hideUnstableResults
|
||||
? finalListings
|
||||
: finalizeResults(finalListings);
|
||||
}
|
||||
|
||||
// Re-export error classes for convenience
|
||||
|
||||
@@ -127,6 +127,38 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
test("prefers the discounted Canadian-formatted price", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">
|
||||
<s>CA $150.00</s>
|
||||
<span>CA $100.00</span>
|
||||
</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
amountFormatted: "CA $100.00",
|
||||
cents: 10000,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
|
||||
@@ -1508,6 +1508,33 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("Valid Ad");
|
||||
});
|
||||
|
||||
test("parses formatted fallback prices with multiple commas", () => {
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "big-price",
|
||||
marketplace_listing_title: "Luxury Home",
|
||||
listing_price: {
|
||||
amount_with_offset_in_currency: "123456789",
|
||||
formatted_amount: "$1,234,567.89",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({ cents: 123456789 }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -49,6 +49,7 @@ describe("Location and Category Resolution", () => {
|
||||
expect(resolveLocationId("ontario")).toBe(9004);
|
||||
expect(resolveLocationId("toronto")).toBe(1700273);
|
||||
expect(resolveLocationId("gta")).toBe(1700272);
|
||||
expect(resolveLocationId("Nova Scotia")).toBe(9002);
|
||||
});
|
||||
|
||||
test("should handle case insensitive matching", () => {
|
||||
@@ -291,7 +292,7 @@ describe("fetchKijijiItems", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
test("classifies unstable mode using all parsed listings before price filtering", async () => {
|
||||
test("applies price filters to unstable-mode buckets", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
@@ -388,7 +389,118 @@ describe("fetchKijijiItems", () => {
|
||||
expect.objectContaining({ title: "Stable Listing One" }),
|
||||
expect.objectContaining({ title: "Stable Listing Two" }),
|
||||
],
|
||||
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
|
||||
unstableResults: [],
|
||||
});
|
||||
});
|
||||
|
||||
test("unstable mode keeps out-of-range stable listings out of final results", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
|
||||
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
|
||||
"Listing:3": { url: "/v-out-of-range/k0l0", title: "Out Of Range Stable" },
|
||||
"Listing:4": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
const listingHtml = (title: string, amount: number, slug: string) => `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:detail": {
|
||||
url: `/${slug}`,
|
||||
title,
|
||||
price: { amount, currency: "CAD", type: "FIXED" },
|
||||
type: "OFFER",
|
||||
status: "ACTIVE",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
global.fetch = mock((input: string | URL | Request) => {
|
||||
const url = typeof input === "string" ? input : input.toString();
|
||||
|
||||
if (url.includes("/k0c0l1700272")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(searchHtml),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-stable-one/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-stable-two/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-out-of-range/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Out Of Range Stable", 20000, "v-out-of-range/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-unstable/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
throw new Error(`Unexpected URL: ${url}`);
|
||||
}) as typeof fetch;
|
||||
|
||||
const results = await fetchKijijiItems(
|
||||
"phone",
|
||||
1000,
|
||||
"https://www.kijiji.ca",
|
||||
{ maxPages: 1, priceMin: 8000, priceMax: 15000 },
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
|
||||
expect(results).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({ title: "Stable Listing One" }),
|
||||
expect.objectContaining({ title: "Stable Listing Two" }),
|
||||
],
|
||||
unstableResults: [],
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user