fix: harden scraper price parsing

This commit is contained in:
2026-04-23 10:31:08 -04:00
parent 807849e257
commit 244a88e63c
4 changed files with 77 additions and 33 deletions

View File

@@ -240,7 +240,6 @@ function parseEbayListings(
!text.includes("core") && !text.includes("core") &&
!text.includes("ram") && !text.includes("ram") &&
!text.includes("ssd") && !text.includes("ssd") &&
!/\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements !text.includes('"') // Avoid measurements
) { ) {
priceElement = el; priceElement = el;

View File

@@ -890,30 +890,15 @@ export function parseFacebookAds(
: priceObj.amount; : priceObj.amount;
cents = Math.round(dollars * 100); cents = Math.round(dollars * 100);
} else if (priceObj.amount_with_offset_in_currency != null) { } else if (priceObj.amount_with_offset_in_currency != null) {
// Fallback: try to extract cents from amount_with_offset_in_currency if (!priceObj.formatted_amount) continue;
// This appears to use some exchange rate/multiplier format
const encodedAmount = Number(priceObj.amount_with_offset_in_currency); const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (!Number.isNaN(encodedAmount) && encodedAmount > 0) { if (!match) continue;
// Estimate roughly - this field doesn't contain real cents
// Use formatted_amount to get the actual dollar amount const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
if (priceObj.formatted_amount) { if (Number.isNaN(dollars)) continue;
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (match) { cents = Math.round(dollars * 100);
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
if (!Number.isNaN(dollars)) {
cents = Math.round(dollars * 100);
} else {
cents = encodedAmount; // fallback
}
} else {
cents = encodedAmount; // fallback
}
} else {
cents = encodedAmount; // fallback
}
} else {
continue; // Invalid price
}
} else { } else {
continue; // No price available continue; // No price available
} }
@@ -977,7 +962,9 @@ export function parseFacebookAds(
}; };
results.push(listingDetails); results.push(listingDetails);
} catch {} } catch (error) {
console.warn("Failed to parse Facebook ad:", error);
}
} }
return results; return results;

View File

@@ -12,18 +12,14 @@ type IsExact<T, U> =
: false; : false;
const getDefaultEbayItems = async () => fetchEbayItems("laptop"); const getDefaultEbayItems = async () => fetchEbayItems("laptop");
const unstableEbayItemsPromise = fetchEbayItems( const getUnstableEbayItems = async () =>
"laptop", fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
1000,
{},
{ hideUnstableResults: true },
);
type _EbayDefaultReturn = Assert< type _EbayDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]> IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
>; >;
type _EbayUnstableReturn = Assert< type _EbayUnstableReturn = Assert<
IsExact< IsExact<
Awaited<typeof unstableEbayItemsPromise>, Awaited<ReturnType<typeof getUnstableEbayItems>>,
UnstableListingBuckets<EbayListingDetails> UnstableListingBuckets<EbayListingDetails>
> >
>; >;
@@ -302,6 +298,38 @@ describe("eBay Scraper Cookie Handling", () => {
]); ]);
}); });
// Fallback price detection: the listing markup below has no price-specific
// class, so the price has to be recovered from the bare "CA $2500.00" text.
test("accepts higher fallback prices without price classes", async () => {
  const listingHtml = `
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Studio Microphone Bundle</h3>
<div>CA $2500.00</div>
</li>
</body></html>
`;

  // Stub the network layer so every fetch resolves to the canned page.
  const fetchStub = mock(() =>
    Promise.resolve({
      ok: true,
      text: () => Promise.resolve(listingHtml),
    }),
  );
  global.fetch = fetchStub as typeof fetch;

  const results = await fetchEbayItems("microphone", 1000, {
    keywords: ["microphone"],
  });

  // Exactly one listing is expected, priced at 2500.00 (250000 cents).
  const expectedPrice = expect.objectContaining({
    amountFormatted: "CA $2500.00",
    cents: 250000,
  });
  const expectedListing = expect.objectContaining({
    title: "Studio Microphone Bundle",
    listingPrice: expectedPrice,
  });
  expect(results).toEqual([expectedListing]);
});
test("retains free items when the requested price range includes zero", async () => { test("retains free items when the requested price range includes zero", async () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({

View File

@@ -1613,6 +1613,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}); });
test("should handle malformed ads gracefully", () => { test("should handle malformed ads gracefully", () => {
const originalWarn = console.warn;
const warnMock = mock(() => {});
console.warn = warnMock;
const ads = [ const ads = [
{ {
node: { node: {
@@ -1638,6 +1642,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const results = parseFacebookAds(ads); const results = parseFacebookAds(ads);
expect(results).toHaveLength(1); expect(results).toHaveLength(1);
expect(results[0].title).toBe("Valid Ad"); expect(results[0].title).toBe("Valid Ad");
expect(warnMock).toHaveBeenCalledTimes(1);
console.warn = originalWarn;
}); });
test("parses formatted fallback prices with multiple commas", () => { test("parses formatted fallback prices with multiple commas", () => {
@@ -1667,6 +1674,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
]); ]);
}); });
// The only numeric field here is amount_with_offset_in_currency, and
// formatted_amount contains no digits; the ad is expected to be dropped
// rather than returned with a price.
test("does not trust amount_with_offset_in_currency without a parseable formatted price", () => {
  const unparseablePrice = {
    amount_with_offset_in_currency: "123456789",
    formatted_amount: "price unavailable",
    currency: "CAD",
  };
  const listing = {
    id: "bad-offset",
    marketplace_listing_title: "Broken Price Listing",
    listing_price: unparseablePrice,
    is_live: true,
  };
  const ads = [{ node: { listing } }];

  const parsed = parseFacebookAds(ads);

  expect(parsed).toEqual([]);
});
test("keeps valid free search listings", () => { test("keeps valid free search listings", () => {
const ads = [ const ads = [
{ {