fix: harden scraper price parsing

This commit is contained in:
2026-04-23 10:31:08 -04:00
parent 807849e257
commit 244a88e63c
4 changed files with 77 additions and 33 deletions

View File

@@ -240,7 +240,6 @@ function parseEbayListings(
!text.includes("core") &&
!text.includes("ram") &&
!text.includes("ssd") &&
!/\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements
) {
priceElement = el;

View File

@@ -890,30 +890,15 @@ export function parseFacebookAds(
: priceObj.amount;
cents = Math.round(dollars * 100);
} else if (priceObj.amount_with_offset_in_currency != null) {
// Fallback: try to extract cents from amount_with_offset_in_currency
// This appears to use some exchange rate/multiplier format
const encodedAmount = Number(priceObj.amount_with_offset_in_currency);
if (!Number.isNaN(encodedAmount) && encodedAmount > 0) {
// Estimate roughly - this field doesn't contain real cents
// Use formatted_amount to get the actual dollar amount
if (priceObj.formatted_amount) {
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (match) {
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
if (!Number.isNaN(dollars)) {
cents = Math.round(dollars * 100);
} else {
cents = encodedAmount; // fallback
}
} else {
cents = encodedAmount; // fallback
}
} else {
cents = encodedAmount; // fallback
}
} else {
continue; // Invalid price
}
if (!priceObj.formatted_amount) continue;
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
if (!match) continue;
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
if (Number.isNaN(dollars)) continue;
cents = Math.round(dollars * 100);
} else {
continue; // No price available
}
@@ -977,7 +962,9 @@ export function parseFacebookAds(
};
results.push(listingDetails);
} catch {}
} catch (error) {
console.warn("Failed to parse Facebook ad:", error);
}
}
return results;

View File

@@ -12,18 +12,14 @@ type IsExact<T, U> =
: false;
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
const unstableEbayItemsPromise = fetchEbayItems(
"laptop",
1000,
{},
{ hideUnstableResults: true },
);
const getUnstableEbayItems = async () =>
fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
type _EbayDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
>;
type _EbayUnstableReturn = Assert<
IsExact<
Awaited<typeof unstableEbayItemsPromise>,
Awaited<ReturnType<typeof getUnstableEbayItems>>,
UnstableListingBuckets<EbayListingDetails>
>
>;
@@ -302,6 +298,38 @@ describe("eBay Scraper Cookie Handling", () => {
]);
});
// Regression test: a listing whose price sits in a plain <div> with no class
// attribute, and whose amount has four digits before the decimal point
// ("CA $2500.00"), must still be returned with that price.
test("accepts higher fallback prices without price classes", async () => {
// Replace global.fetch with a stub that resolves to a fixed HTML page
// containing exactly one "s-item" listing.
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Studio Microphone Bundle</h3>
<div>CA $2500.00</div>
</li>
</body></html>
`),
}),
) as typeof fetch;
// Run the scraper against the stubbed page.
const results = await fetchEbayItems("microphone", 1000, {
keywords: ["microphone"],
});
// Exactly one listing is expected; the formatted text is kept verbatim and
// 2500.00 is reported as 250000 cents.
expect(results).toEqual([
expect.objectContaining({
title: "Studio Microphone Bundle",
listingPrice: expect.objectContaining({
amountFormatted: "CA $2500.00",
cents: 250000,
}),
}),
]);
});
test("retains free items when the requested price range includes zero", async () => {
global.fetch = mock(() =>
Promise.resolve({

View File

@@ -1613,6 +1613,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
test("should handle malformed ads gracefully", () => {
const originalWarn = console.warn;
const warnMock = mock(() => {});
console.warn = warnMock;
const ads = [
{
node: {
@@ -1638,6 +1642,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const results = parseFacebookAds(ads);
expect(results).toHaveLength(1);
expect(results[0].title).toBe("Valid Ad");
expect(warnMock).toHaveBeenCalledTimes(1);
console.warn = originalWarn;
});
test("parses formatted fallback prices with multiple commas", () => {
@@ -1667,6 +1674,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
]);
});
// Regression test: when a listing's price object carries only
// amount_with_offset_in_currency and a formatted_amount containing no digits,
// parseFacebookAds must not return the listing.
test("does not trust amount_with_offset_in_currency without a parseable formatted price", () => {
const ads = [
{
node: {
listing: {
id: "bad-offset",
marketplace_listing_title: "Broken Price Listing",
// No `amount` field here: only the offset-encoded value and a
// formatted string with no numeric content.
listing_price: {
amount_with_offset_in_currency: "123456789",
formatted_amount: "price unavailable",
currency: "CAD",
},
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
// The single ad is expected to be absent from the output.
expect(results).toEqual([]);
});
test("keeps valid free search listings", () => {
const ads = [
{