fix: harden scraper price parsing
This commit is contained in:
@@ -240,7 +240,6 @@ function parseEbayListings(
|
||||
!text.includes("core") &&
|
||||
!text.includes("ram") &&
|
||||
!text.includes("ssd") &&
|
||||
!/\d{4}/.test(text) && // Avoid years like "2024"
|
||||
!text.includes('"') // Avoid measurements
|
||||
) {
|
||||
priceElement = el;
|
||||
|
||||
@@ -890,30 +890,15 @@ export function parseFacebookAds(
|
||||
: priceObj.amount;
|
||||
cents = Math.round(dollars * 100);
|
||||
} else if (priceObj.amount_with_offset_in_currency != null) {
|
||||
// Fallback: try to extract cents from amount_with_offset_in_currency
|
||||
// This appears to use some exchange rate/multiplier format
|
||||
const encodedAmount = Number(priceObj.amount_with_offset_in_currency);
|
||||
if (!Number.isNaN(encodedAmount) && encodedAmount > 0) {
|
||||
// Estimate roughly - this field doesn't contain real cents
|
||||
// Use formatted_amount to get the actual dollar amount
|
||||
if (priceObj.formatted_amount) {
|
||||
if (!priceObj.formatted_amount) continue;
|
||||
|
||||
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
|
||||
if (match) {
|
||||
if (!match) continue;
|
||||
|
||||
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
|
||||
if (!Number.isNaN(dollars)) {
|
||||
if (Number.isNaN(dollars)) continue;
|
||||
|
||||
cents = Math.round(dollars * 100);
|
||||
} else {
|
||||
cents = encodedAmount; // fallback
|
||||
}
|
||||
} else {
|
||||
cents = encodedAmount; // fallback
|
||||
}
|
||||
} else {
|
||||
cents = encodedAmount; // fallback
|
||||
}
|
||||
} else {
|
||||
continue; // Invalid price
|
||||
}
|
||||
} else {
|
||||
continue; // No price available
|
||||
}
|
||||
@@ -977,7 +962,9 @@ export function parseFacebookAds(
|
||||
};
|
||||
|
||||
results.push(listingDetails);
|
||||
} catch {}
|
||||
} catch (error) {
|
||||
console.warn("Failed to parse Facebook ad:", error);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
@@ -12,18 +12,14 @@ type IsExact<T, U> =
|
||||
: false;
|
||||
|
||||
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
||||
const unstableEbayItemsPromise = fetchEbayItems(
|
||||
"laptop",
|
||||
1000,
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
const getUnstableEbayItems = async () =>
|
||||
fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
|
||||
type _EbayDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
||||
>;
|
||||
type _EbayUnstableReturn = Assert<
|
||||
IsExact<
|
||||
Awaited<typeof unstableEbayItemsPromise>,
|
||||
Awaited<ReturnType<typeof getUnstableEbayItems>>,
|
||||
UnstableListingBuckets<EbayListingDetails>
|
||||
>
|
||||
>;
|
||||
@@ -302,6 +298,38 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
test("accepts higher fallback prices without price classes", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Studio Microphone Bundle</h3>
|
||||
<div>CA $2500.00</div>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("microphone", 1000, {
|
||||
keywords: ["microphone"],
|
||||
});
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Studio Microphone Bundle",
|
||||
listingPrice: expect.objectContaining({
|
||||
amountFormatted: "CA $2500.00",
|
||||
cents: 250000,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("retains free items when the requested price range includes zero", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
|
||||
@@ -1613,6 +1613,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
});
|
||||
|
||||
test("should handle malformed ads gracefully", () => {
|
||||
const originalWarn = console.warn;
|
||||
const warnMock = mock(() => {});
|
||||
console.warn = warnMock;
|
||||
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
@@ -1638,6 +1642,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
const results = parseFacebookAds(ads);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("Valid Ad");
|
||||
expect(warnMock).toHaveBeenCalledTimes(1);
|
||||
|
||||
console.warn = originalWarn;
|
||||
});
|
||||
|
||||
test("parses formatted fallback prices with multiple commas", () => {
|
||||
@@ -1667,6 +1674,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
test("does not trust amount_with_offset_in_currency without a parseable formatted price", () => {
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "bad-offset",
|
||||
marketplace_listing_title: "Broken Price Listing",
|
||||
listing_price: {
|
||||
amount_with_offset_in_currency: "123456789",
|
||||
formatted_amount: "price unavailable",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
|
||||
test("keeps valid free search listings", () => {
|
||||
const ads = [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user