fix: harden scraper price parsing
This commit is contained in:
@@ -240,7 +240,6 @@ function parseEbayListings(
|
|||||||
!text.includes("core") &&
|
!text.includes("core") &&
|
||||||
!text.includes("ram") &&
|
!text.includes("ram") &&
|
||||||
!text.includes("ssd") &&
|
!text.includes("ssd") &&
|
||||||
!/\d{4}/.test(text) && // Avoid years like "2024"
|
|
||||||
!text.includes('"') // Avoid measurements
|
!text.includes('"') // Avoid measurements
|
||||||
) {
|
) {
|
||||||
priceElement = el;
|
priceElement = el;
|
||||||
|
|||||||
@@ -890,30 +890,15 @@ export function parseFacebookAds(
|
|||||||
: priceObj.amount;
|
: priceObj.amount;
|
||||||
cents = Math.round(dollars * 100);
|
cents = Math.round(dollars * 100);
|
||||||
} else if (priceObj.amount_with_offset_in_currency != null) {
|
} else if (priceObj.amount_with_offset_in_currency != null) {
|
||||||
// Fallback: try to extract cents from amount_with_offset_in_currency
|
if (!priceObj.formatted_amount) continue;
|
||||||
// This appears to use some exchange rate/multiplier format
|
|
||||||
const encodedAmount = Number(priceObj.amount_with_offset_in_currency);
|
|
||||||
if (!Number.isNaN(encodedAmount) && encodedAmount > 0) {
|
|
||||||
// Estimate roughly - this field doesn't contain real cents
|
|
||||||
// Use formatted_amount to get the actual dollar amount
|
|
||||||
if (priceObj.formatted_amount) {
|
|
||||||
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
|
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
|
||||||
if (match) {
|
if (!match) continue;
|
||||||
|
|
||||||
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
|
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
|
||||||
if (!Number.isNaN(dollars)) {
|
if (Number.isNaN(dollars)) continue;
|
||||||
|
|
||||||
cents = Math.round(dollars * 100);
|
cents = Math.round(dollars * 100);
|
||||||
} else {
|
|
||||||
cents = encodedAmount; // fallback
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cents = encodedAmount; // fallback
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
cents = encodedAmount; // fallback
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
continue; // Invalid price
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
continue; // No price available
|
continue; // No price available
|
||||||
}
|
}
|
||||||
@@ -977,7 +962,9 @@ export function parseFacebookAds(
|
|||||||
};
|
};
|
||||||
|
|
||||||
results.push(listingDetails);
|
results.push(listingDetails);
|
||||||
} catch {}
|
} catch (error) {
|
||||||
|
console.warn("Failed to parse Facebook ad:", error);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
|
|||||||
@@ -12,18 +12,14 @@ type IsExact<T, U> =
|
|||||||
: false;
|
: false;
|
||||||
|
|
||||||
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
||||||
const unstableEbayItemsPromise = fetchEbayItems(
|
const getUnstableEbayItems = async () =>
|
||||||
"laptop",
|
fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
|
||||||
1000,
|
|
||||||
{},
|
|
||||||
{ hideUnstableResults: true },
|
|
||||||
);
|
|
||||||
type _EbayDefaultReturn = Assert<
|
type _EbayDefaultReturn = Assert<
|
||||||
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
||||||
>;
|
>;
|
||||||
type _EbayUnstableReturn = Assert<
|
type _EbayUnstableReturn = Assert<
|
||||||
IsExact<
|
IsExact<
|
||||||
Awaited<typeof unstableEbayItemsPromise>,
|
Awaited<ReturnType<typeof getUnstableEbayItems>>,
|
||||||
UnstableListingBuckets<EbayListingDetails>
|
UnstableListingBuckets<EbayListingDetails>
|
||||||
>
|
>
|
||||||
>;
|
>;
|
||||||
@@ -302,6 +298,38 @@ describe("eBay Scraper Cookie Handling", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("accepts higher fallback prices without price classes", async () => {
|
||||||
|
global.fetch = mock(() =>
|
||||||
|
Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () =>
|
||||||
|
Promise.resolve(`
|
||||||
|
<html><body>
|
||||||
|
<li class="s-item">
|
||||||
|
<a href="/itm/123"></a>
|
||||||
|
<h3>Studio Microphone Bundle</h3>
|
||||||
|
<div>CA $2500.00</div>
|
||||||
|
</li>
|
||||||
|
</body></html>
|
||||||
|
`),
|
||||||
|
}),
|
||||||
|
) as typeof fetch;
|
||||||
|
|
||||||
|
const results = await fetchEbayItems("microphone", 1000, {
|
||||||
|
keywords: ["microphone"],
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(results).toEqual([
|
||||||
|
expect.objectContaining({
|
||||||
|
title: "Studio Microphone Bundle",
|
||||||
|
listingPrice: expect.objectContaining({
|
||||||
|
amountFormatted: "CA $2500.00",
|
||||||
|
cents: 250000,
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
test("retains free items when the requested price range includes zero", async () => {
|
test("retains free items when the requested price range includes zero", async () => {
|
||||||
global.fetch = mock(() =>
|
global.fetch = mock(() =>
|
||||||
Promise.resolve({
|
Promise.resolve({
|
||||||
|
|||||||
@@ -1613,6 +1613,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
test("should handle malformed ads gracefully", () => {
|
test("should handle malformed ads gracefully", () => {
|
||||||
|
const originalWarn = console.warn;
|
||||||
|
const warnMock = mock(() => {});
|
||||||
|
console.warn = warnMock;
|
||||||
|
|
||||||
const ads = [
|
const ads = [
|
||||||
{
|
{
|
||||||
node: {
|
node: {
|
||||||
@@ -1638,6 +1642,9 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
const results = parseFacebookAds(ads);
|
const results = parseFacebookAds(ads);
|
||||||
expect(results).toHaveLength(1);
|
expect(results).toHaveLength(1);
|
||||||
expect(results[0].title).toBe("Valid Ad");
|
expect(results[0].title).toBe("Valid Ad");
|
||||||
|
expect(warnMock).toHaveBeenCalledTimes(1);
|
||||||
|
|
||||||
|
console.warn = originalWarn;
|
||||||
});
|
});
|
||||||
|
|
||||||
test("parses formatted fallback prices with multiple commas", () => {
|
test("parses formatted fallback prices with multiple commas", () => {
|
||||||
@@ -1667,6 +1674,29 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("does not trust amount_with_offset_in_currency without a parseable formatted price", () => {
|
||||||
|
const ads = [
|
||||||
|
{
|
||||||
|
node: {
|
||||||
|
listing: {
|
||||||
|
id: "bad-offset",
|
||||||
|
marketplace_listing_title: "Broken Price Listing",
|
||||||
|
listing_price: {
|
||||||
|
amount_with_offset_in_currency: "123456789",
|
||||||
|
formatted_amount: "price unavailable",
|
||||||
|
currency: "CAD",
|
||||||
|
},
|
||||||
|
is_live: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const results = parseFacebookAds(ads);
|
||||||
|
|
||||||
|
expect(results).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
test("keeps valid free search listings", () => {
|
test("keeps valid free search listings", () => {
|
||||||
const ads = [
|
const ads = [
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user