fix: tighten eBay result parsing
This commit is contained in:
@@ -43,7 +43,7 @@ const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C)\s*\$|\s*[$£€¥])/u;
|
|||||||
function canonicalizeEbayItemUrl(url: string): string {
|
function canonicalizeEbayItemUrl(url: string): string {
|
||||||
try {
|
try {
|
||||||
const parsed = new URL(url, "https://www.ebay.ca");
|
const parsed = new URL(url, "https://www.ebay.ca");
|
||||||
const match = parsed.pathname.match(/\/itm\/[^/?#]+/);
|
const match = parsed.pathname.match(/\/itm\/(?:[^/?#]+\/)?\d+/);
|
||||||
return match ? `${parsed.origin}${match[0]}` : `${parsed.origin}${parsed.pathname}`;
|
return match ? `${parsed.origin}${match[0]}` : `${parsed.origin}${parsed.pathname}`;
|
||||||
} catch {
|
} catch {
|
||||||
return url;
|
return url;
|
||||||
@@ -267,8 +267,7 @@ function parseEbayListings(
|
|||||||
if (
|
if (
|
||||||
text &&
|
text &&
|
||||||
EBAY_PRICE_TEXT_RE.test(text) &&
|
EBAY_PRICE_TEXT_RE.test(text) &&
|
||||||
text.length < 50 &&
|
text.length < 50
|
||||||
!/\d{4}/.test(text)
|
|
||||||
) {
|
) {
|
||||||
actualPrices.push(el);
|
actualPrices.push(el);
|
||||||
}
|
}
|
||||||
@@ -512,7 +511,7 @@ export default async function fetchEbayItems(
|
|||||||
// Filter by price range (additional safety check)
|
// Filter by price range (additional safety check)
|
||||||
const filteredListings = listings.filter((listing) => {
|
const filteredListings = listings.filter((listing) => {
|
||||||
const cents = listing.listingPrice?.cents;
|
const cents = listing.listingPrice?.cents;
|
||||||
return cents && cents >= minPrice && cents <= maxPrice;
|
return typeof cents === "number" && cents >= minPrice && cents <= maxPrice;
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log(`Parsed ${filteredListings.length} eBay listings.`);
|
console.log(`Parsed ${filteredListings.length} eBay listings.`);
|
||||||
|
|||||||
@@ -131,6 +131,48 @@ describe("eBay Scraper Cookie Handling", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("deduplicates tracking variants of SEO-style item URLs", async () => {
|
||||||
|
global.fetch = mock(() =>
|
||||||
|
Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () =>
|
||||||
|
Promise.resolve(`
|
||||||
|
<html><body>
|
||||||
|
<li class="s-item">
|
||||||
|
<a href="/itm/title-slug/1234567890?_trkparms=foo"></a>
|
||||||
|
<h3>Stable Laptop Bundle</h3>
|
||||||
|
<span class="s-item__price">CA $100.00</span>
|
||||||
|
</li>
|
||||||
|
<li class="s-item">
|
||||||
|
<a href="https://www.ebay.ca/itm/title-slug/1234567890?hash=item123"></a>
|
||||||
|
<h3>Stable Laptop Bundle</h3>
|
||||||
|
<span class="s-item__price">CA $100.00</span>
|
||||||
|
</li>
|
||||||
|
<li class="s-item">
|
||||||
|
<a href="https://www.ebay.ca/itm/title-slug/9999999999?hash=item999"></a>
|
||||||
|
<h3>Another Laptop Bundle</h3>
|
||||||
|
<span class="s-item__price">CA $110.00</span>
|
||||||
|
</li>
|
||||||
|
</body></html>
|
||||||
|
`),
|
||||||
|
}),
|
||||||
|
) as typeof fetch;
|
||||||
|
|
||||||
|
const results = await fetchEbayItems("laptop", 1000);
|
||||||
|
|
||||||
|
expect(results).toHaveLength(2);
|
||||||
|
expect(results[0]).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
url: "https://www.ebay.ca/itm/title-slug/1234567890?_trkparms=foo",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
expect(results[1]).toEqual(
|
||||||
|
expect.objectContaining({
|
||||||
|
url: "https://www.ebay.ca/itm/title-slug/9999999999?hash=item999",
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
test("treats bare dollar prices as CAD on ebay.ca", async () => {
|
test("treats bare dollar prices as CAD on ebay.ca", async () => {
|
||||||
global.fetch = mock(() =>
|
global.fetch = mock(() =>
|
||||||
Promise.resolve({
|
Promise.resolve({
|
||||||
@@ -189,6 +231,68 @@ describe("eBay Scraper Cookie Handling", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("prefers discounted Canadian prices that contain four consecutive digits", async () => {
|
||||||
|
global.fetch = mock(() =>
|
||||||
|
Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () =>
|
||||||
|
Promise.resolve(`
|
||||||
|
<html><body>
|
||||||
|
<li class="s-item">
|
||||||
|
<a href="/itm/123"></a>
|
||||||
|
<h3>Stable Laptop Bundle</h3>
|
||||||
|
<span class="s-item__price">
|
||||||
|
<s>CA $1500.00</s>
|
||||||
|
<span>CA $1000.00</span>
|
||||||
|
</span>
|
||||||
|
</li>
|
||||||
|
</body></html>
|
||||||
|
`),
|
||||||
|
}),
|
||||||
|
) as typeof fetch;
|
||||||
|
|
||||||
|
const results = await fetchEbayItems("laptop", 1000);
|
||||||
|
|
||||||
|
expect(results).toEqual([
|
||||||
|
expect.objectContaining({
|
||||||
|
listingPrice: expect.objectContaining({
|
||||||
|
amountFormatted: "CA $1000.00",
|
||||||
|
cents: 100000,
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("retains free items when the requested price range includes zero", async () => {
|
||||||
|
global.fetch = mock(() =>
|
||||||
|
Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () =>
|
||||||
|
Promise.resolve(`
|
||||||
|
<html><body>
|
||||||
|
<li class="s-item">
|
||||||
|
<a href="/itm/123"></a>
|
||||||
|
<h3>Free Laptop Bundle</h3>
|
||||||
|
<span class="s-item__price">$0.00</span>
|
||||||
|
</li>
|
||||||
|
</body></html>
|
||||||
|
`),
|
||||||
|
}),
|
||||||
|
) as typeof fetch;
|
||||||
|
|
||||||
|
const results = await fetchEbayItems("laptop", 1000, {
|
||||||
|
minPrice: 0,
|
||||||
|
maxPrice: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(results).toEqual([
|
||||||
|
expect.objectContaining({
|
||||||
|
title: "Free Laptop Bundle",
|
||||||
|
listingPrice: expect.objectContaining({ cents: 0 }),
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||||
global.fetch = mock(() =>
|
global.fetch = mock(() =>
|
||||||
Promise.resolve({
|
Promise.resolve({
|
||||||
|
|||||||
Reference in New Issue
Block a user