fix: tighten scraper parsing behavior
This commit is contained in:
@@ -75,6 +75,58 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
test("deduplicates repeated item links from the same card", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"><span>Open</span></a>
|
||||
<a href="/itm/123"><span>Image</span></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0]).toEqual(
|
||||
expect.objectContaining({ url: "https://www.ebay.ca/itm/123" }),
|
||||
);
|
||||
});
|
||||
|
||||
test("treats bare dollar prices as CAD on ebay.ca", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">$100.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({ currency: "CAD" }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
|
||||
@@ -104,7 +104,7 @@ describe("URL Construction", () => {
|
||||
sortOrder: "desc",
|
||||
});
|
||||
|
||||
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
|
||||
expect(url).toContain("b-phones/gta/iphone/k0c132l1700272");
|
||||
expect(url).toContain("sort=relevancyDesc");
|
||||
expect(url).toContain("order=DESC");
|
||||
});
|
||||
@@ -141,6 +141,7 @@ describe("URL Construction", () => {
|
||||
category: "phones",
|
||||
});
|
||||
|
||||
expect(url).toContain("/b-phones/toronto/");
|
||||
expect(url).toContain("k0c132l1700273"); // phones + toronto
|
||||
});
|
||||
});
|
||||
@@ -184,6 +185,110 @@ describe("Error Classes", () => {
|
||||
});
|
||||
|
||||
describe("fetchKijijiItems", () => {
|
||||
test("filters fetched listings by priceMin and priceMax", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:1": {
|
||||
url: "/v-low/k0l0",
|
||||
title: "Low Listing",
|
||||
},
|
||||
"Listing:2": {
|
||||
url: "/v-mid/k0l0",
|
||||
title: "Mid Listing",
|
||||
},
|
||||
"Listing:3": {
|
||||
url: "/v-high/k0l0",
|
||||
title: "High Listing",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
const listingHtml = (title: string, amount: number, slug: string) => `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:detail": {
|
||||
url: `/${slug}`,
|
||||
title,
|
||||
price: { amount, currency: "CAD", type: "FIXED" },
|
||||
type: "OFFER",
|
||||
status: "ACTIVE",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
global.fetch = mock((input: string | URL | Request) => {
|
||||
const url = typeof input === "string" ? input : input.toString();
|
||||
|
||||
if (url.includes("/k0c0l1700272")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(searchHtml),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-low/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Low Listing", 7000, "v-low/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-mid/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Mid Listing", 9000, "v-mid/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-high/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("High Listing", 12000, "v-high/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
throw new Error(`Unexpected URL: ${url}`);
|
||||
}) as typeof fetch;
|
||||
|
||||
const results = await fetchKijijiItems(
|
||||
"phone",
|
||||
1000,
|
||||
"https://www.kijiji.ca",
|
||||
{ maxPages: 1, priceMin: 8000, priceMax: 10000 },
|
||||
);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({ title: "Mid Listing" }),
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
@@ -237,7 +342,7 @@ describe("fetchKijijiItems", () => {
|
||||
global.fetch = mock((input: string | URL | Request) => {
|
||||
const url = typeof input === "string" ? input : input.toString();
|
||||
|
||||
if (url.includes("/b-buy-sell/")) {
|
||||
if (url.includes("/k0c0l1700272")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(searchHtml),
|
||||
|
||||
Reference in New Issue
Block a user