fix: tighten scraper edge case handling
This commit is contained in:
@@ -177,6 +177,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
try {
|
||||
const result = await fetchFacebookItem("123");
|
||||
expect(result).toBeNull();
|
||||
expect(global.fetch).toHaveBeenCalledTimes(1);
|
||||
expect(warnMock).toHaveBeenCalledWith(
|
||||
"Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
|
||||
);
|
||||
@@ -309,6 +310,54 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
expect(result?.listingStatus).toBe("SOLD");
|
||||
});
|
||||
|
||||
// Regression test: the fixture page contains BOTH a visible "This item has been sold"
// banner and an embedded JSON payload describing the listing (is_sold: true,
// is_live: false). The test expects fetchFacebookItem to still return a parsed
// listing (title + SOLD status) for such a page.
test("should still parse sold items when structured data exists", async () => {
|
||||
const soldStructuredHtml = `
|
||||
<html><body>
|
||||
<div>This item has been sold</div>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>
|
||||
${JSON.stringify({
|
||||
payload: {
|
||||
listing: {
|
||||
id: "457",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Structured Sold Item",
|
||||
formatted_price: { text: "CA$90" },
|
||||
listing_price: {
|
||||
amount: "90.00",
|
||||
currency: "CAD",
|
||||
amount_with_offset: "90.00",
|
||||
},
|
||||
is_sold: true,
|
||||
is_live: false,
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
// Replace the global fetch with a stub that always resolves to the fixture page.
// headers.get returns null for every header name.
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(soldStructuredHtml),
|
||||
url: "https://www.facebook.com/marketplace/item/457/",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
// The item id passed here matches the id in the fixture payload and in the stubbed response URL.
const result = await fetchFacebookItem("457");
|
||||
|
||||
// objectContaining: only title and listingStatus are pinned; any other fields
// on the returned listing are not checked by this test.
expect(result).toEqual(
|
||||
expect.objectContaining({
|
||||
title: "Structured Sold Item",
|
||||
listingStatus: "SOLD",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
test("should handle successful item extraction", async () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
default as fetchKijijiItems,
|
||||
type DetailedListing,
|
||||
NetworkError,
|
||||
parseDetailedListing,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
resolveCategoryId,
|
||||
@@ -124,6 +125,7 @@ describe("URL Construction", () => {
|
||||
sortBy: "date",
|
||||
sortOrder: "asc",
|
||||
});
|
||||
expect(dateUrl.match(/sort=/g)?.length).toBe(1);
|
||||
expect(dateUrl).toContain("sort=DATE");
|
||||
expect(dateUrl).toContain("order=ASC");
|
||||
|
||||
@@ -289,6 +291,141 @@ describe("fetchKijijiItems", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// End-to-end test of fetchKijijiItems with hideUnstableResults enabled and a
// priceMin option set. A stubbed fetch serves one search page that links to three
// listings and one detail page per listing (amounts 10000, 11000 and 7000).
// The expected output splits the listings into results and unstableResults.
// NOTE(review): the rule the implementation uses to mark a listing as unstable is
// not visible in this view; the test name indicates the classification should be
// computed over all parsed listings before the price filter is applied.
test("classifies unstable mode using all parsed listings before price filtering", async () => {
|
||||
// Search-page fixture: a __NEXT_DATA__ script whose Apollo state holds three
// Listing entries, each with only a url and a title.
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:1": { url: "/v-stable-one/k0l0", title: "Stable Listing One" },
|
||||
"Listing:2": { url: "/v-stable-two/k0l0", title: "Stable Listing Two" },
|
||||
"Listing:3": { url: "/v-unstable/k0l0", title: "Unstable Listing" },
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// Detail-page fixture builder: returns a page whose Apollo state has a single
// "Listing:detail" entry with the given title, price amount and slug-derived url.
// Currency, price type, listing type and status are fixed for every listing.
const listingHtml = (title: string, amount: number, slug: string) => `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:detail": {
|
||||
url: `/${slug}`,
|
||||
title,
|
||||
price: { amount, currency: "CAD", type: "FIXED" },
|
||||
type: "OFFER",
|
||||
status: "ACTIVE",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// Stub the global fetch with a URL router. Any URL that matches none of the
// branches below throws, so an unexpected request fails the test loudly.
global.fetch = mock((input: string | URL | Request) => {
|
||||
// Normalise the request argument to a string; non-string inputs go through toString().
const url = typeof input === "string" ? input : input.toString();
|
||||
|
||||
// Search request: matched by a path fragment anywhere in the URL.
if (url.includes("/k0c0l1700272")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(searchHtml),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
// Detail request for the first listing (amount 10000).
if (url.endsWith("/v-stable-one/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
// Detail request for the second listing (amount 11000).
if (url.endsWith("/v-stable-two/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
// Detail request for the third listing (amount 7000, which is below the priceMin of 8000 used below).
if (url.endsWith("/v-unstable/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(listingHtml("Unstable Listing", 7000, "v-unstable/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
throw new Error(`Unexpected URL: ${url}`);
|
||||
}) as typeof fetch;
|
||||
|
||||
// NOTE(review): presumably the arguments are (search term, a numeric setting, base URL,
// search options, an options object left empty, result-handling options) - confirm
// against the fetchKijijiItems signature, which is not visible in this view.
const results = await fetchKijijiItems(
|
||||
"phone",
|
||||
1000,
|
||||
"https://www.kijiji.ca",
|
||||
{ maxPages: 1, priceMin: 8000 },
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
|
||||
// toEqual on the outer object: the return value must have exactly these two arrays,
// in this order; each element is matched by title only.
expect(results).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({ title: "Stable Listing One" }),
|
||||
expect.objectContaining({ title: "Stable Listing Two" }),
|
||||
],
|
||||
unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
|
||||
});
|
||||
});
|
||||
|
||||
// Checks which Apollo-state entry parseDetailedListing picks. The fixture lists a
// "SearchListingCard:1" entry (url and title only) BEFORE the "Listing:detail"
// entry, and the test expects the parsed title to come from "Listing:detail".
test("parseDetailedListing ignores non-root listing-like entities", async () => {
|
||||
const html = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"SearchListingCard:1": {
|
||||
url: "/v-card/k0l0",
|
||||
title: "Card Listing",
|
||||
},
|
||||
"Listing:detail": {
|
||||
url: "/v-detailed/k0l0",
|
||||
title: "Detailed Listing",
|
||||
price: { amount: 10000, currency: "CAD", type: "FIXED" },
|
||||
type: "OFFER",
|
||||
status: "ACTIVE",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// The parser is called directly on the HTML string, so no fetch stub is set up in this test.
const result = await parseDetailedListing(html, "https://www.kijiji.ca");
|
||||
|
||||
// Only the title is pinned; a result built from the card entry would carry "Card Listing" and fail here.
expect(result).toEqual(
|
||||
expect.objectContaining({ title: "Detailed Listing" }),
|
||||
);
|
||||
});
|
||||
|
||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
|
||||
Reference in New Issue
Block a user