feat: add unstable mode to scraper results

This commit is contained in:
2026-04-22 23:23:31 -04:00
parent 8141de5b4b
commit 1ee41fb346
6 changed files with 364 additions and 12 deletions

View File

@@ -38,4 +38,49 @@ describe("eBay Scraper Cookie Handling", () => {
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
);
});
// Serves a canned eBay search page with three listings (CA $100, $110 and
// $70) and checks that, with `hideUnstableResults` set, the scraper returns an
// object that separates `results` from `unstableResults`.
test("returns results and unstableResults when unstable mode is enabled", async () => {
  // Fixture markup; kept flush-left so the served string stays exactly as-is.
  const searchPageHtml = `
<html><body>
<li class="s-item">
<a href="https://www.ebay.ca/itm/1"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">CA $100.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/2"></a>
<h3>Another Laptop Bundle</h3>
<span class="s-item__price">CA $110.00</span>
</li>
<li class="s-item">
<a href="https://www.ebay.ca/itm/3"></a>
<h3>Cheap Laptop Bundle</h3>
<span class="s-item__price">CA $70.00</span>
</li>
</body></html>
`;

  // Every request made by the scraper receives the same successful page.
  global.fetch = mock(async () => ({
    ok: true,
    text: async () => searchPageHtml,
  })) as typeof fetch;

  const scraperOptions = { hideUnstableResults: true };
  const outcome = await fetchEbayItems("laptop", 1000, {}, scraperOptions);

  // The two similarly priced listings are expected under `results`; the
  // cheapest one is expected under `unstableResults`.
  const expectedStable = ["Stable Laptop Bundle", "Another Laptop Bundle"].map(
    (title) => expect.objectContaining({ title }),
  );
  const expectedUnstable = [
    expect.objectContaining({ title: "Cheap Laptop Bundle" }),
  ];

  expect(outcome).toEqual({
    results: expectedStable,
    unstableResults: expectedUnstable,
  });
});
});

View File

@@ -5,6 +5,7 @@ import {
extractFacebookBootstrapCandidates,
extractFacebookItemData,
extractFacebookMarketplaceData,
default as fetchFacebookItems,
fetchFacebookItem,
parseFacebookAds,
parseFacebookCookieString,
@@ -367,6 +368,143 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
});
});
/**
 * Tests for the default export `fetchFacebookItems`.
 *
 * Each test installs a `fetch` stub that serves a canned Marketplace search
 * page. Both `FACEBOOK_COOKIE` and `global.fetch` are saved before and
 * restored after every test so nothing leaks out of this suite.
 */
describe("fetchFacebookItems", () => {
  /** One listing as embedded in the mocked search payload. */
  type ListingFixture = {
    id: string;
    title: string;
    amount: string;
    formattedAmount: string;
  };

  let previousCookie: string | undefined;
  let previousFetch: typeof fetch;

  /**
   * Builds the search page HTML: a marker script followed by a script whose
   * body is the JSON payload holding one result group with the given listings.
   */
  const buildSearchHtml = (listings: readonly ListingFixture[]): string =>
    `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
      {
        payload: {
          resultGroups: [
            {
              edges: listings.map(
                ({ id, title, amount, formattedAmount }) => ({
                  node: {
                    listing: {
                      id,
                      marketplace_listing_title: title,
                      listing_price: {
                        amount,
                        formatted_amount: formattedAmount,
                        currency: "CAD",
                      },
                      is_live: true,
                    },
                  },
                }),
              ),
            },
          ],
        },
      },
    )}</script></body></html>`;

  /** Replaces `global.fetch` with a stub that always serves `html`. */
  const mockSearchFetch = (html: string): void => {
    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(html),
        url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
        headers: {
          get: () => null,
        },
      }),
    ) as typeof fetch;
  };

  beforeEach(() => {
    previousFetch = global.fetch;
    previousCookie = process.env.FACEBOOK_COOKIE;
    process.env.FACEBOOK_COOKIE = "c_user=12345; xs=abc123";
  });

  afterEach(() => {
    // Undo the per-test fetch stub so later suites never see it.
    global.fetch = previousFetch;

    // Assigning `undefined` to an env var would store the string "undefined",
    // so an originally-unset cookie has to be deleted instead.
    if (previousCookie === undefined) {
      delete process.env.FACEBOOK_COOKIE;
    } else {
      process.env.FACEBOOK_COOKIE = previousCookie;
    }
  });

  test("returns an array by default", async () => {
    mockSearchFetch(
      buildSearchHtml([
        {
          id: "1",
          title: "Stable Chair Listing",
          amount: "120.00",
          formattedAmount: "CA$120",
        },
      ]),
    );

    const results = await fetchFacebookItems("chair", 1, "toronto", 25);

    expect(Array.isArray(results)).toBe(true);
    expect(results).toHaveLength(1);
  });

  test("returns results and unstableResults when unstable mode is enabled", async () => {
    mockSearchFetch(
      buildSearchHtml([
        {
          id: "1",
          title: "Stable Chair Listing",
          amount: "100.00",
          formattedAmount: "CA$100",
        },
        {
          id: "2",
          title: "Another Stable Chair",
          amount: "110.00",
          formattedAmount: "CA$110",
        },
        {
          id: "3",
          title: "Suspiciously Cheap Chair",
          amount: "70.00",
          formattedAmount: "CA$70",
        },
      ]),
    );

    // NOTE(review): only one of the two similarly priced listings is expected
    // in `results`; presumably the fourth argument (1) caps the result count.
    // Confirm against the scraper's signature.
    const results = await fetchFacebookItems("chair", 1, "toronto", 1, {
      hideUnstableResults: true,
    });

    expect(results).toEqual({
      results: [expect.objectContaining({ title: "Stable Chair Listing" })],
      unstableResults: [
        expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
      ],
    });
  });
});
describe("Data Extraction", () => {
describe("extractFacebookItemData", () => {
test("extracts item details from Comet permalink bootstrap candidates", () => {

View File

@@ -1,6 +1,7 @@
import { describe, expect, test } from "bun:test";
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
buildSearchUrl,
default as fetchKijijiItems,
NetworkError,
ParseError,
RateLimitError,
@@ -9,6 +10,18 @@ import {
ValidationError,
} from "../src/scrapers/kijiji";
// Handle on the fetch implementation that was in place when this file loaded,
// so it can be reinstated after every test.
const originalFetch = global.fetch;

/** Stand-in fetch that fails loudly when a test forgets to install its own stub. */
function failUnmockedFetch(): never {
  throw new Error("fetch should be mocked in individual tests");
}

// Default every test to a fetch that throws; tests needing network access
// must replace it explicitly.
beforeEach(() => {
  global.fetch = mock(failUnmockedFetch);
});

// Put the original fetch back once each test finishes.
afterEach(() => {
  global.fetch = originalFetch;
});
describe("Location and Category Resolution", () => {
describe("resolveLocationId", () => {
test("should return numeric IDs as-is", () => {
@@ -155,3 +168,124 @@ describe("Error Classes", () => {
expect(error.name).toBe("ValidationError");
});
});
/**
 * Test of the default export `fetchKijijiItems` against a stubbed `fetch`.
 *
 * The stub serves one search page that links to three listings, plus one
 * detail page per listing. With `hideUnstableResults` enabled the scraper is
 * expected to return `{ results, unstableResults }`.
 */
describe("fetchKijijiItems", () => {
  /**
   * Wraps an Apollo state object in the `__NEXT_DATA__` script markup used by
   * both the search page and the listing detail pages.
   */
  const nextDataPage = (apolloState: Record<string, unknown>): string =>
    [
      "",
      "<html>",
      '<script id="__NEXT_DATA__" type="application/json">',
      JSON.stringify({
        props: { pageProps: { __APOLLO_STATE__: apolloState } },
      }),
      "</script>",
      "</html>",
      "",
    ].join("\n");

  /** Builds the detail page for a single listing. */
  const listingHtml = (title: string, amount: number, slug: string): string =>
    nextDataPage({
      "Listing:detail": {
        url: `/${slug}`,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  /** Minimal successful response object carrying `html` for the given URL. */
  const htmlResponse = (html: string, url: string) => ({
    ok: true,
    text: () => Promise.resolve(html),
    headers: { get: () => null },
    url,
  });

  test("returns results and unstableResults when unstable mode is enabled", async () => {
    // Single source of truth for the search page entries, the detail pages,
    // and the URL routing inside the fetch stub.
    const listings = [
      { slug: "v-stable-one/k0l0", title: "Stable Listing One", amount: 10000 },
      { slug: "v-stable-two/k0l0", title: "Stable Listing Two", amount: 11000 },
      { slug: "v-unstable/k0l0", title: "Unstable Listing", amount: 7000 },
    ] as const;

    // Search page state: "Listing:1".."Listing:3", each with a url and title.
    const searchHtml = nextDataPage(
      Object.fromEntries(
        listings.map(({ slug, title }, index) => [
          `Listing:${index + 1}`,
          { url: `/${slug}`, title },
        ]),
      ),
    );

    global.fetch = mock((input: string | URL | Request) => {
      // `Request#toString()` yields "[object Request]", so read its `url`
      // instead; strings and URL objects stringify to the address itself.
      const url = input instanceof Request ? input.url : input.toString();

      if (url.includes("/b-buy-sell/")) {
        return Promise.resolve(htmlResponse(searchHtml, url));
      }

      const listing = listings.find(({ slug }) => url.endsWith(`/${slug}`));
      if (listing !== undefined) {
        return Promise.resolve(
          htmlResponse(
            listingHtml(listing.title, listing.amount, listing.slug),
            url,
          ),
        );
      }

      // Any other request means the scraper hit an address this test did not
      // anticipate; fail with the offending URL.
      throw new Error(`Unexpected URL: ${url}`);
    }) as typeof fetch;

    const results = await fetchKijijiItems(
      "phone",
      1000,
      "https://www.kijiji.ca",
      { maxPages: 1 },
      {},
      { hideUnstableResults: true },
    );

    expect(results).toEqual({
      results: [
        expect.objectContaining({ title: "Stable Listing One" }),
        expect.objectContaining({ title: "Stable Listing Two" }),
      ],
      unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
    });
  });
});