feat: add unstable mode to scraper results
This commit is contained in:
@@ -38,4 +38,49 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
|
||||
);
|
||||
});
|
||||
|
||||
// Unstable mode: with `hideUnstableResults` set, fetchEbayItems is expected to
// resolve to an object holding `results` and `unstableResults`.
test("returns results and unstableResults when unstable mode is enabled", async () => {
  // Mocked search page with three listings. The assertions below expect the
  // CA $70.00 one to be reported under `unstableResults`
  // (NOTE(review): presumably because its price is an outlier; confirm in the scraper).
  const searchPageHtml = `
    <html><body>
    <li class="s-item">
    <a href="https://www.ebay.ca/itm/1"></a>
    <h3>Stable Laptop Bundle</h3>
    <span class="s-item__price">CA $100.00</span>
    </li>
    <li class="s-item">
    <a href="https://www.ebay.ca/itm/2"></a>
    <h3>Another Laptop Bundle</h3>
    <span class="s-item__price">CA $110.00</span>
    </li>
    <li class="s-item">
    <a href="https://www.ebay.ca/itm/3"></a>
    <h3>Cheap Laptop Bundle</h3>
    <span class="s-item__price">CA $70.00</span>
    </li>
    </body></html>
    `;

  // Every fetch call resolves to the same canned page.
  global.fetch = mock(async () => ({
    ok: true,
    text: async () => searchPageHtml,
  })) as typeof fetch;

  const unstableModeOptions = { hideUnstableResults: true };
  const outcome = await fetchEbayItems("laptop", 1000, {}, unstableModeOptions);

  const expectedStable = [
    expect.objectContaining({ title: "Stable Laptop Bundle" }),
    expect.objectContaining({ title: "Another Laptop Bundle" }),
  ];
  const expectedUnstable = [
    expect.objectContaining({ title: "Cheap Laptop Bundle" }),
  ];

  expect(outcome).toEqual({
    results: expectedStable,
    unstableResults: expectedUnstable,
  });
});
|
||||
});
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
extractFacebookBootstrapCandidates,
|
||||
extractFacebookItemData,
|
||||
extractFacebookMarketplaceData,
|
||||
default as fetchFacebookItems,
|
||||
fetchFacebookItem,
|
||||
parseFacebookAds,
|
||||
parseFacebookCookieString,
|
||||
@@ -367,6 +368,143 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Tests for the default export of the Facebook scraper (fetchFacebookItems),
// run against a mocked Marketplace search page.
describe("fetchFacebookItems", () => {
  /** Fields that vary between the mocked Marketplace listings. */
  type MockListing = {
    id: string;
    title: string;
    amount: string;
    formattedAmount: string;
  };

  let previousCookie: string | undefined;
  let previousFetch: typeof fetch;

  /**
   * Builds the mocked search page: a marker script followed by a script
   * holding the JSON payload with one result group containing `listings`.
   */
  const buildSearchHtml = (listings: readonly MockListing[]): string => {
    const edges = listings.map((listing) => ({
      node: {
        listing: {
          id: listing.id,
          marketplace_listing_title: listing.title,
          listing_price: {
            amount: listing.amount,
            formatted_amount: listing.formattedAmount,
            currency: "CAD",
          },
          is_live: true,
        },
      },
    }));
    const payloadJson = JSON.stringify({
      payload: { resultGroups: [{ edges }] },
    });

    return `<html><body><script>"XCometMarketplaceSearchController"</script><script>${payloadJson}</script></body></html>`;
  };

  /** Replaces global.fetch with a stub that always serves `html`. */
  const mockSearchFetch = (html: string): void => {
    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(html),
        url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
        headers: {
          get: () => null,
        },
      }),
    ) as typeof fetch;
  };

  beforeEach(() => {
    previousCookie = process.env.FACEBOOK_COOKIE;
    // Remember the current fetch so the per-test mock cannot leak into later tests.
    previousFetch = global.fetch;
    process.env.FACEBOOK_COOKIE = "c_user=12345; xs=abc123";
  });

  afterEach(() => {
    global.fetch = previousFetch;

    // Assigning `undefined` to an env var would store the string "undefined",
    // so an originally-unset cookie has to be deleted instead.
    if (previousCookie === undefined) {
      delete process.env.FACEBOOK_COOKIE;
    } else {
      process.env.FACEBOOK_COOKIE = previousCookie;
    }
  });

  test("returns an array by default", async () => {
    mockSearchFetch(
      buildSearchHtml([
        {
          id: "1",
          title: "Stable Chair Listing",
          amount: "120.00",
          formattedAmount: "CA$120",
        },
      ]),
    );

    const results = await fetchFacebookItems("chair", 1, "toronto", 25);

    expect(Array.isArray(results)).toBe(true);
    expect(results).toHaveLength(1);
  });

  test("returns results and unstableResults when unstable mode is enabled", async () => {
    mockSearchFetch(
      buildSearchHtml([
        {
          id: "1",
          title: "Stable Chair Listing",
          amount: "100.00",
          formattedAmount: "CA$100",
        },
        {
          id: "2",
          title: "Another Stable Chair",
          amount: "110.00",
          formattedAmount: "CA$110",
        },
        {
          id: "3",
          title: "Suspiciously Cheap Chair",
          amount: "70.00",
          formattedAmount: "CA$70",
        },
      ]),
    );

    const results = await fetchFacebookItems("chair", 1, "toronto", 1, {
      hideUnstableResults: true,
    });

    expect(results).toEqual({
      results: [expect.objectContaining({ title: "Stable Chair Listing" })],
      unstableResults: [
        expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
      ],
    });
  });
});
|
||||
|
||||
describe("Data Extraction", () => {
|
||||
describe("extractFacebookItemData", () => {
|
||||
test("extracts item details from Comet permalink bootstrap candidates", () => {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import {
|
||||
buildSearchUrl,
|
||||
default as fetchKijijiItems,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
@@ -9,6 +10,18 @@ import {
|
||||
ValidationError,
|
||||
} from "../src/scrapers/kijiji";
|
||||
|
||||
// Handle on the fetch implementation present when this file is loaded.
const originalFetch = global.fetch;

// Before every test, install a fetch stub that throws, so a test that forgets
// to provide its own mock fails with a clear message.
beforeEach(function installFailingFetch() {
  const rejectUnmockedCall = (): never => {
    throw new Error("fetch should be mocked in individual tests");
  };

  global.fetch = mock(rejectUnmockedCall);
});

// After every test, put the fetch captured at load time back in place.
afterEach(function restoreOriginalFetch() {
  global.fetch = originalFetch;
});
|
||||
|
||||
describe("Location and Category Resolution", () => {
|
||||
describe("resolveLocationId", () => {
|
||||
test("should return numeric IDs as-is", () => {
|
||||
@@ -155,3 +168,124 @@ describe("Error Classes", () => {
|
||||
expect(error.name).toBe("ValidationError");
|
||||
});
|
||||
});
|
||||
|
||||
// Tests for the default export of the Kijiji scraper (fetchKijijiItems),
// run against mocked search and listing-detail pages.
describe("fetchKijijiItems", () => {
  test("returns results and unstableResults when unstable mode is enabled", async () => {
    // One entry per mocked listing; drives both the search page and the
    // per-listing detail pages so the two cannot drift apart.
    const listings = [
      { title: "Stable Listing One", amount: 10000, slug: "v-stable-one/k0l0" },
      { title: "Stable Listing Two", amount: 11000, slug: "v-stable-two/k0l0" },
      { title: "Unstable Listing", amount: 7000, slug: "v-unstable/k0l0" },
    ];

    /** Wraps an Apollo state object in a page with a `__NEXT_DATA__` script tag. */
    const nextDataPage = (apolloState: Record<string, unknown>): string => `
      <html>
      <script id="__NEXT_DATA__" type="application/json">
      ${JSON.stringify({
        props: {
          pageProps: {
            __APOLLO_STATE__: apolloState,
          },
        },
      })}
      </script>
      </html>
      `;

    // Search page: "Listing:1".."Listing:3", each carrying only url and title.
    const searchHtml = nextDataPage(
      Object.fromEntries(
        listings.map(({ slug, title }, index) => [
          `Listing:${index + 1}`,
          { url: `/${slug}`, title },
        ]),
      ),
    );

    /** Detail page for a single listing, including its price. */
    const listingHtml = (title: string, amount: number, slug: string): string =>
      nextDataPage({
        "Listing:detail": {
          url: `/${slug}`,
          title,
          price: { amount, currency: "CAD", type: "FIXED" },
          type: "OFFER",
          status: "ACTIVE",
        },
      });

    /** Minimal successful response stub serving `body`. */
    const htmlResponse = (body: string, url: string) =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(body),
        headers: { get: () => null },
        url,
      });

    /**
     * Extracts the URL string from a fetch input. A Request must be read via
     * `.url`: its toString() yields "[object Request]", which would never
     * match any of the routes below.
     */
    const requestUrl = (input: string | URL | Request): string => {
      if (typeof input === "string") {
        return input;
      }
      return input instanceof Request ? input.url : input.toString();
    };

    global.fetch = mock((input: string | URL | Request) => {
      const url = requestUrl(input);

      if (url.includes("/b-buy-sell/")) {
        return htmlResponse(searchHtml, url);
      }

      const listing = listings.find(({ slug }) => url.endsWith(`/${slug}`));
      if (listing === undefined) {
        // Any request the test did not anticipate is a failure.
        throw new Error(`Unexpected URL: ${url}`);
      }

      return htmlResponse(
        listingHtml(listing.title, listing.amount, listing.slug),
        url,
      );
    }) as typeof fetch;

    const results = await fetchKijijiItems(
      "phone",
      1000,
      "https://www.kijiji.ca",
      { maxPages: 1 },
      {},
      { hideUnstableResults: true },
    );

    expect(results).toEqual({
      results: [
        expect.objectContaining({ title: "Stable Listing One" }),
        expect.objectContaining({ title: "Stable Listing Two" }),
      ],
      unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
    });
  });
});
|
||||
|
||||
Reference in New Issue
Block a user