feat: port upstream scraper improvements to monorepo
Kijiji improvements:
- Add error classes: NetworkError, ParseError, RateLimitError, ValidationError
- Add exponential backoff with jitter for retries
- Add request timeout (30s abort)
- Add pagination support (SearchOptions.maxPages)
- Add location/category mappings and resolution functions
- Add enhanced DetailedListing interface with images, seller info, attributes
- Add GraphQL client for seller details

Facebook improvements:
- Add parseFacebookCookieString() for parsing cookie strings
- Add ensureFacebookCookies() with env var fallback
- Add extractFacebookItemData() with multiple extraction paths
- Add fetchFacebookItem() for individual item fetching
- Add extraction metrics and API stability monitoring
- Add vehicle-specific field extraction
- Improve error handling with specific guidance for auth errors

Shared utilities:
- Update http.ts with new error classes and improved fetchHtml

Documentation:
- Port KIJIJI.md, FMARKETPLACE.md, AGENTS.md from upstream

Tests:
- Port kijiji-core, kijiji-integration, kijiji-utils tests
- Port facebook-core, facebook-integration tests
- Add test setup file

Scripts:
- Port parse-facebook-cookies.ts script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
712
packages/core/test/facebook-integration.test.ts
Normal file
712
packages/core/test/facebook-integration.test.ts
Normal file
@@ -0,0 +1,712 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
|
||||
|
||||
// Keep a handle on the real fetch so afterEach can restore it once a test has stubbed it.
const originalFetch = global.fetch;

describe("Facebook Marketplace Scraper Integration Tests", () => {
  /** Serialized cookie list passed as the last argument of every scraper call. */
  const mockCookies = JSON.stringify([
    { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
  ]);

  /**
   * Builds a `listing_price` fixture in CAD.
   *
   * @param amount - Raw amount string, e.g. "800.00".
   * @param formatted - Display string, e.g. "$800.00".
   */
  const cadPrice = (amount: string, formatted: string) => ({
    amount,
    formatted_amount: formatted,
    currency: "CAD",
  });

  /**
   * Builds a `location` fixture carrying only a city display name.
   *
   * @param city - Value for `reverse_geocode.city_page.display_name`.
   */
  const cityLocation = (city: string) => ({
    reverse_geocode: {
      city_page: { display_name: city },
    },
  });

  /**
   * Wraps listings in the nested `require -> __bbox -> ... -> feed_units.edges`
   * envelope shared by every search fixture in this file. Each listing becomes
   * one `{ node: { listing } }` edge, in the order given.
   *
   * @param listings - Listing objects to expose as feed edges.
   */
  const buildSearchData = (listings: Array<Record<string, unknown>>) => ({
    require: [
      [
        null,
        null,
        null,
        {
          __bbox: {
            result: {
              data: {
                marketplace_search: {
                  feed_units: {
                    edges: listings.map((listing) => ({ node: { listing } })),
                  },
                },
              },
            },
          },
        },
      ],
    ],
  });

  /**
   * Minimal successful fetch response: `ok`, a `text()` body, and a headers
   * stub whose lookups always return null.
   *
   * @param html - Body returned by `text()`.
   */
  const htmlResponse = (html: string) => ({
    ok: true,
    text: () => Promise.resolve(html),
    headers: {
      get: () => null,
    },
  });

  /**
   * Successful response whose body embeds the search envelope as JSON inside
   * a single `<script>` tag.
   *
   * @param listings - Listings to embed (see `buildSearchData`).
   */
  const searchPage = (listings: Array<Record<string, unknown>>) =>
    htmlResponse(
      `<html><body><script>${JSON.stringify(buildSearchData(listings))}</script></body></html>`,
    );

  /**
   * Minimal failed fetch response.
   *
   * @param status - HTTP status code to report.
   * @param body - Body returned by `text()`.
   * @param headers - Optional headers stub; defaults to one that reports
   *   every header as absent.
   */
  const errorResponse = (
    status: number,
    body: string,
    headers: { get: (name: string) => string | null } = { get: () => null },
  ) => ({
    ok: false,
    status,
    text: () => Promise.resolve(body),
    headers,
  });

  /**
   * Invokes the scraper with the arguments every test here shares: second
   * argument `1`, location "toronto", and the mock cookies.
   *
   * @param query - Search query string.
   * @param maxItems - Fourth scraper argument; the "MAX_ITEMS" test expects it
   *   to cap the number of results. Defaults to 25.
   */
  const runSearch = (query: string, maxItems = 25) =>
    fetchFacebookItems(query, 1, "toronto", maxItems, mockCookies);

  beforeEach(() => {
    // Fail loudly if a test forgets to install its own fetch stub.
    global.fetch = mock(() => {
      throw new Error("fetch should be mocked in individual tests");
    });
  });

  afterEach(() => {
    global.fetch = originalFetch;
  });

  describe("Main Search Function", () => {
    test("should successfully fetch search results", async () => {
      global.fetch = mock(() =>
        Promise.resolve(
          searchPage([
            {
              id: "1",
              marketplace_listing_title: "iPhone 13 Pro",
              listing_price: cadPrice("800.00", "$800.00"),
              location: cityLocation("Toronto"),
              creation_time: 1640995200,
              is_live: true,
            },
            {
              id: "2",
              marketplace_listing_title: "Samsung Galaxy",
              listing_price: cadPrice("600.00", "$600.00"),
              location: cityLocation("Mississauga"),
              creation_time: 1640995300,
              is_live: true,
            },
          ]),
        ),
      );

      const results = await runSearch("iPhone");
      expect(results).toHaveLength(2);
      expect(results[0].title).toBe("iPhone 13 Pro");
      expect(results[1].title).toBe("Samsung Galaxy");
    });

    test("should filter out items without price", async () => {
      global.fetch = mock(() =>
        Promise.resolve(
          searchPage([
            {
              id: "1",
              marketplace_listing_title: "With Price",
              listing_price: cadPrice("100.00", "$100.00"),
              is_live: true,
            },
            {
              // No listing_price: expected to be dropped from the results.
              id: "2",
              marketplace_listing_title: "No Price",
              is_live: true,
            },
          ]),
        ),
      );

      const results = await runSearch("test");
      expect(results).toHaveLength(1);
      expect(results[0].title).toBe("With Price");
    });

    test("should respect MAX_ITEMS parameter", async () => {
      // Ten priced listings are served; only five are requested.
      const tenListings = Array.from({ length: 10 }, (_, i) => ({
        id: String(i),
        marketplace_listing_title: `Item ${i}`,
        listing_price: cadPrice(`${(i + 1) * 10}.00`, `$${(i + 1) * 10}.00`),
        is_live: true,
      }));

      global.fetch = mock(() => Promise.resolve(searchPage(tenListings)));

      const results = await runSearch("test", 5);
      expect(results).toHaveLength(5);
    });

    test("should return empty array for no results", async () => {
      global.fetch = mock(() => Promise.resolve(searchPage([])));

      const results = await runSearch("nonexistent query");
      expect(results).toEqual([]);
    });

    test("should handle authentication errors gracefully", async () => {
      global.fetch = mock(() =>
        Promise.resolve(errorResponse(401, "Unauthorized")),
      );

      // A 401 is expected to yield an empty result rather than a rejection.
      const results = await runSearch("test");
      expect(results).toEqual([]);
    });

    test("should handle network errors", async () => {
      global.fetch = mock(() => Promise.reject(new Error("Network error")));

      await expect(runSearch("test")).rejects.toThrow("Network error");
    });

    test("should handle rate limiting with retry", async () => {
      let attempts = 0;
      global.fetch = mock(() => {
        attempts++;
        if (attempts === 1) {
          // First call is rate limited; only X-RateLimit-Reset is populated.
          return Promise.resolve(
            errorResponse(429, "Rate limited", {
              get: (header: string) => {
                if (header === "X-RateLimit-Reset") return "1";
                return null;
              },
            }),
          );
        }
        return Promise.resolve(
          searchPage([
            {
              id: "1",
              marketplace_listing_title: "Item 1",
              listing_price: cadPrice("100.00", "$100.00"),
              is_live: true,
            },
          ]),
        );
      });

      const results = await runSearch("test");
      // Exactly one retry: the 429 followed by the successful page.
      expect(attempts).toBe(2);
      expect(results).toHaveLength(1);
    });
  });

  describe("Vehicle Listing Integration", () => {
    test("should correctly identify and parse vehicle listings", async () => {
      global.fetch = mock(() =>
        Promise.resolve(
          searchPage([
            {
              id: "1",
              marketplace_listing_title: "2006 Honda Civic",
              listing_price: cadPrice("8000.00", "$8,000.00"),
              is_live: true,
            },
            {
              id: "2",
              marketplace_listing_title: "iPhone 13",
              listing_price: cadPrice("800.00", "$800.00"),
              is_live: true,
            },
          ]),
        ),
      );

      const results = await runSearch("cars");
      expect(results).toHaveLength(2);
      // Both should be classified as "item" type in search results (vehicle detection is for item details)
      // NOTE(review): the listing type itself is not asserted here, only the titles.
      expect(results[0].title).toBe("2006 Honda Civic");
      expect(results[1].title).toBe("iPhone 13");
    });
  });

  describe("Different Categories", () => {
    test("should handle electronics listings", async () => {
      global.fetch = mock(() =>
        Promise.resolve(
          searchPage([
            {
              id: "1",
              marketplace_listing_title: "Nintendo Switch",
              listing_price: cadPrice("250.00", "$250.00"),
              location: cityLocation("Toronto"),
              marketplace_listing_category_id: "479353692612078",
              condition: "USED",
              is_live: true,
            },
          ]),
        ),
      );

      const results = await runSearch("nintendo switch");
      expect(results).toHaveLength(1);
      expect(results[0].title).toBe("Nintendo Switch");
      expect(results[0].categoryId).toBe("479353692612078");
    });

    test("should handle home goods/furniture listings", async () => {
      global.fetch = mock(() =>
        Promise.resolve(
          searchPage([
            {
              id: "1",
              marketplace_listing_title: "Dining Table",
              listing_price: cadPrice("150.00", "$150.00"),
              location: cityLocation("Mississauga"),
              marketplace_listing_category_id: "1569171756675761",
              condition: "USED",
              is_live: true,
            },
          ]),
        ),
      );

      const results = await runSearch("table");
      expect(results).toHaveLength(1);
      expect(results[0].title).toBe("Dining Table");
      expect(results[0].categoryId).toBe("1569171756675761");
    });
  });

  describe("Error Scenarios", () => {
    test("should handle malformed HTML responses", async () => {
      // A successful response whose body carries no embedded JSON at all.
      global.fetch = mock(() =>
        Promise.resolve(
          htmlResponse(
            "<html><body>Invalid HTML without JSON data</body></html>",
          ),
        ),
      );

      const results = await runSearch("test");
      expect(results).toEqual([]);
    });

    test("should handle 404 errors gracefully", async () => {
      global.fetch = mock(() =>
        Promise.resolve(errorResponse(404, "Not found")),
      );

      const results = await runSearch("test");
      expect(results).toEqual([]);
    });

    test("should handle 500 errors gracefully", async () => {
      global.fetch = mock(() =>
        Promise.resolve(errorResponse(500, "Internal Server Error")),
      );

      const results = await runSearch("test");
      expect(results).toEqual([]);
    });
  });
});
|
||||
Reference in New Issue
Block a user