Kijiji improvements:
- Add error classes: NetworkError, ParseError, RateLimitError, ValidationError
- Add exponential backoff with jitter for retries
- Add request timeout (30s abort)
- Add pagination support (SearchOptions.maxPages)
- Add location/category mappings and resolution functions
- Add enhanced DetailedListing interface with images, seller info, attributes
- Add GraphQL client for seller details

Facebook improvements:
- Add parseFacebookCookieString() for parsing cookie strings
- Add ensureFacebookCookies() with env var fallback
- Add extractFacebookItemData() with multiple extraction paths
- Add fetchFacebookItem() for individual item fetching
- Add extraction metrics and API stability monitoring
- Add vehicle-specific field extraction
- Improve error handling with specific guidance for auth errors

Shared utilities:
- Update http.ts with new error classes and improved fetchHtml

Documentation:
- Port KIJIJI.md, FMARKETPLACE.md, AGENTS.md from upstream

Tests:
- Port kijiji-core, kijiji-integration, kijiji-utils tests
- Port facebook-core, facebook-integration tests
- Add test setup file

Scripts:
- Port parse-facebook-cookies.ts script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
364 lines
11 KiB
TypeScript
364 lines
11 KiB
TypeScript
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
import {
|
|
extractApolloState,
|
|
parseDetailedListing,
|
|
parseSearch,
|
|
} from "../src/scrapers/kijiji";
|
|
|
|
// Keep a reference to the real global fetch before any test replaces it;
// the afterEach hook below assigns this value back to global.fetch.
|
|
const originalFetch = global.fetch;
|
|
|
|
describe("HTML Parsing Integration", () => {
|
|
beforeEach(() => {
  // Install a tripwire in place of the global fetch: calling it throws, so a
  // test that triggers a request without providing its own stub fails loudly.
  const unmockedFetchGuard = mock(() => {
    throw new Error("fetch should be mocked in individual tests");
  });

  global.fetch = unmockedFetchGuard;
});
|
|
|
|
afterEach(function restoreFetch() {
  // Put back the fetch implementation that was captured in originalFetch.
  global.fetch = originalFetch;
});
|
|
|
|
describe("extractApolloState", () => {
  // One positive case (state embedded in the __NEXT_DATA__ script) followed by
  // three inputs for which the extractor is expected to yield null.

  test("should extract Apollo state from valid HTML", () => {
    const pageWithState =
      '<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>';
    const expectedState = {
      ROOT_QUERY: { test: "value" },
    };

    expect(extractApolloState(pageWithState)).toEqual(expectedState);
  });

  test("should return null for HTML without Apollo state", () => {
    const pageWithoutState = "<html><body>No data here</body></html>";

    expect(extractApolloState(pageWithoutState)).toBeNull();
  });

  test("should return null for malformed JSON", () => {
    // The script tag is present, but its payload is not parseable JSON.
    const pageWithBrokenJson =
      '<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>';

    expect(extractApolloState(pageWithBrokenJson)).toBeNull();
  });

  test("should handle missing __NEXT_DATA__ element", () => {
    const pageWithoutScript = "<html><body><div>Content</div></body></html>";

    expect(extractApolloState(pageWithoutScript)).toBeNull();
  });
});
|
|
|
|
describe("parseSearch", () => {
  // Site origin handed to parseSearch in every case of this suite.
  const baseUrl = "https://www.kijiji.ca";

  test("should parse search results from HTML", () => {
    // Two listing entries plus a ROOT_QUERY entry in the Apollo cache.
    const nextData = {
      props: {
        pageProps: {
          __APOLLO_STATE__: {
            "Listing:123": {
              url: "/v-iphone/k0l0",
              title: "iPhone 13 Pro",
            },
            "Listing:456": {
              url: "/v-samsung/k0l0",
              title: "Samsung Galaxy",
            },
            ROOT_QUERY: { test: "value" },
          },
        },
      },
    };
    const page = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${JSON.stringify(nextData)}
        </script>
      </html>
    `;

    const listings = parseSearch(page, baseUrl);

    expect(listings).toHaveLength(2);
    expect(listings[0]).toEqual({
      name: "iPhone 13 Pro",
      listingLink: "https://www.kijiji.ca/v-iphone/k0l0",
    });
    expect(listings[1]).toEqual({
      name: "Samsung Galaxy",
      listingLink: "https://www.kijiji.ca/v-samsung/k0l0",
    });
  });

  test("should handle absolute URLs", () => {
    // The listing url already carries the origin; the expected link is that
    // same url, unchanged.
    const nextData = {
      props: {
        pageProps: {
          __APOLLO_STATE__: {
            "Listing:123": {
              url: "https://www.kijiji.ca/v-iphone/k0l0",
              title: "iPhone 13 Pro",
            },
          },
        },
      },
    };
    const page = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${JSON.stringify(nextData)}
        </script>
      </html>
    `;

    const listings = parseSearch(page, baseUrl);

    expect(listings[0].listingLink).toBe(
      "https://www.kijiji.ca/v-iphone/k0l0",
    );
  });

  test("should filter out invalid listings", () => {
    const nextData = {
      props: {
        pageProps: {
          __APOLLO_STATE__: {
            "Listing:123": {
              url: "/v-iphone/k0l0",
              title: "iPhone 13 Pro",
            },
            "Listing:456": {
              url: "/v-samsung/k0l0",
              // Missing title
            },
            "Other:789": {
              url: "/v-other/k0l0",
              title: "Other Item",
            },
          },
        },
      },
    };
    const page = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${JSON.stringify(nextData)}
        </script>
      </html>
    `;

    const listings = parseSearch(page, baseUrl);

    // Only the entry with a "Listing:" key and both url and title is expected.
    expect(listings).toHaveLength(1);
    expect(listings[0].name).toBe("iPhone 13 Pro");
  });

  test("should return empty array for invalid HTML", () => {
    const pageWithoutData = "<html><body>Invalid</body></html>";

    expect(parseSearch(pageWithoutData, baseUrl)).toEqual([]);
  });
});
|
|
|
|
describe("parseDetailedListing", () => {
  // Site origin handed to parseDetailedListing in every case of this suite.
  const baseUrl = "https://www.kijiji.ca";

  test("should parse detailed listing with all fields", async () => {
    // Fully populated listing entry as it appears in the Apollo cache.
    const nextData = {
      props: {
        pageProps: {
          __APOLLO_STATE__: {
            "Listing:123": {
              url: "/v-iphone-13-pro/k0l0",
              title: "iPhone 13 Pro 256GB",
              description: "Excellent condition iPhone 13 Pro",
              price: {
                amount: 80000,
                currency: "CAD",
                type: "FIXED",
              },
              type: "OFFER",
              status: "ACTIVE",
              activationDate: "2024-01-15T10:00:00.000Z",
              endDate: "2025-01-15T10:00:00.000Z",
              metrics: { views: 150 },
              location: {
                address: "Toronto, ON",
                id: 1700273,
                name: "Toronto",
                coordinates: {
                  latitude: 43.6532,
                  longitude: -79.3832,
                },
              },
              imageUrls: [
                "https://media.kijiji.ca/api/v1/image1.jpg",
                "https://media.kijiji.ca/api/v1/image2.jpg",
              ],
              imageCount: 2,
              categoryId: 132,
              adSource: "ORGANIC",
              flags: {
                topAd: false,
                priceDrop: true,
              },
              posterInfo: {
                posterId: "user123",
                rating: 4.8,
              },
              attributes: [
                {
                  canonicalName: "forsaleby",
                  canonicalValues: ["ownr"],
                },
                {
                  canonicalName: "phonecarrier",
                  canonicalValues: ["unlocked"],
                },
              ],
            },
          },
        },
      },
    };
    const page = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${JSON.stringify(nextData)}
        </script>
      </html>
    `;
    // Shape the parser is expected to produce for the entry above.
    const expectedListing = {
      url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0",
      title: "iPhone 13 Pro 256GB",
      description: "Excellent condition iPhone 13 Pro",
      listingPrice: {
        amountFormatted: "$800.00",
        cents: 80000,
        currency: "CAD",
      },
      listingType: "OFFER",
      listingStatus: "ACTIVE",
      creationDate: "2024-01-15T10:00:00.000Z",
      endDate: "2025-01-15T10:00:00.000Z",
      numberOfViews: 150,
      address: "Toronto, ON",
      images: [
        "https://media.kijiji.ca/api/v1/image1.jpg",
        "https://media.kijiji.ca/api/v1/image2.jpg",
      ],
      categoryId: 132,
      adSource: "ORGANIC",
      flags: {
        topAd: false,
        priceDrop: true,
      },
      attributes: {
        forsaleby: ["ownr"],
        phonecarrier: ["unlocked"],
      },
      location: {
        id: 1700273,
        name: "Toronto",
        coordinates: {
          latitude: 43.6532,
          longitude: -79.3832,
        },
      },
      sellerInfo: {
        posterId: "user123",
        rating: 4.8,
      },
    };

    const listing = await parseDetailedListing(page, baseUrl);

    expect(listing).toEqual(expectedListing);
  });

  test("should return null for contact-based pricing", async () => {
    // Price type CONTACT with a null amount.
    const nextData = {
      props: {
        pageProps: {
          __APOLLO_STATE__: {
            "Listing:123": {
              url: "/v-iphone/k0l0",
              title: "iPhone for Sale",
              price: {
                type: "CONTACT",
                amount: null,
              },
            },
          },
        },
      },
    };
    const page = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${JSON.stringify(nextData)}
        </script>
      </html>
    `;

    const listing = await parseDetailedListing(page, baseUrl);

    expect(listing).toBeNull();
  });

  test("should handle missing optional fields", async () => {
    // Only url, title and a price amount are supplied.
    const nextData = {
      props: {
        pageProps: {
          __APOLLO_STATE__: {
            "Listing:123": {
              url: "/v-iphone/k0l0",
              title: "iPhone 13",
              price: { amount: 50000 },
            },
          },
        },
      },
    };
    const page = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${JSON.stringify(nextData)}
        </script>
      </html>
    `;
    // Expected result when everything else is absent from the input.
    const expectedListing = {
      url: "https://www.kijiji.ca/v-iphone/k0l0",
      title: "iPhone 13",
      description: undefined,
      listingPrice: {
        amountFormatted: "$500.00",
        cents: 50000,
        currency: undefined,
      },
      listingType: undefined,
      listingStatus: undefined,
      creationDate: undefined,
      endDate: undefined,
      numberOfViews: undefined,
      address: null,
      images: [],
      categoryId: 0,
      adSource: "UNKNOWN",
      flags: {
        topAd: false,
        priceDrop: false,
      },
      attributes: {},
      location: {
        id: 0,
        name: "Unknown",
        coordinates: undefined,
      },
      sellerInfo: undefined,
    };

    const listing = await parseDetailedListing(page, baseUrl);

    expect(listing).toEqual(expectedListing);
  });
});
|
|
});
|