feat: port upstream scraper improvements to monorepo
Kijiji improvements:
- Add error classes: NetworkError, ParseError, RateLimitError, ValidationError
- Add exponential backoff with jitter for retries
- Add request timeout (30s abort)
- Add pagination support (SearchOptions.maxPages)
- Add location/category mappings and resolution functions
- Add enhanced DetailedListing interface with images, seller info, attributes
- Add GraphQL client for seller details

Facebook improvements:
- Add parseFacebookCookieString() for parsing cookie strings
- Add ensureFacebookCookies() with env var fallback
- Add extractFacebookItemData() with multiple extraction paths
- Add fetchFacebookItem() for individual item fetching
- Add extraction metrics and API stability monitoring
- Add vehicle-specific field extraction
- Improve error handling with specific guidance for auth errors

Shared utilities:
- Update http.ts with new error classes and improved fetchHtml

Documentation:
- Port KIJIJI.md, FMARKETPLACE.md, AGENTS.md from upstream

Tests:
- Port kijiji-core, kijiji-integration, kijiji-utils tests
- Port facebook-core, facebook-integration tests
- Add test setup file

Scripts:
- Port parse-facebook-cookies.ts script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
183
packages/core/scripts/parse-facebook-cookies.ts
Normal file
183
packages/core/scripts/parse-facebook-cookies.ts
Normal file
@@ -0,0 +1,183 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
/**
|
||||
* Facebook Cookie Parser CLI
|
||||
*
|
||||
* Parses Facebook cookie strings into JSON format for the marketplace scraper
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/parse-facebook-cookies.ts "c_user=123; xs=abc"
|
||||
* bun run scripts/parse-facebook-cookies.ts --input cookies.txt
|
||||
* echo "c_user=123; xs=abc" | bun run scripts/parse-facebook-cookies.ts
|
||||
* bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json
|
||||
*/
|
||||
|
||||
import { parseFacebookCookieString } from "../src/facebook";
|
||||
|
||||
/**
 * Shape of one cookie record as serialized to the output JSON file.
 *
 * Only `name`, `value`, `domain` and `path` are required; the remaining
 * attributes are optional.
 */
interface Cookie {
  /** Cookie name (the part before `=`). */
  name: string;
  /** Cookie value (the part after `=`). */
  value: string;
  /** Domain the cookie belongs to. */
  domain: string;
  /** URL path the cookie belongs to. */
  path: string;

  // Optional attributes.
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  expirationDate?: number;
  storeId?: string;
}
|
||||
|
||||
/**
 * CLI wrapper around `parseFacebookCookieString`.
 *
 * Unlike the library function, this variant never returns an empty result:
 * a blank input or an input that yields no cookies prints an error to
 * stderr and terminates the process with exit code 1.
 *
 * @param cookieString Raw `name=value; name=value` cookie string.
 * @returns The parsed cookies (always at least one).
 */
function parseFacebookCookieStringCLI(cookieString: string): Cookie[] {
  // Print every message to stderr, then exit with a failure status.
  const abort = (...messages: string[]): never => {
    for (const message of messages) {
      console.error(message);
    }
    process.exit(1);
  };

  const hasContent = Boolean(cookieString) && cookieString.trim().length > 0;
  if (!hasContent) {
    abort("❌ Error: Empty or invalid cookie string provided");
  }

  const parsed = parseFacebookCookieString(cookieString);
  if (parsed.length === 0) {
    abort(
      "❌ Error: No valid cookies found in input string",
      'Expected format: "name1=value1; name2=value2;"',
    );
  }

  return parsed;
}
|
||||
|
||||
/**
 * CLI entry point.
 *
 * Resolves the cookie string from, in priority order, an `--input` file, a
 * positional argument, or piped stdin; parses it; and writes the resulting
 * cookie array as JSON to the `--output` path (default
 * `./cookies/facebook.json`). Every failure prints a message to stderr and
 * exits with status 1.
 */
async function main() {
  const args = process.argv.slice(2);

  let cookieString = "";
  let outputPath = "./cookies/facebook.json";
  let inputPath = "";

  // Returns the value that follows an option flag; exits with usage help when
  // the flag is the last argument (otherwise the path would be `undefined`).
  const requireValue = (flag: string, index: number): string => {
    const value = args[index + 1];
    if (value === undefined || value === "") {
      console.error(`❌ Error: Option ${flag} requires a value`);
      showHelp();
      process.exit(1);
    }
    return value;
  };

  // Parse command line arguments
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) continue;

    if (arg === "--input" || arg === "-i") {
      inputPath = requireValue(arg, i);
      i++; // Skip the consumed value
    } else if (arg === "--output" || arg === "-o") {
      outputPath = requireValue(arg, i);
      i++; // Skip the consumed value
    } else if (arg === "--help" || arg === "-h") {
      showHelp();
      return;
    } else if (!arg.startsWith("-")) {
      // Assume this is the cookie string
      cookieString = arg;
    } else {
      console.error(`❌ Unknown option: ${arg}`);
      showHelp();
      process.exit(1);
    }
  }

  if (inputPath) {
    // An input file takes precedence over a positional cookie string.
    try {
      const file = Bun.file(inputPath);
      if (!(await file.exists())) {
        console.error(`❌ Error: Input file not found: ${inputPath}`);
        process.exit(1);
      }
      cookieString = await file.text();
    } catch (error) {
      console.error(`❌ Error reading input file: ${error}`);
      process.exit(1);
    }
  } else if (!cookieString.trim() && !process.stdin.isTTY) {
    // No explicit source was given and stdin is not a terminal, so read the
    // piped input. `isTTY` is tested for falsiness because it can be
    // `undefined` rather than `false` for pipes. Reading here, after option
    // parsing, also lets stdin be combined with `--output`.
    let input = "";
    for await (const chunk of process.stdin) {
      input += chunk;
    }
    input = input.trim();

    if (!input) {
      console.error("❌ Error: No input provided via stdin");
      process.exit(1);
    }
    cookieString = input;
  }

  if (!cookieString.trim()) {
    console.error("❌ Error: No cookie string provided");
    console.error(
      "Provide cookie string as argument, --input file, or via stdin",
    );
    showHelp();
    process.exit(1);
  }

  const cookies = parseFacebookCookieStringCLI(cookieString);
  await writeOutput(cookies, outputPath);
}
|
||||
|
||||
/**
 * Serializes the cookies as pretty-printed JSON, writes them to
 * `outputPath`, and prints a summary listing each cookie name with at most
 * the first 20 characters of its value.
 *
 * Any error raised while writing or reporting is printed to stderr and the
 * process exits with status 1.
 *
 * @param cookies Cookies to persist.
 * @param outputPath Destination file path.
 */
async function writeOutput(cookies: Cookie[], outputPath: string) {
  try {
    const serialized = JSON.stringify(cookies, null, 2);
    await Bun.write(outputPath, serialized);

    console.log(`✅ Successfully parsed ${cookies.length} Facebook cookies`);
    console.log(`📁 Saved to: ${outputPath}`);

    // Show summary of parsed cookies, truncating long values.
    console.log("\n📋 Parsed cookies:");
    cookies.forEach(({ name, value }) => {
      const preview = value.substring(0, 20);
      const ellipsis = value.length > 20 ? "..." : "";
      console.log(` • ${name}: ${preview}${ellipsis}`);
    });
  } catch (error) {
    console.error(`❌ Error writing to output file: ${error}`);
    process.exit(1);
  }
}
|
||||
|
||||
/**
 * Prints the CLI usage text (usage line, examples, options, expected cookie
 * format and output location) to stdout.
 */
function showHelp() {
  // NOTE(review): leading indentation and column alignment inside the help
  // text could not be confirmed from the reviewed copy — verify the spacing
  // against the checked-in file before merging.
  const helpLines = [
    "",
    "Facebook Cookie Parser CLI",
    "",
    "Parses Facebook cookie strings into JSON format for the marketplace scraper.",
    "",
    "USAGE:",
    "bun run scripts/parse-facebook-cookies.ts [OPTIONS] [COOKIE_STRING]",
    "",
    "EXAMPLES:",
    "# Parse from command line argument",
    'bun run scripts/parse-facebook-cookies.ts "c_user=123; xs=abc"',
    "",
    "# Parse from file",
    "bun run scripts/parse-facebook-cookies.ts --input cookies.txt",
    "",
    "# Parse from stdin",
    'echo "c_user=123; xs=abc" | bun run scripts/parse-facebook-cookies.ts',
    "",
    "# Output to custom file",
    'bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json',
    "",
    "OPTIONS:",
    "-i, --input FILE Read cookie string from file",
    "-o, --output FILE Output file path (default: ./cookies/facebook.json)",
    "-h, --help Show this help message",
    "",
    "COOKIE FORMAT:",
    "Semicolon-separated name=value pairs",
    'Example: "c_user=123456789; xs=abcdef123456; fr=xyz789"',
    "",
    "OUTPUT:",
    "JSON array of cookie objects saved to ./cookies/facebook.json",
    "",
  ];

  console.log(helpLines.join("\n"));
}
|
||||
|
||||
// Start the CLI only when `import.meta.main` is set, i.e. not when this
// module is merely imported.
if (import.meta.main) {
  // Last-resort handler for anything `main` did not report itself.
  const reportFatal = (error: unknown): never => {
    console.error(`❌ Unexpected error: ${error}`);
    process.exit(1);
  };

  main().catch(reportFatal);
}
|
||||
@@ -1,8 +1,36 @@
|
||||
// Export all scrapers
|
||||
export { default as fetchKijijiItems, slugify } from "./scrapers/kijiji";
|
||||
export type { KijijiListingDetails } from "./scrapers/kijiji";
|
||||
export {
|
||||
default as fetchKijijiItems,
|
||||
slugify,
|
||||
resolveLocationId,
|
||||
resolveCategoryId,
|
||||
buildSearchUrl,
|
||||
extractApolloState,
|
||||
parseSearch,
|
||||
parseDetailedListing,
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
} from "./scrapers/kijiji";
|
||||
export type {
|
||||
KijijiListingDetails,
|
||||
DetailedListing,
|
||||
SearchOptions,
|
||||
ListingFetchOptions,
|
||||
} from "./scrapers/kijiji";
|
||||
|
||||
export { default as fetchFacebookItems } from "./scrapers/facebook";
|
||||
export {
|
||||
default as fetchFacebookItems,
|
||||
fetchFacebookItem,
|
||||
parseFacebookCookieString,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookMarketplaceData,
|
||||
extractFacebookItemData,
|
||||
parseFacebookAds,
|
||||
parseFacebookItem,
|
||||
} from "./scrapers/facebook";
|
||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||
|
||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||
|
||||
@@ -26,7 +26,7 @@ interface Cookie {
|
||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
||||
session?: boolean;
|
||||
expirationDate?: number;
|
||||
partitionKey?: any;
|
||||
partitionKey?: Record<string, unknown>;
|
||||
storeId?: string;
|
||||
}
|
||||
|
||||
@@ -38,6 +38,8 @@ interface FacebookAdNode {
|
||||
listing_price?: {
|
||||
amount?: string | number;
|
||||
currency?: string;
|
||||
amount_with_offset_in_currency?: string | number;
|
||||
formatted_amount?: string;
|
||||
};
|
||||
location?: {
|
||||
reverse_geocode?: {
|
||||
@@ -47,6 +49,24 @@ interface FacebookAdNode {
|
||||
};
|
||||
};
|
||||
creation_time?: number;
|
||||
is_sold?: boolean;
|
||||
is_pending?: boolean;
|
||||
is_live?: boolean;
|
||||
is_hidden?: boolean;
|
||||
primary_listing_photo?: {
|
||||
image?: {
|
||||
uri?: string;
|
||||
};
|
||||
};
|
||||
listing_video?: {
|
||||
id?: string;
|
||||
};
|
||||
marketplace_listing_seller?: {
|
||||
name?: string;
|
||||
id?: string;
|
||||
};
|
||||
marketplace_listing_category_id?: string;
|
||||
delivery_types?: string[];
|
||||
[k: string]: unknown;
|
||||
};
|
||||
[k: string]: unknown;
|
||||
@@ -65,6 +85,97 @@ interface FacebookMarketplaceSearch {
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
/**
 * Raw item payload as embedded in a Facebook Marketplace item page.
 *
 * Only `id`, `__typename` and `marketplace_listing_title` are required; all
 * other fields are optional, and unknown extra keys are tolerated through
 * the index signature.
 */
interface FacebookMarketplaceItem {
  // --- Identification ---
  id: string;
  __typename: "GroupCommerceProductItem";

  // --- Listing content ---
  marketplace_listing_title: string;
  redacted_description?: {
    text: string;
  };
  custom_title?: string;

  // --- Pricing ---
  formatted_price?: {
    text: string;
  };
  listing_price?: {
    amount: string;
    currency: string;
    amount_with_offset: string;
  };

  // --- Location ---
  location_text?: {
    text: string;
  };
  location?: {
    latitude: number;
    longitude: number;
    reverse_geocode_detailed?: {
      country_alpha_two: string;
      postal_code_trimmed: string;
    };
  };

  // --- Status flags ---
  is_live?: boolean;
  is_sold?: boolean;
  is_pending?: boolean;
  is_hidden?: boolean;
  is_draft?: boolean;

  // --- Timing ---
  creation_time?: number;

  // --- Seller ---
  marketplace_listing_seller?: {
    __typename: "User";
    id: string;
    name: string;
    profile_picture?: {
      uri: string;
    };
    join_time?: number;
  };

  // --- Vehicle details (automotive listings) ---
  vehicle_make_display_name?: string;
  vehicle_model_display_name?: string;
  vehicle_odometer_data?: {
    unit: "KILOMETERS" | "MILES";
    value: number;
  };
  vehicle_transmission_type?: "AUTOMATIC" | "MANUAL";
  vehicle_exterior_color?: string;
  vehicle_interior_color?: string;
  vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR";
  vehicle_fuel_type?: string;
  vehicle_trim_display_name?: string;

  // --- Category and condition ---
  marketplace_listing_category_id?: string;
  condition?: string;

  // --- Commerce features ---
  delivery_types?: string[];
  is_shipping_offered?: boolean;
  is_buy_now_enabled?: boolean;
  can_buyer_make_checkout_offer?: boolean;

  // --- Messaging ---
  messaging_enabled?: boolean;
  first_message_suggested_value?: string;

  // --- Metadata ---
  logging_id?: string;
  reportable_ent_id?: string;

  // Any additional keys present in the payload.
  [k: string]: unknown;
}
|
||||
|
||||
export interface FacebookListingDetails {
|
||||
url: string;
|
||||
title: string;
|
||||
@@ -96,7 +207,10 @@ export interface FacebookListingDetails {
|
||||
/**
|
||||
* Load Facebook cookies from file or string
|
||||
*/
|
||||
async function loadFacebookCookies(cookiesSource?: string): Promise<Cookie[]> {
|
||||
async function loadFacebookCookies(
|
||||
cookiesSource?: string,
|
||||
cookiePath = "./cookies/facebook.json"
|
||||
): Promise<Cookie[]> {
|
||||
// First try to load from provided string parameter
|
||||
if (cookiesSource) {
|
||||
try {
|
||||
@@ -106,7 +220,7 @@ async function loadFacebookCookies(cookiesSource?: string): Promise<Cookie[]> {
|
||||
(cookie): cookie is Cookie =>
|
||||
cookie &&
|
||||
typeof cookie.name === "string" &&
|
||||
typeof cookie.value === "string",
|
||||
typeof cookie.value === "string"
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
@@ -114,9 +228,9 @@ async function loadFacebookCookies(cookiesSource?: string): Promise<Cookie[]> {
|
||||
}
|
||||
}
|
||||
|
||||
// Try to load from ./cookies/facebook.json
|
||||
// Try to load from specified path
|
||||
try {
|
||||
const cookiesPath = "./cookies/facebook.json";
|
||||
const cookiesPath = cookiePath;
|
||||
const file = Bun.file(cookiesPath);
|
||||
if (await file.exists()) {
|
||||
const content = await file.text();
|
||||
@@ -126,17 +240,100 @@ async function loadFacebookCookies(cookiesSource?: string): Promise<Cookie[]> {
|
||||
(cookie): cookie is Cookie =>
|
||||
cookie &&
|
||||
typeof cookie.name === "string" &&
|
||||
typeof cookie.value === "string",
|
||||
typeof cookie.value === "string"
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`Could not load cookies from ./cookies/facebook.json: ${e}`);
|
||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
 * Parse a `name1=value1; name2=value2` cookie string into Cookie objects.
 *
 * Each `;`-separated pair is split at its FIRST `=` only, so values that
 * themselves contain `=` are preserved in full. Pairs without `=`, or with
 * an empty name or value, are skipped. Values are percent-decoded; a value
 * that is not valid percent-encoding is kept verbatim instead of aborting
 * the whole parse.
 *
 * Every cookie is created for domain `.facebook.com`, path `/`, with
 * `secure: true`, `httpOnly: false`, `sameSite: "lax"` and no expiration.
 *
 * @param cookieString Raw cookie string.
 * @returns The parsed cookies; an empty array for blank input.
 */
export function parseFacebookCookieString(cookieString: string): Cookie[] {
  if (!cookieString || !cookieString.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) continue;

    // Split at the first "=" only; `split("=", 2)` would drop everything
    // after a second "=" inside the value.
    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();

    // Skip empty names or values
    if (!name || !rawValue) continue;

    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent sequence (URIError): keep the raw value.
    }

    cookies.push({
      name,
      value,
      domain: ".facebook.com",
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined, // Session cookies
    });
  }

  return cookies;
}
|
||||
|
||||
/**
 * Ensure Facebook cookies are available.
 *
 * Resolution order:
 *  1. Cookies loaded through `loadFacebookCookies` from `cookiePath`.
 *  2. The `FACEBOOK_COOKIE` environment variable, parsed with
 *     `parseFacebookCookieString`. On success the parsed cookies are also
 *     written to `cookiePath` (best effort: a failed write only warns).
 *
 * @param cookiePath JSON cookie file to read and, when falling back to the
 *   environment variable, to write.
 * @returns A non-empty cookie array.
 * @throws Error when no cookies are found at `cookiePath` and
 *   `FACEBOOK_COOKIE` is unset, blank, or contains no valid cookies.
 */
export async function ensureFacebookCookies(
  cookiePath = "./cookies/facebook.json"
): Promise<Cookie[]> {
  // First try to load existing cookies
  try {
    const existing = await loadFacebookCookies(undefined, cookiePath);
    if (existing.length > 0) {
      return existing;
    }
  } catch {
    // File doesn't exist or is invalid, continue to check env var
  }

  // Try to parse from environment variable
  const cookieString = process.env.FACEBOOK_COOKIE;
  if (!cookieString?.trim()) {
    // Report the path that was actually checked, not only the default one.
    throw new Error(
      "No valid Facebook cookies found. Either:\n" +
        " 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
        ` 2. Create ${cookiePath} manually with cookie array`
    );
  }

  // Parse the cookie string
  const cookies = parseFacebookCookieString(cookieString);
  if (cookies.length === 0) {
    throw new Error(
      "FACEBOOK_COOKIE environment variable contains no valid cookies. " +
        'Expected format: "name1=value1; name2=value2;"'
    );
  }

  // Save to file for future use
  try {
    await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
    console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
  } catch (error) {
    console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
    // Continue anyway, we have the cookies in memory
  }

  return cookies;
}
|
||||
|
||||
/**
|
||||
* Format cookies array into Cookie header string
|
||||
*/
|
||||
@@ -150,10 +347,9 @@ function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
|
||||
domain.endsWith(cookie.domain.slice(1)) ||
|
||||
domain === cookie.domain.slice(1)
|
||||
);
|
||||
} else {
|
||||
// Host-only cookie
|
||||
return cookie.domain === domain;
|
||||
}
|
||||
// Host-only cookie
|
||||
return cookie.domain === domain;
|
||||
})
|
||||
.filter((cookie) => {
|
||||
// Check expiration
|
||||
@@ -172,13 +368,55 @@ class HttpError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly status: number,
|
||||
public readonly url: string,
|
||||
public readonly url: string
|
||||
) {
|
||||
super(message);
|
||||
this.name = "HttpError";
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------- Extraction Metrics -----------------------------
|
||||
|
||||
/**
 * Module-wide counters for item-data extraction attempts.
 * `lastApiChangeDetected` holds the time the low-success-rate warning was
 * emitted, or `null` if it has not been emitted.
 */
const extractionStats = {
  totalExtractions: 0,
  successfulExtractions: 0,
  failedExtractions: 0,
  lastApiChangeDetected: null as Date | null,
};

/**
 * Record the outcome of one extraction attempt.
 *
 * Updates the counters and, once more than 10 attempts have been recorded
 * and fewer than 80% of them succeeded, emits a single warning (guarded by
 * `lastApiChangeDetected`, so it is not repeated). A failed attempt with a
 * known item id is additionally reported on its own.
 *
 * @param success Whether the extraction succeeded.
 * @param itemId Optional id of the item, used in the per-item warning.
 */
function logExtractionMetrics(success: boolean, itemId?: string) {
  extractionStats.totalExtractions += 1;
  if (success) {
    extractionStats.successfulExtractions += 1;
  } else {
    extractionStats.failedExtractions += 1;
  }

  const { totalExtractions, successfulExtractions } = extractionStats;
  const successRate = successfulExtractions / totalExtractions;
  const hasEnoughSamples = totalExtractions > 10;
  const alreadyFlagged = Boolean(extractionStats.lastApiChangeDetected);

  // Log warning if extraction success rate drops below 80%
  if (hasEnoughSamples && successRate < 0.8 && !alreadyFlagged) {
    console.warn(
      "Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes."
    );
    extractionStats.lastApiChangeDetected = new Date();
  }

  if (itemId && !success) {
    console.warn(`Facebook API extraction failed for item ${itemId}`);
  }
}
|
||||
|
||||
// ----------------------------- HTTP Client -----------------------------
|
||||
|
||||
/**
|
||||
@@ -195,7 +433,7 @@ async function fetchHtml(
|
||||
retryBaseMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
cookies?: string;
|
||||
},
|
||||
}
|
||||
): Promise<HTMLString> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||
@@ -219,7 +457,7 @@ async function fetchHtml(
|
||||
|
||||
// Add cookies if provided
|
||||
if (opts?.cookies) {
|
||||
headers["cookie"] = opts.cookies;
|
||||
headers.cookie = opts.cookies;
|
||||
}
|
||||
|
||||
const res = await fetch(url, {
|
||||
@@ -234,7 +472,9 @@ async function fetchHtml(
|
||||
if (!res.ok) {
|
||||
// Respect 429 reset if provided
|
||||
if (res.status === 429) {
|
||||
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: (attempt + 1) * retryBaseMs;
|
||||
@@ -247,7 +487,7 @@ async function fetchHtml(
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
|
||||
res.status,
|
||||
url,
|
||||
url
|
||||
);
|
||||
}
|
||||
// Retry on 5xx
|
||||
@@ -258,7 +498,7 @@ async function fetchHtml(
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url,
|
||||
url
|
||||
);
|
||||
}
|
||||
|
||||
@@ -280,8 +520,8 @@ async function fetchHtml(
|
||||
/**
|
||||
Extract marketplace search data from Facebook page script tags
|
||||
*/
|
||||
function extractFacebookMarketplaceData(
|
||||
htmlString: HTMLString,
|
||||
export function extractFacebookMarketplaceData(
|
||||
htmlString: HTMLString
|
||||
): FacebookAdNode[] | null {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const scripts = document.querySelectorAll("script");
|
||||
@@ -289,7 +529,7 @@ function extractFacebookMarketplaceData(
|
||||
let marketplaceData: FacebookMarketplaceSearch | null = null;
|
||||
|
||||
// Find the script containing the require data with marketplace_search
|
||||
for (const script of scripts as unknown as HTMLScriptElement[]) {
|
||||
for (const script of Array.from(scripts) as HTMLScriptElement[]) {
|
||||
const scriptText = script.textContent;
|
||||
if (!scriptText) continue;
|
||||
|
||||
@@ -301,27 +541,34 @@ function extractFacebookMarketplaceData(
|
||||
// Try multiple navigation paths to find marketplace_search
|
||||
const paths = [
|
||||
// Original path from example
|
||||
() => parsed.require[0][3][0]['__bbox']['require'][0][3][1]['__bbox']['result']['data']['marketplace_search'],
|
||||
() =>
|
||||
parsed.require[0][3][0].__bbox.require[0][3][1].__bbox.result.data
|
||||
.marketplace_search,
|
||||
// Alternative path structure
|
||||
() => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
|
||||
() =>
|
||||
parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
|
||||
// Another variation
|
||||
() => parsed.require[0][3][0]['__bbox']['result']['data']['marketplace_search'],
|
||||
() => parsed.require[0][3][0].__bbox.result.data.marketplace_search,
|
||||
// Direct access for some responses
|
||||
() => {
|
||||
for (const item of parsed.require) {
|
||||
if (item && item.length >= 4 && item[3]) {
|
||||
const bbox = item[3]?.['__bbox']?.result?.data?.marketplace_search;
|
||||
const bbox = item[3]?.__bbox?.result?.data?.marketplace_search;
|
||||
if (bbox) return bbox;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
},
|
||||
];
|
||||
|
||||
for (const getData of paths) {
|
||||
try {
|
||||
const result = getData();
|
||||
if (result && isRecord(result) && result.feed_units?.edges) {
|
||||
if (
|
||||
result &&
|
||||
isRecord(result) &&
|
||||
(result as any).feed_units?.edges?.length > 0
|
||||
) {
|
||||
marketplaceData = result as FacebookMarketplaceSearch;
|
||||
break;
|
||||
}
|
||||
@@ -334,9 +581,13 @@ function extractFacebookMarketplaceData(
|
||||
}
|
||||
|
||||
// Also check for direct marketplace_search in the parsed data
|
||||
if (parsed.marketplace_search && isRecord(parsed.marketplace_search) && parsed.marketplace_search.feed_units?.edges) {
|
||||
marketplaceData = parsed.marketplace_search as FacebookMarketplaceSearch;
|
||||
break;
|
||||
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
  const searchData = parsed.marketplace_search as FacebookMarketplaceSearch;
  // Parenthesized on purpose: `??` binds more loosely than `>`, so the
  // unparenthesized form evaluates `length ?? (0 > 0)` and only yields the
  // right branch through truthiness of the length.
  if ((searchData.feed_units?.edges?.length ?? 0) > 0) {
    marketplaceData = searchData;
    break;
  }
}
|
||||
} catch {
|
||||
// Ignore parsing errors for other scripts
|
||||
@@ -344,19 +595,160 @@ function extractFacebookMarketplaceData(
|
||||
}
|
||||
}
|
||||
|
||||
if (!marketplaceData?.feed_units?.edges) {
|
||||
if (!marketplaceData?.feed_units?.edges?.length) {
|
||||
console.warn("No marketplace data found in HTML response");
|
||||
return null;
|
||||
}
|
||||
|
||||
console.log(`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`);
|
||||
console.log(
|
||||
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`
|
||||
);
|
||||
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
|
||||
}
|
||||
|
||||
/**
 * Return `candidate` typed as a marketplace item when it is an object with a
 * truthy `id`, a truthy `marketplace_listing_title` and the
 * `GroupCommerceProductItem` typename; otherwise `null`.
 * With `requireDescription`, `redacted_description` must be present as well.
 */
function asFacebookMarketplaceItem(
  candidate: unknown,
  requireDescription = false
): FacebookMarketplaceItem | null {
  if (!candidate || typeof candidate !== "object") return null;

  const fields = candidate as Record<string, unknown>;
  if (
    !fields.id ||
    !fields.marketplace_listing_title ||
    fields.__typename !== "GroupCommerceProductItem"
  ) {
    return null;
  }
  if (requireDescription && !fields.redacted_description) return null;

  return candidate as FacebookMarketplaceItem;
}

/**
 * Depth-first search through nested objects/arrays for the first value that
 * looks like a marketplace item (including `redacted_description`).
 * Gives up below `maxDepth` levels to bound the traversal.
 */
function findFacebookMarketplaceItem(
  node: unknown,
  depth = 0,
  maxDepth = 10
): FacebookMarketplaceItem | null {
  if (depth > maxDepth) return null; // Bound the recursion
  if (node === null || typeof node !== "object") return null;

  const match = asFacebookMarketplaceItem(node, true);
  if (match) return match;

  const children: unknown[] = Array.isArray(node) ? node : Object.values(node);
  for (const child of children) {
    if (child !== null && typeof child === "object") {
      const found = findFacebookMarketplaceItem(child, depth + 1, maxDepth);
      if (found) return found;
    }
  }
  return null;
}

/**
 Extract marketplace item details from Facebook item page HTML

 Every <script> whose text parses as JSON is inspected. For scripts with a
 `require` array, a list of known navigation paths is tried first, then a
 bounded recursive search of `require`. Finally a top-level `__bbox`
 payload is checked; this check also runs for scripts that have no
 `require` array. The first script yielding a valid item wins.

 @param htmlString HTML of the item page.
 @returns The raw item payload, or null when no script contains one.
 */
export function extractFacebookItemData(
  htmlString: HTMLString
): FacebookMarketplaceItem | null {
  const { document } = parseHTML(htmlString);
  const scripts = document.querySelectorAll("script");

  for (const script of scripts) {
    const scriptText = script.textContent;
    if (!scriptText) continue;

    try {
      const parsed = JSON.parse(scriptText);

      // Check for the require structure with marketplace product details
      if (Array.isArray(parsed.require)) {
        // Known navigation paths; each may throw when the shape differs.
        const extractionPaths = [
          // Path 1: Primary path from current API structure
          () =>
            parsed.require[0][3].__bbox.result.data.viewer
              .marketplace_product_details_page.target,
          // Path 2: Alternative path with nested require
          () =>
            parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data
              .viewer.marketplace_product_details_page.target,
          // Path 3: Variation without the [0] index
          () =>
            parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data
              .viewer.marketplace_product_details_page.target,
          // Path 4-5: Additional fallback paths for edge cases
          () =>
            parsed.require[0][3][1]?.__bbox?.result?.data?.viewer
              ?.marketplace_product_details_page?.target,
          () =>
            parsed.require[0][3][2]?.__bbox?.result?.data?.viewer
              ?.marketplace_product_details_page?.target,
        ];

        for (const [pathIndex, getPath] of extractionPaths.entries()) {
          let item: FacebookMarketplaceItem | null = null;
          try {
            item = asFacebookMarketplaceItem(getPath());
          } catch {
            // Path not found or invalid, try next path
          }
          if (item) {
            console.log(
              `Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`
            );
            return item;
          }
        }

        // Fallback: search the entire require structure recursively
        const recursiveResult = findFacebookMarketplaceItem(parsed.require);
        if (recursiveResult) {
          console.log(
            "Successfully extracted Facebook item data using recursive search"
          );
          return recursiveResult;
        }
      }

      // Additional location: a top-level __bbox payload. Checked outside the
      // `require` guard so scripts carrying only `__bbox` are not skipped.
      const bboxItem = asFacebookMarketplaceItem(
        parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page
          ?.target
      );
      if (bboxItem) {
        console.log(
          "Successfully extracted Facebook item data from __bbox structure"
        );
        return bboxItem;
      }
    } catch {
      // Not JSON (or an unexpected shape): skip this script and continue.
    }
  }

  return null;
}
|
||||
|
||||
/**
|
||||
Parse Facebook marketplace search results into ListingDetails[]
|
||||
*/
|
||||
function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
||||
export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
||||
const results: FacebookListingDetails[] = [];
|
||||
|
||||
for (const adJson of ads) {
|
||||
@@ -376,9 +768,10 @@ function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
||||
// - formatted_amount: human-readable price (like "CA$1")
|
||||
let cents: number;
|
||||
if (priceObj.amount != null) {
|
||||
const dollars = typeof priceObj.amount === 'string'
|
||||
? Number.parseFloat(priceObj.amount)
|
||||
: priceObj.amount;
|
||||
const dollars =
|
||||
typeof priceObj.amount === "string"
|
||||
? Number.parseFloat(priceObj.amount)
|
||||
: priceObj.amount;
|
||||
cents = Math.round(dollars * 100);
|
||||
} else if (priceObj.amount_with_offset_in_currency != null) {
|
||||
// Fallback: try to extract cents from amount_with_offset_in_currency
|
||||
@@ -390,7 +783,7 @@ function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
||||
if (priceObj.formatted_amount) {
|
||||
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
|
||||
if (match) {
|
||||
const dollars = Number.parseFloat(match[0].replace(',', ''));
|
||||
// Strip EVERY thousands separator: a string pattern in `replace` removes only
// the first comma, so "1,234,567" would be parsed as 1234.
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
|
||||
if (!Number.isNaN(dollars)) {
|
||||
cents = Math.round(dollars * 100);
|
||||
} else {
|
||||
@@ -435,19 +828,24 @@ function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
||||
|
||||
// Extract image and video URLs
|
||||
const imageUrl = listing.primary_listing_photo?.image?.uri;
|
||||
const videoUrl = listing.listing_video ? `https://www.facebook.com/${listing.listing_video.id}/` : undefined;
|
||||
const videoUrl = listing.listing_video
|
||||
? `https://www.facebook.com/${listing.listing_video.id}/`
|
||||
: undefined;
|
||||
|
||||
// Extract seller information
|
||||
const seller = listing.marketplace_listing_seller ? {
|
||||
name: listing.marketplace_listing_seller.name,
|
||||
id: listing.marketplace_listing_seller.id
|
||||
} : undefined;
|
||||
const seller = listing.marketplace_listing_seller
|
||||
? {
|
||||
name: listing.marketplace_listing_seller.name,
|
||||
id: listing.marketplace_listing_seller.id,
|
||||
}
|
||||
: undefined;
|
||||
|
||||
const listingDetails: FacebookListingDetails = {
|
||||
url,
|
||||
title,
|
||||
listingPrice: {
|
||||
amountFormatted: priceObj.formatted_amount || formatCentsToCurrency(cents / 100, "en-CA"),
|
||||
amountFormatted:
|
||||
priceObj.formatted_amount || formatCentsToCurrency(cents / 100, "en-CA"),
|
||||
cents,
|
||||
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
|
||||
},
|
||||
@@ -472,6 +870,98 @@ function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Convert one Facebook Marketplace item payload into FacebookListingDetails.
 *
 * (The original note says this targets the 2026 GroupCommerceProductItem
 * structure.)
 *
 * @param item - Raw marketplace item as extracted from the item page.
 * @returns The normalized listing, or null when the item has no title or
 *   when any conversion step throws (the failure is logged via console.warn).
 */
export function parseFacebookItem(
  item: FacebookMarketplaceItem
): FacebookListingDetails | null {
  try {
    const title = item.marketplace_listing_title || item.custom_title;
    if (!title) {
      return null;
    }

    // Price: begin with "free" defaults, then override once a non-zero
    // amount parses successfully.
    const rawPrice = item.listing_price;
    const displayPrice = item.formatted_price?.text;
    const currency = rawPrice ? rawPrice.currency || "CAD" : "CAD";
    let cents = 0;
    let amountFormatted = displayPrice || "FREE";

    if (rawPrice && rawPrice.amount && rawPrice.amount !== "0.00") {
      const parsedAmount = Number.parseFloat(rawPrice.amount);
      if (!Number.isNaN(parsedAmount)) {
        cents = Math.round(parsedAmount * 100);
        amountFormatted =
          displayPrice || formatCentsToCurrency(cents / 100, "en-CA");
      }
    }

    // Status precedence: sold, then pending, then live, then hidden.
    const listingStatus: string | undefined = item.is_sold
      ? "SOLD"
      : item.is_pending
        ? "PENDING"
        : item.is_live
          ? "ACTIVE"
          : item.is_hidden
            ? "HIDDEN"
            : undefined;

    // creation_time is multiplied by 1000 before being handed to Date.
    const creationDate = item.creation_time
      ? new Date(item.creation_time * 1000).toISOString()
      : undefined;

    // Any vehicle make or odometer data marks the listing as a vehicle.
    const hasVehicleData = Boolean(
      item.vehicle_make_display_name || item.vehicle_odometer_data
    );
    const listingType: string = hasVehicleData ? "vehicle" : "item";

    const sellerNode = item.marketplace_listing_seller;
    const seller = sellerNode
      ? { name: sellerNode.name, id: sellerNode.id }
      : undefined;

    const result: FacebookListingDetails = {
      url: `https://www.facebook.com/marketplace/item/${item.id}`,
      title,
      description: item.redacted_description?.text,
      listingPrice: { amountFormatted, cents, currency },
      address: item.location_text?.text || null,
      creationDate,
      listingType,
      listingStatus,
      categoryId: item.marketplace_listing_category_id,
      seller,
      deliveryTypes: item.delivery_types,
    };
    return result;
  } catch (error) {
    console.warn(`Failed to parse Facebook item ${item.id}:`, error);
    return null;
  }
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchFacebookItems(
|
||||
@@ -480,13 +970,22 @@ export default async function fetchFacebookItems(
|
||||
LOCATION = "toronto",
|
||||
MAX_ITEMS = 25,
|
||||
cookiesSource?: string,
|
||||
cookiePath?: string
|
||||
) {
|
||||
// Load Facebook cookies - required for Facebook Marketplace access
|
||||
const cookies = await loadFacebookCookies(cookiesSource);
|
||||
let cookies: Cookie[];
|
||||
if (cookiesSource) {
|
||||
// Use provided cookie source (backward compatibility)
|
||||
cookies = await loadFacebookCookies(cookiesSource);
|
||||
} else {
|
||||
// Auto-load from file or parse from env var
|
||||
cookies = await ensureFacebookCookies(cookiePath);
|
||||
}
|
||||
|
||||
if (cookies.length === 0) {
|
||||
throw new Error(
|
||||
"Facebook cookies are required for marketplace access. " +
|
||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
|
||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
|
||||
);
|
||||
}
|
||||
|
||||
@@ -495,7 +994,7 @@ export default async function fetchFacebookItems(
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
|
||||
);
|
||||
}
|
||||
|
||||
@@ -517,8 +1016,7 @@ export default async function fetchFacebookItems(
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
"\n" +
|
||||
`Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
||||
);
|
||||
}
|
||||
},
|
||||
@@ -527,11 +1025,11 @@ export default async function fetchFacebookItems(
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.warn(
|
||||
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
|
||||
`\nFacebook marketplace access failed (${err.status}): ${err.message}`
|
||||
);
|
||||
if (err.status === 400 || err.status === 401 || err.status === 403) {
|
||||
console.warn(
|
||||
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
|
||||
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
|
||||
);
|
||||
}
|
||||
return [];
|
||||
@@ -549,17 +1047,17 @@ export default async function fetchFacebookItems(
|
||||
|
||||
const progressBar = new cliProgress.SingleBar(
|
||||
{},
|
||||
cliProgress.Presets.shades_classic,
|
||||
cliProgress.Presets.shades_classic
|
||||
);
|
||||
const totalProgress = ads.length;
|
||||
let currentProgress = 0;
|
||||
const currentProgress = 0;
|
||||
progressBar.start(totalProgress, currentProgress);
|
||||
|
||||
const items = parseFacebookAds(ads);
|
||||
|
||||
// Filter to only priced items (already done in parseFacebookAds)
|
||||
const pricedItems = items.filter(
|
||||
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
|
||||
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0
|
||||
);
|
||||
|
||||
progressBar.update(totalProgress);
|
||||
@@ -568,3 +1066,158 @@ export default async function fetchFacebookItems(
|
||||
console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
|
||||
return pricedItems.slice(0, MAX_ITEMS); // Limit results
|
||||
}
|
||||
|
||||
/**
 * Fetch and parse a single Facebook Marketplace item page.
 *
 * Cookies come from `cookiesSource` when given, otherwise from
 * `ensureFacebookCookies(cookiePath)`.
 *
 * @param itemId - Marketplace item id used to build the item URL.
 * @param cookiesSource - Optional explicit cookie source (backward compatible path).
 * @param cookiePath - Optional path forwarded to ensureFacebookCookies.
 * @returns The parsed listing, or null when the request fails with an
 *   HttpError, no item data can be extracted, or parsing fails.
 * @throws Error when no cookies are available or none format into a header;
 *   rethrows any non-HttpError raised by the page fetch.
 */
export async function fetchFacebookItem(
  itemId: string,
  cookiesSource?: string,
  cookiePath?: string
): Promise<FacebookListingDetails | null> {
  // An explicit source wins; otherwise auto-load from file or env var.
  const cookies: Cookie[] = cookiesSource
    ? await loadFacebookCookies(cookiesSource)
    : await ensureFacebookCookies(cookiePath);

  if (cookies.length === 0) {
    throw new Error(
      "Facebook cookies are required for marketplace access. " +
        "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
    );
  }

  const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
  if (!cookiesHeader) {
    throw new Error(
      "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
    );
  }

  const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`;
  console.log(`Fetching Facebook marketplace item: ${itemUrl}`);

  let itemHtml: string;
  try {
    itemHtml = await fetchHtml(itemUrl, 1000, {
      onRateInfo: (remaining, reset) => {
        if (remaining && reset) {
          console.log(
            `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
          );
        }
      },
      cookies: cookiesHeader,
    });
  } catch (err) {
    // Only HTTP-level failures are turned into a null result.
    if (!(err instanceof HttpError)) {
      throw err;
    }

    console.warn(
      `\nFacebook marketplace item access failed (${err.status}): ${err.message}`
    );

    const status = err.status;
    if (status === 400 || status === 401 || status === 403) {
      console.warn(
        "Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
      );
      console.warn(
        "Try logging out and back into Facebook, then export fresh cookies."
      );
    } else if (status === 404) {
      console.warn(
        "Listing not found: The marketplace item may have been removed, sold, or the URL is invalid."
      );
    } else if (status === 429) {
      console.warn(
        "Rate limited: Too many requests. Facebook is blocking access temporarily."
      );
    } else if (status === 500 || status === 502 || status === 503) {
      console.warn(
        "Facebook server error: Marketplace may be temporarily unavailable."
      );
    } else {
      console.warn(`Unexpected error status: ${err.status}`);
    }
    return null;
  }

  const itemData = extractFacebookItemData(itemHtml);
  if (!itemData) {
    logExtractionMetrics(false, itemId);

    // Look for page text that explains why nothing could be extracted.
    const removedMarkers = [
      "This listing is no longer available",
      "listing has been removed",
      "This item has been sold",
    ];
    if (removedMarkers.some((marker) => itemHtml.includes(marker))) {
      console.warn(
        `Item ${itemId} appears to be sold or removed from marketplace.`
      );
      return null;
    }

    const loginMarkers = [
      "log in to Facebook",
      "You must log in",
      "authentication required",
    ];
    if (loginMarkers.some((marker) => itemHtml.includes(marker))) {
      console.warn(
        `Authentication failed for item ${itemId}. Cookies may be expired.`
      );
      return null;
    }

    console.warn(
      `No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`
    );
    console.warn(" - The listing was removed or sold");
    console.warn(" - Authentication issues");
    console.warn(" - Facebook changed their API structure");
    console.warn(" - Network or parsing issues");
    return null;
  }

  logExtractionMetrics(true, itemId);
  console.log(`Successfully extracted data for item ${itemId}`);

  const parsedItem = parseFacebookItem(itemData);
  if (!parsedItem) {
    console.warn(`Failed to parse item ${itemId}: Invalid data structure`);
    return null;
  }

  // Sold takes precedence; otherwise a non-live item is labelled
  // hidden, pending or inactive (in that order). Data is still returned.
  if (itemData.is_sold) {
    console.warn(`Item ${itemId} is marked as sold in the marketplace.`);
    parsedItem.listingStatus = "SOLD";
  } else if (!itemData.is_live) {
    console.warn(`Item ${itemId} is not live/active in the marketplace.`);
    let inactiveStatus = "INACTIVE";
    if (itemData.is_hidden) {
      inactiveStatus = "HIDDEN";
    } else if (itemData.is_pending) {
      inactiveStatus = "PENDING";
    }
    parsedItem.listingStatus = inactiveStatus;
  }

  return parsedItem;
}
|
||||
|
||||
@@ -2,7 +2,15 @@
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
import cliProgress from "cli-progress";
|
||||
import { fetchHtml, isRecord, HttpError } from "../utils/http";
|
||||
import {
|
||||
fetchHtml,
|
||||
isRecord,
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
} from "../utils/http";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
@@ -26,16 +34,29 @@ interface ApolloListingRoot {
|
||||
url?: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
price?: { amount?: number | string; currency?: string };
|
||||
price?: { amount?: number | string; currency?: string; type?: string };
|
||||
type?: string;
|
||||
status?: string;
|
||||
activationDate?: string;
|
||||
endDate?: string;
|
||||
metrics?: { views?: number | string };
|
||||
location?: { address?: string | null };
|
||||
location?: {
|
||||
address?: string | null;
|
||||
id?: number;
|
||||
name?: string;
|
||||
coordinates?: { latitude: number; longitude: number };
|
||||
};
|
||||
imageUrls?: string[];
|
||||
imageCount?: number;
|
||||
categoryId?: number;
|
||||
adSource?: string;
|
||||
flags?: { topAd?: boolean; priceDrop?: boolean };
|
||||
posterInfo?: { posterId?: string; rating?: number };
|
||||
attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>;
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
// Keep existing interface for backward compatibility
|
||||
export interface KijijiListingDetails {
|
||||
url: string;
|
||||
title: string;
|
||||
@@ -53,10 +74,173 @@ export interface KijijiListingDetails {
|
||||
address?: string | null;
|
||||
}
|
||||
|
||||
/**
 * Comprehensive listing record: every KijijiListingDetails field plus
 * images, category, ad source, flags, attributes, location and seller data.
 */
export interface DetailedListing extends KijijiListingDetails {
  /** Image URLs attached to the listing. */
  images: string[];
  /** Numeric category id. */
  categoryId: number;
  /** Source label of the ad. */
  adSource: string;
  /** Promotional markers. */
  flags: { topAd: boolean; priceDrop: boolean };
  /** Attribute name mapped to its list of values. */
  attributes: Record<string, string[]>;
  /** Structured location, with optional coordinates. */
  location: {
    id: number;
    name: string;
    coordinates?: { latitude: number; longitude: number };
  };
  /** Seller data; only `posterId` is mandatory when present. */
  sellerInfo?: {
    posterId: string;
    rating?: number;
    accountType?: string;
    memberSince?: string;
    reviewCount?: number;
    reviewScore?: number;
  };
}
|
||||
|
||||
// Configuration interfaces

/** Options controlling how a search is built and paginated. */
export interface SearchOptions {
  /** Location ID or name. */
  location?: number | string;
  /** Category ID or name. */
  category?: number | string;
  keywords?: string;
  sortBy?: "relevancy" | "date" | "price" | "distance";
  sortOrder?: "desc" | "asc";
  /** Default: 5 */
  maxPages?: number;
  priceMin?: number;
  priceMax?: number;
}
|
||||
|
||||
/** Options controlling how much detail is collected for each listing. */
export interface ListingFetchOptions {
  /** Default: true */
  includeImages?: boolean;
  /** Default: 'detailed' */
  sellerDataDepth?: "basic" | "detailed" | "full";
  /** Default: false */
  includeClientSideData?: boolean;
}
|
||||
|
||||
// ----------------------------- Constants & Mappings -----------------------------
|
||||
|
||||
// Location mappings (keys are lowercase; multi-word names use single spaces)
const LOCATION_MAPPINGS: Record<string, number> = {
  canada: 0,
  ontario: 9004,
  toronto: 1700273,
  gta: 1700272,
  oshawa: 1700275,
  quebec: 9001,
  "nova scotia": 9002,
  alberta: 9003,
  "new brunswick": 9005,
  manitoba: 9006,
  "british columbia": 9007,
  newfoundland: 9008,
  saskatchewan: 9009,
  territories: 9010,
  pei: 9011,
  "prince edward island": 9011,
};

// Category mappings (Buy & Sell main categories; keys are lowercase and hyphenated)
const CATEGORY_MAPPINGS: Record<string, number> = {
  all: 0,
  "buy-sell": 10,
  "arts-collectibles": 12,
  audio: 767,
  "baby-items": 253,
  "bags-luggage": 931,
  bikes: 644,
  books: 109,
  cameras: 103,
  cds: 104,
  clothing: 274,
  computers: 16,
  "computer-accessories": 128,
  electronics: 29659001,
  "free-stuff": 17220001,
  furniture: 235,
  "garage-sales": 638,
  "health-special-needs": 140,
  "hobbies-crafts": 139,
  "home-appliances": 107,
  "home-indoor": 717,
  "home-outdoor": 727,
  jewellery: 133,
  "musical-instruments": 17,
  phones: 132,
  "sporting-goods": 111,
  tools: 110,
  "toys-games": 108,
  "tvs-video": 15093001,
  "video-games": 141,
  other: 26,
};

// Sort parameter mappings
const SORT_MAPPINGS: Record<string, string> = {
  relevancy: "MATCH",
  date: "DATE",
  price: "PRICE",
  distance: "DISTANCE",
};

// ----------------------------- Utilities -----------------------------

const SEPS = new Set([" ", "–", "—", "/", ":", ";", ",", ".", "-"]);

/**
 * Look up `key` among the table's OWN entries only, so that inherited
 * Object.prototype members (e.g. "constructor") are never returned as ids.
 * Returns 0 when the key is not present.
 */
function lookupMappedId(table: Record<string, number>, key: string): number {
  if (!Object.prototype.hasOwnProperty.call(table, key)) return 0;
  return table[key] ?? 0;
}

/**
 * Resolve location ID from name or return numeric ID.
 *
 * Names are matched case-insensitively after trimming; runs of whitespace,
 * hyphens or underscores are treated as a single space so that
 * "Nova Scotia", "nova-scotia" and "nova_scotia" all resolve to the same
 * space-separated table key.
 *
 * @param location - Numeric location ID (returned unchanged) or a location name.
 * @returns The location ID, or 0 (Canada) when the name is unknown or omitted.
 */
export function resolveLocationId(location?: number | string): number {
  if (typeof location === "number") return location;
  if (typeof location === "string") {
    const normalized = location
      .trim()
      .toLowerCase()
      .replace(/[\s_-]+/g, " ");
    return lookupMappedId(LOCATION_MAPPINGS, normalized); // Default to Canada (0)
  }
  return 0; // Default to Canada
}

/**
 * Resolve category ID from name or return numeric ID.
 *
 * Names are matched case-insensitively after trimming; runs of whitespace,
 * hyphens or underscores are collapsed to a single hyphen to match the
 * hyphenated table keys.
 *
 * @param category - Numeric category ID (returned unchanged) or a category name.
 * @returns The category ID, or 0 (all categories) when unknown or omitted.
 */
export function resolveCategoryId(category?: number | string): number {
  if (typeof category === "number") return category;
  if (typeof category === "string") {
    const normalized = category
      .trim()
      .toLowerCase()
      .replace(/[\s_-]+/g, "-");
    return lookupMappedId(CATEGORY_MAPPINGS, normalized); // Default to all categories
  }
  return 0; // Default to all categories
}
|
||||
|
||||
/**
 * Assemble a Kijiji search URL from keywords and search options.
 *
 * The path is `/b-buy-sell/canada/<slug>/k0c<categoryId>l<locationId>`,
 * followed by a query string carrying sort, view, order and (for pages
 * after the first) page.
 *
 * NOTE(review): when `options.sortBy` is set the query contains two `sort`
 * keys (`sort=relevancyDesc` and `sort=<mapped>`); `priceMin`/`priceMax` are
 * not used here. Confirm both against the site's expected parameters.
 *
 * @param keywords - Free-text search terms; slugified into the path.
 * @param options - Search options plus an optional 1-based `page`.
 * @param BASE_URL - Site origin, defaults to https://www.kijiji.ca.
 * @returns The complete search URL.
 */
export function buildSearchUrl(
  keywords: string,
  options: SearchOptions & { page?: number },
  BASE_URL = "https://www.kijiji.ca"
): string {
  const locationId = resolveLocationId(options.location);
  const categoryId = resolveCategoryId(options.category);

  // Both slugs are constant regardless of the resolved ids.
  const categorySlug = "buy-sell";
  const locationSlug = "canada";

  const pathSegments = [
    `b-${categorySlug}`,
    locationSlug,
    slugify(keywords),
    `k0c${categoryId}l${locationId}`,
  ];

  const queryParts: string[] = ["sort=relevancyDesc", "view=list"];
  if (options.sortBy) {
    queryParts.push(`sort=${SORT_MAPPINGS[options.sortBy]}`);
  }
  queryParts.push(`order=${options.sortOrder === "asc" ? "ASC" : "DESC"}`);
  if (options.page && options.page > 1) {
    queryParts.push(`page=${options.page}`);
  }

  return `${BASE_URL}/${pathSegments.join("/")}?${queryParts.join("&")}`;
}
|
||||
|
||||
/**
|
||||
* Slugifies a string for Kijiji search URLs
|
||||
*/
|
||||
@@ -67,13 +251,14 @@ export function slugify(input: string): string {
|
||||
|
||||
for (let i = 0; i < s.length; i++) {
|
||||
const ch = s[i];
|
||||
const code = ch!.charCodeAt(0);
|
||||
if (!ch) continue;
|
||||
const code = ch.charCodeAt(0);
|
||||
|
||||
// a-z or 0-9
|
||||
if ((code >= 97 && code <= 122) || (code >= 48 && code <= 57)) {
|
||||
out.push(ch!);
|
||||
out.push(ch);
|
||||
lastHyphen = false;
|
||||
} else if (SEPS.has(ch!)) {
|
||||
} else if (SEPS.has(ch)) {
|
||||
if (!lastHyphen) {
|
||||
out.push("-");
|
||||
lastHyphen = true;
|
||||
@@ -84,12 +269,154 @@ export function slugify(input: string): string {
|
||||
return out.join("");
|
||||
}
|
||||
|
||||
// ----------------------------- GraphQL Client -----------------------------
|
||||
|
||||
// GraphQL response interfaces

/** Data shape expected back from the GetReviewSummary query. */
interface GraphQLReviewResponse {
  user?: {
    reviewSummary?: { count?: number; score?: number };
  };
}
|
||||
|
||||
/** Data shape expected back from the GetProfileMetrics query. */
interface GraphQLProfileResponse {
  user?: { memberSince?: string; accountType?: string };
}
|
||||
|
||||
// GraphQL queries

/**
 * Query documents sent to the GraphQL endpoint, keyed by purpose.
 * - getReviewSummary: review count and score for a user id.
 * - getProfileMetrics: memberSince and accountType for a profile id.
 */
const GRAPHQL_QUERIES = {
  getReviewSummary: `
    query GetReviewSummary($userId: String!) {
      user(id: $userId) {
        reviewSummary {
          count
          score
          __typename
        }
        __typename
      }
    }
  `,
  getProfileMetrics: `
    query GetProfileMetrics($profileId: String!) {
      user(id: $profileId) {
        memberSince
        accountType
        __typename
      }
    }
  `,
} as const;
|
||||
|
||||
/**
 * POST a GraphQL query to `${BASE_URL}/anvil/api` and return its `data`.
 *
 * @param query - GraphQL query document.
 * @param variables - Variables sent alongside the query.
 * @param BASE_URL - Site origin, defaults to https://www.kijiji.ca.
 * @returns The `data` member of the JSON response.
 * @throws HttpError when the response status is not ok.
 * @throws ParseError when the response body carries a truthy `errors` member.
 * @throws NetworkError for any other failure raised while sending the
 *   request or reading the body; the original Error is passed along when available.
 */
async function fetchGraphQLData(
  query: string,
  variables: Record<string, unknown>,
  BASE_URL = "https://www.kijiji.ca"
): Promise<unknown> {
  const endpoint = `${BASE_URL}/anvil/api`;

  try {
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "apollo-require-preflight": "true",
      },
      body: JSON.stringify({ query, variables }),
    });

    if (!response.ok) {
      throw new HttpError(
        `GraphQL request failed with status ${response.status}`,
        response.status,
        endpoint
      );
    }

    const payload = await response.json();
    if (payload.errors) {
      throw new ParseError(
        `GraphQL errors: ${JSON.stringify(payload.errors)}`,
        payload.errors
      );
    }
    return payload.data;
  } catch (err) {
    // Errors raised deliberately above pass through untouched.
    const alreadyClassified =
      err instanceof HttpError || err instanceof ParseError;
    if (alreadyClassified) {
      throw err;
    }

    const cause = err instanceof Error ? err : undefined;
    const reason = cause ? cause.message : String(err);
    throw new NetworkError(
      `Failed to fetch GraphQL data: ${reason}`,
      endpoint,
      cause
    );
  }
}
|
||||
|
||||
/**
 * Collect review and profile data for a seller through two concurrent
 * GraphQL queries.
 *
 * This is best-effort: any failure is logged with console.warn and an empty
 * object is returned instead of throwing.
 *
 * @param posterId - Seller id, used as `userId` and `profileId`.
 * @param BASE_URL - Site origin, defaults to https://www.kijiji.ca.
 * @returns Review count/score and profile memberSince/accountType; each
 *   field is undefined when absent from the response.
 */
async function fetchSellerDetails(
  posterId: string,
  BASE_URL = "https://www.kijiji.ca"
): Promise<{
  reviewCount?: number;
  reviewScore?: number;
  memberSince?: string;
  accountType?: string;
}> {
  try {
    const reviewRequest = fetchGraphQLData(
      GRAPHQL_QUERIES.getReviewSummary,
      { userId: posterId },
      BASE_URL
    );
    const profileRequest = fetchGraphQLData(
      GRAPHQL_QUERIES.getProfileMetrics,
      { profileId: posterId },
      BASE_URL
    );
    const [reviewData, profileData] = await Promise.all([
      reviewRequest,
      profileRequest,
    ]);

    const reviewSummary = (reviewData as GraphQLReviewResponse)?.user
      ?.reviewSummary;
    const profileUser = (profileData as GraphQLProfileResponse)?.user;

    return {
      reviewCount: reviewSummary?.count,
      reviewScore: reviewSummary?.score,
      memberSince: profileUser?.memberSince,
      accountType: profileUser?.accountType,
    };
  } catch (err) {
    // Not critical for basic functionality: warn and fall back to nothing.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`Failed to fetch seller details for ${posterId}:`, reason);
    return {};
  }
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
|
||||
/**
|
||||
Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML.
|
||||
*/
|
||||
function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
|
||||
export function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const nextData = document.getElementById("__NEXT_DATA__");
|
||||
if (!nextData || !nextData.textContent) return null;
|
||||
@@ -107,9 +434,9 @@ function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
|
||||
Parse search page apollo state into SearchListing[].
|
||||
Filters keys likely to be listing entities and ensures url/title exist.
|
||||
*/
|
||||
function parseSearch(
|
||||
export function parseSearch(
|
||||
htmlString: HTMLString,
|
||||
BASE_URL: string,
|
||||
BASE_URL: string
|
||||
): SearchListing[] {
|
||||
const apolloState = extractApolloState(htmlString);
|
||||
if (!apolloState) return [];
|
||||
@@ -134,18 +461,18 @@ function parseSearch(
|
||||
}
|
||||
|
||||
/**
|
||||
Parse a listing page into a typed object.
|
||||
Parse a listing page into a typed object (backward compatible).
|
||||
*/
|
||||
function parseListing(
|
||||
htmlString: HTMLString,
|
||||
BASE_URL: string,
|
||||
BASE_URL: string
|
||||
): KijijiListingDetails | null {
|
||||
const apolloState = extractApolloState(htmlString);
|
||||
if (!apolloState) return null;
|
||||
|
||||
// Find the listing root key
|
||||
const listingKey = Object.keys(apolloState).find((k) =>
|
||||
k.includes("Listing"),
|
||||
k.includes("Listing")
|
||||
);
|
||||
if (!listingKey) return null;
|
||||
|
||||
@@ -167,9 +494,7 @@ function parseListing(
|
||||
|
||||
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
||||
const amountFormatted =
|
||||
cents != null
|
||||
? formatCentsToCurrency(cents / 100, "en-CA")
|
||||
: undefined;
|
||||
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;
|
||||
|
||||
const numberOfViews =
|
||||
metrics?.views != null ? Number(metrics.views) : undefined;
|
||||
@@ -203,88 +528,291 @@ function parseListing(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Parse a listing page into a detailed object with all available fields.
 *
 * The listing is read from the page's Apollo state: the first key containing
 * "Listing" is treated as the listing root.
 *
 * @param htmlString - Raw HTML of the listing page.
 * @param BASE_URL - Site origin; prefixed to relative listing URLs and
 *   forwarded to the seller-details lookup.
 * @param options - Controls image inclusion and how much seller data to fetch.
 * @returns The detailed listing, or null when the Apollo state or listing
 *   root is missing, the listing lacks a URL or title, or it has no usable
 *   (finite, numeric) price.
 */
export async function parseDetailedListing(
  htmlString: HTMLString,
  BASE_URL: string,
  options: ListingFetchOptions = {}
): Promise<DetailedListing | null> {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  // Find the listing root key
  const listingKey = Object.keys(apolloState).find((k) =>
    k.includes("Listing")
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    location,
    imageUrls,
    categoryId,
    adSource,
    flags,
    posterInfo,
    attributes,
  } = root as ApolloListingRoot;

  // A non-numeric amount (e.g. a text label) becomes NaN; treat it like a
  // missing price rather than letting NaN flow into the result.
  const rawCents = price?.amount != null ? Number(price.amount) : undefined;
  const cents =
    rawCents !== undefined && Number.isFinite(rawCents) ? rawCents : undefined;
  const amountFormatted =
    cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;

  const numberOfViews =
    metrics?.views != null ? Number(metrics.views) : undefined;

  // Relative URLs are resolved against BASE_URL.
  const listingUrl =
    typeof url === "string"
      ? url.startsWith("http")
        ? url
        : `${BASE_URL}${url}`
      : "";

  if (!listingUrl || !title) return null;

  // Only include fixed-price listings
  if (!amountFormatted || cents === undefined) return null;

  // Extract images if requested
  const images =
    options.includeImages !== false && Array.isArray(imageUrls)
      ? imageUrls.filter(
          (imageUrl): imageUrl is string => typeof imageUrl === "string"
        )
      : [];

  // Extract attributes as key-value pairs
  const attributeMap: Record<string, string[]> = {};
  if (Array.isArray(attributes)) {
    for (const attr of attributes) {
      if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) {
        attributeMap[attr.canonicalName] = attr.canonicalValues;
      }
    }
  }

  // Extract seller info based on depth setting
  let sellerInfo: DetailedListing["sellerInfo"];
  const depth = options.sellerDataDepth ?? "detailed";

  if (posterInfo?.posterId) {
    sellerInfo = {
      posterId: posterInfo.posterId,
      rating:
        typeof posterInfo.rating === "number" ? posterInfo.rating : undefined,
    };

    // Add more detailed info if requested and client-side data is enabled
    if (
      (depth === "detailed" || depth === "full") &&
      options.includeClientSideData
    ) {
      try {
        const additionalData = await fetchSellerDetails(
          posterInfo.posterId,
          BASE_URL
        );
        sellerInfo = {
          ...sellerInfo,
          ...additionalData,
        };
      } catch {
        // GraphQL data is optional: warn and keep the basic seller info.
        console.warn(
          `Failed to fetch additional seller data for ${posterInfo.posterId}`
        );
      }
    }
  }

  return {
    url: listingUrl,
    title,
    description,
    listingPrice: {
      amountFormatted,
      cents,
      currency: price?.currency,
    },
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews:
      numberOfViews !== undefined && Number.isFinite(numberOfViews)
        ? numberOfViews
        : undefined,
    address: location?.address ?? null,
    images,
    categoryId: typeof categoryId === "number" ? categoryId : 0,
    adSource: typeof adSource === "string" ? adSource : "UNKNOWN",
    flags: {
      topAd: flags?.topAd === true,
      priceDrop: flags?.priceDrop === true,
    },
    attributes: attributeMap,
    location: {
      id: typeof location?.id === "number" ? location.id : 0,
      name: typeof location?.name === "string" ? location.name : "Unknown",
      coordinates: location?.coordinates
        ? {
            latitude: location.coordinates.latitude,
            longitude: location.coordinates.longitude,
          }
        : undefined,
    },
    sellerInfo,
  };
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Search Kijiji and fetch detailed data for every unique listing found.
 *
 * Search result pages are fetched one at a time (up to `searchOptions.maxPages`),
 * listing links are de-duplicated across pages, and each new link is fetched and
 * parsed sequentially. A failure on a single listing is logged and skipped; it
 * does not abort the run.
 *
 * @param SEARCH_QUERY - Search text; used as the keywords when `searchOptions.keywords` is unset
 * @param REQUESTS_PER_SECOND - Request rate used to derive the per-request delay passed to `fetchHtml`
 * @param BASE_URL - Site origin used to build search URLs and passed to the parsers
 * @param searchOptions - Optional search configuration (location, category, sorting, paging, price)
 * @param listingOptions - Optional controls for how much detail is collected per listing
 * @returns The detailed listings that were fetched and parsed successfully
 */
export default async function fetchKijijiItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  BASE_URL = "https://www.kijiji.ca",
  searchOptions: SearchOptions = {},
  listingOptions: ListingFetchOptions = {}
): Promise<DetailedListing[]> {
  // A zero, negative, or non-finite rate would produce an infinite or
  // meaningless delay, so fall back to one request per second.
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0
      ? REQUESTS_PER_SECOND
      : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  // A page with fewer results than this is treated as the last page.
  const RESULTS_PER_PAGE = 40;

  // Set defaults for configuration
  const finalSearchOptions: Required<SearchOptions> = {
    location: searchOptions.location ?? 1700272, // Default to GTA
    category: searchOptions.category ?? 0, // Default to all categories
    keywords: searchOptions.keywords ?? SEARCH_QUERY,
    sortBy: searchOptions.sortBy ?? "relevancy",
    sortOrder: searchOptions.sortOrder ?? "desc",
    maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
    // NOTE(review): these casts hide `undefined` when no price bound is given.
    priceMin: searchOptions.priceMin as number,
    priceMax: searchOptions.priceMax as number,
  };

  const finalListingOptions: Required<ListingFetchOptions> = {
    includeImages: listingOptions.includeImages ?? true,
    sellerDataDepth: listingOptions.sellerDataDepth ?? "detailed",
    includeClientSideData: listingOptions.includeClientSideData ?? false,
  };

  const allListings: DetailedListing[] = [];
  const seenUrls = new Set<string>();

  // Fetch multiple pages
  for (let page = 1; page <= finalSearchOptions.maxPages; page++) {
    const searchUrl = buildSearchUrl(
      finalSearchOptions.keywords,
      {
        ...finalSearchOptions,
        // Add page parameter for pagination (omitted for the first page)
        ...(page > 1 ? { page } : {}),
      },
      BASE_URL
    );

    console.log(`Fetching search page ${page}: ${searchUrl}`);
    const searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
      onRateInfo: (remaining, reset) => {
        if (remaining && reset) {
          console.log(
            `\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`
          );
        }
      },
    });

    const searchResults = parseSearch(searchHtml, BASE_URL);
    if (searchResults.length === 0) {
      console.log(
        `No more results found on page ${page}. Stopping pagination.`
      );
      break;
    }

    // Deduplicate links across pages
    const newListingLinks = searchResults
      .map((r) => r.listingLink)
      .filter((link) => !seenUrls.has(link));

    for (const link of newListingLinks) {
      seenUrls.add(link);
    }

    console.log(
      `\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`
    );

    // Fetch details for this page's listings
    const progressBar = new cliProgress.SingleBar(
      {},
      cliProgress.Presets.shades_classic
    );
    const totalProgress = newListingLinks.length;
    let currentProgress = 0;
    progressBar.start(totalProgress, currentProgress);

    for (const link of newListingLinks) {
      try {
        const html = await fetchHtml(link, DELAY_MS, {
          onRateInfo: (remaining, reset) => {
            if (remaining && reset) {
              console.log(
                `\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`
              );
            }
          },
        });
        const parsed = await parseDetailedListing(
          html,
          BASE_URL,
          finalListingOptions
        );
        if (parsed) {
          allListings.push(parsed);
        }
      } catch (err) {
        // One bad listing should not abort the whole run: log it and move on.
        if (err instanceof HttpError) {
          console.error(
            `\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`
          );
        } else {
          console.error(
            `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`
          );
        }
      } finally {
        currentProgress++;
        progressBar.update(currentProgress);
      }
    }

    progressBar.stop();

    // If we got fewer results than expected (40 per page), we've reached the end
    if (searchResults.length < RESULTS_PER_PAGE) {
      break;
    }
  }

  console.log(`\nParsed ${allListings.length} detailed listings.`);
  return allListings;
}
|
||||
|
||||
// Re-export error classes for convenience
|
||||
export {
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
};
|
||||
|
||||
@@ -1,87 +1,200 @@
|
||||
/**
 * Custom error class for HTTP-related failures (non-success status codes).
 *
 * Constructor order is `(message, statusCode, url?)`.
 */
export class HttpError extends Error {
  /**
   * @param message - Human-readable description of the failure
   * @param statusCode - HTTP status code returned by the server
   * @param url - URL that was requested, when known
   */
  constructor(
    message: string,
    public readonly statusCode: number,
    public readonly url?: string
  ) {
    super(message);
    this.name = "HttpError";
  }
}
|
||||
|
||||
/** Error class for network failures (timeouts, connection issues) */
export class NetworkError extends Error {
  /**
   * @param message - Human-readable description of the failure
   * @param url - URL that was being requested
   * @param cause - Underlying error that triggered this failure, if any
   */
  constructor(
    message: string,
    public readonly url: string,
    public readonly cause?: Error
  ) {
    super(message);
    this.name = "NetworkError";
  }
}
|
||||
|
||||
/** Error class for parsing failures */
export class ParseError extends Error {
  /**
   * @param message - Human-readable description of what could not be parsed
   * @param data - Optional payload attached for debugging (stored as-is)
   */
  constructor(
    message: string,
    public readonly data?: unknown
  ) {
    super(message);
    this.name = "ParseError";
  }
}
|
||||
|
||||
/** Error class for rate limiting */
export class RateLimitError extends Error {
  /**
   * @param message - Human-readable description of the failure
   * @param url - URL that was rate limited
   * @param resetTime - Rate-limit reset value reported for the request, if known
   */
  constructor(
    message: string,
    public readonly url: string,
    public readonly resetTime?: number
  ) {
    super(message);
    this.name = "RateLimitError";
  }
}
|
||||
|
||||
/** Error class for validation failures */
export class ValidationError extends Error {
  /**
   * @param message - Human-readable description of the invalid input
   */
  constructor(message: string) {
    super(message);
    this.name = "ValidationError";
  }
}
|
||||
|
||||
/**
 * Type guard to check if a value is a record (object).
 *
 * @param value - Any value
 * @returns `true` for non-null objects that are not arrays; `false` otherwise
 */
export function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
||||
|
||||
/**
 * Calculate exponential backoff delay with jitter.
 *
 * The delay doubles with each attempt (`baseMs * 2^attempt`), gets up to 10%
 * random jitter added, and is then capped at 30 seconds.
 *
 * @param attempt - Zero-based retry attempt number
 * @param baseMs - Base delay in milliseconds for attempt 0
 * @returns Delay in milliseconds, never more than 30000
 */
function calculateBackoffDelay(attempt: number, baseMs: number): number {
  const exponentialDelay = baseMs * 2 ** attempt;
  const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
  return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
}
|
||||
|
||||
/** Options for fetchHtml */
export interface FetchHtmlOptions {
  /** Number of retries after the first attempt. */
  maxRetries?: number;
  /** Base delay in milliseconds for exponential backoff between retries. */
  retryBaseMs?: number;
  /** Milliseconds after which a request is aborted. */
  timeoutMs?: number;
  /** Receives the rate-limit "remaining" and "reset" header values (or null) for each response. */
  onRateInfo?: (remaining: string | null, reset: string | null) => void;
  /** Extra request headers; these override the defaults on a name clash. */
  headers?: Record<string, string>;
}
|
||||
|
||||
/**
 * Fetch HTML content from a URL with automatic retries, timeout, and exponential backoff.
 *
 * Behaviour per attempt:
 * - The request is aborted after `opts.timeoutMs` (default 30000 ms).
 * - `X-RateLimit-Remaining` / `X-RateLimit-Reset` header values are passed to `opts.onRateInfo`.
 * - HTTP 429 waits for the `X-RateLimit-Reset` value (read as seconds) when it is
 *   numeric, otherwise for the backoff delay, then retries.
 * - HTTP 5xx, timeouts, and other fetch failures are retried with exponential backoff.
 * - Any other non-OK status fails immediately with an `HttpError`.
 *
 * @param url - The URL to fetch
 * @param delayMs - Delay in milliseconds applied after a successful request (rate limiting)
 * @param opts - Optional retry, timeout, header, and rate-info settings
 * @returns The HTML content as a string
 * @throws HttpError on a non-OK response that is not retried (or when 5xx retries run out)
 * @throws RateLimitError when the server still answers 429 after all retries
 * @throws NetworkError on timeout or network failure after all retries
 */
export async function fetchHtml(
  url: string,
  delayMs: number,
  opts?: FetchHtmlOptions
): Promise<string> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 1000;
  const timeoutMs = opts?.timeoutMs ?? 30000;

  const sleep = (ms: number): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, ms));

  const defaultHeaders: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
  };

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const res = await fetch(url, {
        method: "GET",
        headers: { ...defaultHeaders, ...opts?.headers },
        signal: controller.signal,
      });

      // Response headers have arrived; stop the abort timer.
      clearTimeout(timeoutId);

      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (!res.ok) {
        // Handle rate limiting
        if (res.status === 429) {
          const resetSeconds = rateLimitReset
            ? Number(rateLimitReset)
            : Number.NaN;
          const hasReset = Number.isFinite(resetSeconds);
          const waitMs = hasReset
            ? Math.max(0, resetSeconds * 1000)
            : calculateBackoffDelay(attempt, retryBaseMs);

          if (attempt < maxRetries) {
            await sleep(waitMs);
            continue;
          }
          // Report the reset time only when the server supplied a usable
          // value, so callers never see NaN.
          throw new RateLimitError(
            `Rate limit exceeded for ${url}`,
            url,
            hasReset ? resetSeconds : undefined
          );
        }

        // Retry on server errors
        if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
          await sleep(calculateBackoffDelay(attempt, retryBaseMs));
          continue;
        }

        throw new HttpError(
          `Request failed with status ${res.status}`,
          res.status,
          url
        );
      }

      const html = await res.text();

      // Respect per-request delay to maintain rate limiting
      await sleep(delayMs);
      return html;
    } catch (err) {
      // Re-throw known errors
      if (
        err instanceof RateLimitError ||
        err instanceof HttpError ||
        err instanceof NetworkError
      ) {
        throw err;
      }

      // The abort timer fired: treat as a timeout.
      if (err instanceof Error && err.name === "AbortError") {
        if (attempt < maxRetries) {
          await sleep(calculateBackoffDelay(attempt, retryBaseMs));
          continue;
        }
        throw new NetworkError(`Request timeout for ${url}`, url, err);
      }

      // Network or other errors
      if (attempt < maxRetries) {
        await sleep(calculateBackoffDelay(attempt, retryBaseMs));
        continue;
      }
      throw new NetworkError(
        `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
        url,
        err instanceof Error ? err : undefined
      );
    } finally {
      // Also clears the timer when fetch itself rejected, so no pending
      // timeout is left behind (clearing twice is harmless).
      clearTimeout(timeoutId);
    }
  }

  throw new NetworkError(`Exhausted retries without response for ${url}`, url);
}
|
||||
|
||||
834
packages/core/test/facebook-core.test.ts
Normal file
834
packages/core/test/facebook-core.test.ts
Normal file
@@ -0,0 +1,834 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import {
|
||||
extractFacebookItemData,
|
||||
extractFacebookMarketplaceData,
|
||||
fetchFacebookItem,
|
||||
formatCentsToCurrency,
|
||||
formatCookiesForHeader,
|
||||
loadFacebookCookies,
|
||||
parseFacebookAds,
|
||||
parseFacebookCookieString,
|
||||
parseFacebookItem,
|
||||
} from "../src/scrapers/facebook";
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
beforeEach(() => {
|
||||
global.fetch = mock(() => {
|
||||
throw new Error("fetch should be mocked in individual tests");
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
});
|
||||
|
||||
describe("Cookie Parsing", () => {
|
||||
describe("parseFacebookCookieString", () => {
|
||||
test("should parse valid cookie string", () => {
|
||||
const cookieString = "c_user=123456789; xs=abcdef123456; fr=xyz789";
|
||||
const result = parseFacebookCookieString(cookieString);
|
||||
|
||||
expect(result).toHaveLength(3);
|
||||
expect(result[0]).toEqual({
|
||||
name: "c_user",
|
||||
value: "123456789",
|
||||
domain: ".facebook.com",
|
||||
path: "/",
|
||||
secure: true,
|
||||
httpOnly: false,
|
||||
sameSite: "lax",
|
||||
expirationDate: undefined,
|
||||
});
|
||||
expect(result[1]).toEqual({
|
||||
name: "xs",
|
||||
value: "abcdef123456",
|
||||
domain: ".facebook.com",
|
||||
path: "/",
|
||||
secure: true,
|
||||
httpOnly: false,
|
||||
sameSite: "lax",
|
||||
expirationDate: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
test("should handle URL-encoded values", () => {
|
||||
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
|
||||
const result = parseFacebookCookieString(cookieString);
|
||||
|
||||
expect(result[0].value).toBe("123+456");
|
||||
expect(result[1].value).toBe("abc=def");
|
||||
});
|
||||
|
||||
test("should filter out malformed cookies", () => {
|
||||
const cookieString = "c_user=123; invalid; xs=abc; =empty";
|
||||
const result = parseFacebookCookieString(cookieString);
|
||||
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result.map((c) => c.name)).toEqual(["c_user", "xs"]);
|
||||
});
|
||||
|
||||
test("should handle empty input", () => {
|
||||
expect(parseFacebookCookieString("")).toEqual([]);
|
||||
expect(parseFacebookCookieString(" ")).toEqual([]);
|
||||
});
|
||||
|
||||
test("should handle extra whitespace", () => {
|
||||
const cookieString = " c_user = 123 ; xs=abc ";
|
||||
const result = parseFacebookCookieString(cookieString);
|
||||
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result[0].name).toBe("c_user");
|
||||
expect(result[0].value).toBe("123");
|
||||
expect(result[1].name).toBe("xs");
|
||||
expect(result[1].value).toBe("abc");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Facebook Item Fetching", () => {
|
||||
describe("fetchFacebookItem", () => {
|
||||
const mockCookies = JSON.stringify([
|
||||
{ name: "c_user", value: "12345", domain: ".facebook.com" },
|
||||
{ name: "xs", value: "abc123", domain: ".facebook.com" },
|
||||
]);
|
||||
|
||||
test("should handle authentication errors", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: false,
|
||||
status: 401,
|
||||
text: () => Promise.resolve("Authentication required"),
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await fetchFacebookItem("123", mockCookies);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("should handle item not found", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: false,
|
||||
status: 404,
|
||||
text: () => Promise.resolve("Not found"),
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await fetchFacebookItem("nonexistent", mockCookies);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("should handle rate limiting", async () => {
|
||||
let attempts = 0;
|
||||
global.fetch = mock(() => {
|
||||
attempts++;
|
||||
if (attempts === 1) {
|
||||
return Promise.resolve({
|
||||
ok: false,
|
||||
status: 429,
|
||||
headers: {
|
||||
get: (header: string) => {
|
||||
if (header === "X-RateLimit-Reset") return "1";
|
||||
return null;
|
||||
},
|
||||
},
|
||||
text: () => Promise.resolve("Rate limited"),
|
||||
});
|
||||
}
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
viewer: {
|
||||
marketplace_product_details_page: {
|
||||
target: {
|
||||
id: "123",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Test Item",
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(
|
||||
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
|
||||
),
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
const result = await fetchFacebookItem("123", mockCookies);
|
||||
expect(attempts).toBe(2);
|
||||
// Should eventually succeed after retry
|
||||
});
|
||||
|
||||
test("should handle sold items", async () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
viewer: {
|
||||
marketplace_product_details_page: {
|
||||
target: {
|
||||
id: "456",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Sold Item",
|
||||
is_sold: true,
|
||||
is_live: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(
|
||||
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
|
||||
),
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await fetchFacebookItem("456", mockCookies);
|
||||
expect(result?.listingStatus).toBe("SOLD");
|
||||
});
|
||||
|
||||
test("should handle missing authentication cookies", async () => {
|
||||
// Use a test-specific cookie file that doesn't exist
|
||||
const testCookiePath = "./cookies/facebook-test.json";
|
||||
|
||||
// Test with no cookies available (test file doesn't exist)
|
||||
await expect(
|
||||
fetchFacebookItem("123", undefined, testCookiePath),
|
||||
).rejects.toThrow("No valid Facebook cookies found");
|
||||
});
|
||||
|
||||
test("should handle successful item extraction", async () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
viewer: {
|
||||
marketplace_product_details_page: {
|
||||
target: {
|
||||
id: "789",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Working Item",
|
||||
formatted_price: { text: "$299.00" },
|
||||
listing_price: {
|
||||
amount: "299.00",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
creation_time: 1640995200,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(
|
||||
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
|
||||
),
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await fetchFacebookItem("789", mockCookies);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.title).toBe("Working Item");
|
||||
expect(result?.listingPrice?.amountFormatted).toBe("$299.00");
|
||||
expect(result?.listingStatus).toBe("ACTIVE");
|
||||
});
|
||||
|
||||
test("should handle server errors", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: false,
|
||||
status: 500,
|
||||
text: () => Promise.resolve("Internal Server Error"),
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await fetchFacebookItem("error", mockCookies);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Data Extraction", () => {
|
||||
describe("extractFacebookItemData", () => {
|
||||
test("should extract item data from standard require structure", () => {
|
||||
const mockItemData = {
|
||||
id: "123456",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Test Item",
|
||||
formatted_price: { text: "$100.00" },
|
||||
listing_price: { amount: "100.00", currency: "CAD" },
|
||||
is_live: true,
|
||||
};
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
viewer: {
|
||||
marketplace_product_details_page: {
|
||||
target: mockItemData,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
|
||||
|
||||
const result = extractFacebookItemData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.id).toBe("123456");
|
||||
expect(result?.marketplace_listing_title).toBe("Test Item");
|
||||
});
|
||||
|
||||
test("should handle missing item data", () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
viewer: {
|
||||
marketplace_product_details_page: {},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
|
||||
|
||||
const result = extractFacebookItemData(html);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("should handle malformed HTML", () => {
|
||||
const result = extractFacebookItemData(
|
||||
"<html><body>Invalid HTML</body></html>",
|
||||
);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("should handle invalid JSON in script tags", () => {
|
||||
const html =
|
||||
"<html><body><script>{invalid: json}</script></body></html>";
|
||||
const result = extractFacebookItemData(html);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("should extract item with vehicle data", () => {
|
||||
const mockVehicleItem = {
|
||||
id: "789",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "2006 Honda Civic",
|
||||
formatted_price: { text: "$5,000" },
|
||||
listing_price: { amount: "5000.00", currency: "CAD" },
|
||||
vehicle_make_display_name: "Honda",
|
||||
vehicle_model_display_name: "Civic",
|
||||
vehicle_odometer_data: { unit: "KILOMETERS", value: 150000 },
|
||||
vehicle_transmission_type: "AUTOMATIC",
|
||||
is_live: true,
|
||||
};
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
viewer: {
|
||||
marketplace_product_details_page: {
|
||||
target: mockVehicleItem,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
|
||||
|
||||
const result = extractFacebookItemData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.vehicle_make_display_name).toBe("Honda");
|
||||
expect(result?.vehicle_odometer_data?.value).toBe(150000);
|
||||
});
|
||||
});
|
||||
|
||||
describe("extractFacebookMarketplaceData", () => {
|
||||
test("should extract search results from marketplace data", () => {
|
||||
const mockMarketplaceData = {
|
||||
feed_units: {
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "Item 1",
|
||||
listing_price: { amount: "10.00", currency: "CAD" },
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "2",
|
||||
marketplace_listing_title: "Item 2",
|
||||
listing_price: { amount: "20.00", currency: "CAD" },
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
marketplace_search: mockMarketplaceData,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
|
||||
|
||||
const result = extractFacebookMarketplaceData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result?.[0].node.listing.marketplace_listing_title).toBe(
|
||||
"Item 1",
|
||||
);
|
||||
});
|
||||
|
||||
test("should handle empty search results", () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
[
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
{
|
||||
__bbox: {
|
||||
result: {
|
||||
data: {
|
||||
marketplace_search: {
|
||||
feed_units: { edges: [] },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
};
|
||||
const html = `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`;
|
||||
|
||||
const result = extractFacebookMarketplaceData(html);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Data Parsing", () => {
|
||||
describe("parseFacebookItem", () => {
|
||||
test("should parse complete item with all fields", () => {
|
||||
const item = {
|
||||
id: "123456",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "iPhone 13 Pro",
|
||||
redacted_description: { text: "Excellent condition" },
|
||||
formatted_price: { text: "$800.00" },
|
||||
listing_price: { amount: "800.00", currency: "CAD" },
|
||||
location_text: { text: "Toronto, ON" },
|
||||
is_live: true,
|
||||
creation_time: 1640995200,
|
||||
marketplace_listing_seller: {
|
||||
id: "seller1",
|
||||
name: "John Doe",
|
||||
},
|
||||
delivery_types: ["IN_PERSON"],
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(item);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.title).toBe("iPhone 13 Pro");
|
||||
expect(result?.description).toBe("Excellent condition");
|
||||
expect(result?.listingPrice?.amountFormatted).toBe("$800.00");
|
||||
expect(result?.listingPrice?.cents).toBe(80000);
|
||||
expect(result?.listingPrice?.currency).toBe("CAD");
|
||||
expect(result?.address).toBe("Toronto, ON");
|
||||
expect(result?.listingStatus).toBe("ACTIVE");
|
||||
expect(result?.seller?.name).toBe("John Doe");
|
||||
expect(result?.deliveryTypes).toEqual(["IN_PERSON"]);
|
||||
});
|
||||
|
||||
test("should parse FREE items", () => {
|
||||
const item = {
|
||||
id: "789",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "Free Sofa",
|
||||
formatted_price: { text: "FREE" },
|
||||
listing_price: { amount: "0.00", currency: "CAD" },
|
||||
is_live: true,
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(item);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.title).toBe("Free Sofa");
|
||||
expect(result?.listingPrice?.amountFormatted).toBe("FREE");
|
||||
expect(result?.listingPrice?.cents).toBe(0);
|
||||
});
|
||||
|
||||
test("should handle missing optional fields", () => {
|
||||
const item = {
|
||||
id: "456",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "Minimal Item",
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(item);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.title).toBe("Minimal Item");
|
||||
expect(result?.description).toBeUndefined();
|
||||
expect(result?.seller).toBeUndefined();
|
||||
});
|
||||
|
||||
test("should identify vehicle listings", () => {
|
||||
const vehicleItem = {
|
||||
id: "999",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "2012 Mazda 3",
|
||||
formatted_price: { text: "$8,000" },
|
||||
listing_price: { amount: "8000.00", currency: "CAD" },
|
||||
vehicle_make_display_name: "Mazda",
|
||||
vehicle_model_display_name: "3",
|
||||
is_live: true,
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(vehicleItem);
|
||||
expect(result?.listingType).toBe("vehicle");
|
||||
});
|
||||
|
||||
test("should handle different listing statuses", () => {
|
||||
const soldItem = {
|
||||
id: "111",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "Sold Item",
|
||||
is_sold: true,
|
||||
is_live: false,
|
||||
};
|
||||
|
||||
const pendingItem = {
|
||||
id: "222",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "Pending Item",
|
||||
is_pending: true,
|
||||
is_live: true,
|
||||
};
|
||||
|
||||
const hiddenItem = {
|
||||
id: "333",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "Hidden Item",
|
||||
is_hidden: true,
|
||||
is_live: false,
|
||||
};
|
||||
|
||||
expect(parseFacebookItem(soldItem)?.listingStatus).toBe("SOLD");
|
||||
expect(parseFacebookItem(pendingItem)?.listingStatus).toBe("PENDING");
|
||||
expect(parseFacebookItem(hiddenItem)?.listingStatus).toBe("HIDDEN");
|
||||
});
|
||||
|
||||
test("should return null for items without title", () => {
|
||||
const invalidItem = {
|
||||
id: "invalid",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
is_live: true,
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(invalidItem);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("parseFacebookAds", () => {
  /** Builds a CAD `listing_price` fixture; the formatted amount is the amount prefixed with "$". */
  const cadPrice = (amount: string) => ({
    amount,
    formatted_amount: `$${amount}`,
    currency: "CAD",
  });

  test("should parse search result ads", () => {
    const searchEdges = [
      {
        node: {
          listing: {
            id: "1",
            marketplace_listing_title: "Ad 1",
            listing_price: cadPrice("50.00"),
            location: {
              reverse_geocode: { city_page: { display_name: "Toronto" } },
            },
            creation_time: 1640995200,
            is_live: true,
          },
        },
      },
      {
        node: {
          listing: {
            id: "2",
            marketplace_listing_title: "Ad 2",
            listing_price: cadPrice("75.00"),
            location: {
              reverse_geocode: { city_page: { display_name: "Ottawa" } },
            },
            creation_time: 1640995300,
            is_live: true,
          },
        },
      },
    ];

    const parsed = parseFacebookAds(searchEdges);

    expect(parsed).toHaveLength(2);
    expect(parsed[0].title).toBe("Ad 1");
    expect(parsed[0].listingPrice?.cents).toBe(5000);
    expect(parsed[0].address).toBe("Toronto");
    expect(parsed[1].title).toBe("Ad 2");
    expect(parsed[1].address).toBe("Ottawa");
  });

  test("should filter out ads without price", () => {
    const searchEdges = [
      {
        node: {
          listing: {
            id: "1",
            marketplace_listing_title: "With Price",
            listing_price: cadPrice("100.00"),
            is_live: true,
          },
        },
      },
      {
        // No listing_price on this one.
        node: {
          listing: {
            id: "2",
            marketplace_listing_title: "No Price",
            is_live: true,
          },
        },
      },
    ];

    const parsed = parseFacebookAds(searchEdges);

    expect(parsed).toHaveLength(1);
    expect(parsed[0].title).toBe("With Price");
  });

  test("should handle malformed ads gracefully", () => {
    const searchEdges = [
      {
        node: {
          listing: {
            id: "1",
            marketplace_listing_title: "Valid Ad",
            listing_price: cadPrice("50.00"),
            is_live: true,
          },
        },
      },
      {
        node: {
          // Missing listing
        },
      } as { node: { listing?: unknown } },
    ];

    const parsed = parseFacebookAds(searchEdges);

    expect(parsed).toHaveLength(1);
    expect(parsed[0].title).toBe("Valid Ad");
  });
});
|
||||
});
|
||||
|
||||
describe("Utility Functions", () => {
|
||||
describe("formatCentsToCurrency", () => {
  test("should format cents to currency string", () => {
    // [input cents, expected display string]
    const numericCases: Array<[number, string]> = [
      [100, "$1.00"],
      [1000, "$10.00"],
      [9999, "$99.99"],
      [123456, "$1,234.56"],
    ];

    for (const [cents, display] of numericCases) {
      expect(formatCentsToCurrency(cents)).toBe(display);
    }
  });

  test("should handle string inputs", () => {
    const stringCases: Array<[string, string]> = [
      ["100", "$1.00"],
      ["1000", "$10.00"],
    ];

    for (const [cents, display] of stringCases) {
      expect(formatCentsToCurrency(cents)).toBe(display);
    }
  });

  test("should handle zero", () => {
    const formatted = formatCentsToCurrency(0);
    expect(formatted).toBe("$0.00");
  });

  test("should handle null and undefined", () => {
    // Both nullish inputs are expected to produce an empty string.
    expect(formatCentsToCurrency(null)).toBe("");
    expect(formatCentsToCurrency(undefined)).toBe("");
  });

  test("should handle invalid inputs", () => {
    // A non-numeric string and NaN are expected to produce an empty string.
    expect(formatCentsToCurrency("invalid")).toBe("");
    expect(formatCentsToCurrency(Number.NaN)).toBe("");
  });
});
|
||||
|
||||
describe("formatCookiesForHeader", () => {
  // Two facebook.com cookies plus one cookie scoped to an unrelated domain.
  const mockCookies = [
    { name: "c_user", value: "123456", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abcdef", domain: ".facebook.com", path: "/" },
    { name: "session_id", value: "xyz", domain: "other.com", path: "/" },
  ];

  test("should format cookies for header string", () => {
    const header = formatCookiesForHeader(mockCookies, "www.facebook.com");
    expect(header).toBe("c_user=123456; xs=abcdef");
  });

  test("should filter expired cookies", () => {
    // expirationDate is given in seconds and set 1000 seconds into the past.
    const staleCookie = {
      name: "expired",
      value: "old",
      domain: ".facebook.com",
      path: "/",
      expirationDate: Date.now() / 1000 - 1000,
    };
    const cookiesWithExpiration = [...mockCookies, staleCookie];

    const header = formatCookiesForHeader(
      cookiesWithExpiration,
      "www.facebook.com",
    );

    expect(header).not.toContain("expired");
  });

  test("should handle no matching cookies", () => {
    const header = formatCookiesForHeader(mockCookies, "www.google.com");
    expect(header).toBe("");
  });

  test("should handle empty cookie array", () => {
    const header = formatCookiesForHeader([], "www.facebook.com");
    expect(header).toBe("");
  });
});
|
||||
});
|
||||
});
|
||||
712
packages/core/test/facebook-integration.test.ts
Normal file
712
packages/core/test/facebook-integration.test.ts
Normal file
@@ -0,0 +1,712 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
// Default stub: a test that does not install its own fetch mock fails with an explicit error.
beforeEach(() => {
  const unmockedFetch = () => {
    throw new Error("fetch should be mocked in individual tests");
  };
  global.fetch = mock(unmockedFetch);
});

// Put back the fetch implementation captured when this module was loaded.
afterEach(() => {
  global.fetch = originalFetch;
});
|
||||
|
||||
describe("Main Search Function", () => {
  // Serialized cookie list passed as the last argument of every search in this suite.
  const mockCookies = JSON.stringify([
    { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
  ]);

  /** Wraps feed edges in the nested `require` / `__bbox` envelope shared by all fixtures below. */
  const wrapEdges = (edges: unknown[]) => ({
    require: [
      [
        null,
        null,
        null,
        {
          __bbox: {
            result: {
              data: {
                marketplace_search: {
                  feed_units: {
                    edges,
                  },
                },
              },
            },
          },
        },
      ],
    ],
  });

  /** Successful fetch response whose body embeds `payload` as JSON inside a <script> tag. */
  const htmlResponse = (payload: unknown) => ({
    ok: true,
    text: () =>
      Promise.resolve(
        `<html><body><script>${JSON.stringify(payload)}</script></body></html>`,
      ),
    headers: {
      get: () => null,
    },
  });

  test("should successfully fetch search results", async () => {
    const mockSearchData = wrapEdges([
      {
        node: {
          listing: {
            id: "1",
            marketplace_listing_title: "iPhone 13 Pro",
            listing_price: {
              amount: "800.00",
              formatted_amount: "$800.00",
              currency: "CAD",
            },
            location: {
              reverse_geocode: {
                city_page: { display_name: "Toronto" },
              },
            },
            creation_time: 1640995200,
            is_live: true,
          },
        },
      },
      {
        node: {
          listing: {
            id: "2",
            marketplace_listing_title: "Samsung Galaxy",
            listing_price: {
              amount: "600.00",
              formatted_amount: "$600.00",
              currency: "CAD",
            },
            location: {
              reverse_geocode: {
                city_page: { display_name: "Mississauga" },
              },
            },
            creation_time: 1640995300,
            is_live: true,
          },
        },
      },
    ]);
    global.fetch = mock(() => Promise.resolve(htmlResponse(mockSearchData)));

    const items = await fetchFacebookItems(
      "iPhone",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(items).toHaveLength(2);
    expect(items[0].title).toBe("iPhone 13 Pro");
    expect(items[1].title).toBe("Samsung Galaxy");
  });

  test("should filter out items without price", async () => {
    const mockSearchData = wrapEdges([
      {
        node: {
          listing: {
            id: "1",
            marketplace_listing_title: "With Price",
            listing_price: {
              amount: "100.00",
              formatted_amount: "$100.00",
              currency: "CAD",
            },
            is_live: true,
          },
        },
      },
      {
        // No listing_price on this listing.
        node: {
          listing: {
            id: "2",
            marketplace_listing_title: "No Price",
            is_live: true,
          },
        },
      },
    ]);
    global.fetch = mock(() => Promise.resolve(htmlResponse(mockSearchData)));

    const items = await fetchFacebookItems(
      "test",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(items).toHaveLength(1);
    expect(items[0].title).toBe("With Price");
  });

  test("should respect MAX_ITEMS parameter", async () => {
    // Ten priced listings are served; the call below passes 5 as its fourth argument.
    const tenEdges = Array.from({ length: 10 }, (_, i) => ({
      node: {
        listing: {
          id: String(i),
          marketplace_listing_title: `Item ${i}`,
          listing_price: {
            amount: `${(i + 1) * 10}.00`,
            formatted_amount: `$${(i + 1) * 10}.00`,
            currency: "CAD",
          },
          is_live: true,
        },
      },
    }));
    const mockSearchData = wrapEdges(tenEdges);
    global.fetch = mock(() => Promise.resolve(htmlResponse(mockSearchData)));

    const items = await fetchFacebookItems(
      "test",
      1,
      "toronto",
      5,
      mockCookies,
    );

    expect(items).toHaveLength(5);
  });

  test("should return empty array for no results", async () => {
    const mockSearchData = wrapEdges([]);
    global.fetch = mock(() => Promise.resolve(htmlResponse(mockSearchData)));

    const items = await fetchFacebookItems(
      "nonexistent query",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(items).toEqual([]);
  });

  test("should handle authentication errors gracefully", async () => {
    global.fetch = mock(() =>
      Promise.resolve({
        ok: false,
        status: 401,
        text: () => Promise.resolve("Unauthorized"),
        headers: {
          get: () => null,
        },
      }),
    );

    const items = await fetchFacebookItems(
      "test",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(items).toEqual([]);
  });

  test("should handle network errors", async () => {
    global.fetch = mock(() => Promise.reject(new Error("Network error")));

    const pendingSearch = fetchFacebookItems(
      "test",
      1,
      "toronto",
      25,
      mockCookies,
    );

    await expect(pendingSearch).rejects.toThrow("Network error");
  });

  test("should handle rate limiting with retry", async () => {
    let attempts = 0;

    global.fetch = mock(() => {
      attempts += 1;

      // Every call after the first serves a page containing a single priced listing.
      if (attempts > 1) {
        const mockSearchData = wrapEdges([
          {
            node: {
              listing: {
                id: "1",
                marketplace_listing_title: "Item 1",
                listing_price: {
                  amount: "100.00",
                  formatted_amount: "$100.00",
                  currency: "CAD",
                },
                is_live: true,
              },
            },
          },
        ]);
        return Promise.resolve(htmlResponse(mockSearchData));
      }

      // The first call is answered with HTTP 429 and an X-RateLimit-Reset header of "1".
      return Promise.resolve({
        ok: false,
        status: 429,
        headers: {
          get: (header: string) => {
            if (header === "X-RateLimit-Reset") return "1";
            return null;
          },
        },
        text: () => Promise.resolve("Rate limited"),
      });
    });

    const items = await fetchFacebookItems(
      "test",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(attempts).toBe(2);
    expect(items).toHaveLength(1);
  });
});
|
||||
|
||||
describe("Vehicle Listing Integration", () => {
  // Serialized cookie list passed to the scraper.
  const mockCookies = JSON.stringify([
    { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
  ]);

  test("should correctly identify and parse vehicle listings", async () => {
    // One vehicle-titled listing followed by one ordinary item.
    const feedEdges = [
      {
        node: {
          listing: {
            id: "1",
            marketplace_listing_title: "2006 Honda Civic",
            listing_price: {
              amount: "8000.00",
              formatted_amount: "$8,000.00",
              currency: "CAD",
            },
            is_live: true,
          },
        },
      },
      {
        node: {
          listing: {
            id: "2",
            marketplace_listing_title: "iPhone 13",
            listing_price: {
              amount: "800.00",
              formatted_amount: "$800.00",
              currency: "CAD",
            },
            is_live: true,
          },
        },
      },
    ];
    const mockSearchData = {
      require: [
        [
          null,
          null,
          null,
          {
            __bbox: {
              result: {
                data: {
                  marketplace_search: {
                    feed_units: {
                      edges: feedEdges,
                    },
                  },
                },
              },
            },
          },
        ],
      ],
    };

    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        text: () =>
          Promise.resolve(
            `<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
          ),
        headers: {
          get: () => null,
        },
      }),
    );

    const items = await fetchFacebookItems(
      "cars",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(items).toHaveLength(2);
    // Both should be classified as "item" type in search results (vehicle detection is for item details)
    expect(items[0].title).toBe("2006 Honda Civic");
    expect(items[1].title).toBe("iPhone 13");
  });
});
|
||||
|
||||
describe("Different Categories", () => {
  // Serialized cookie list passed to the scraper.
  const mockCookies = JSON.stringify([
    { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
  ]);

  /** Installs a fetch mock that serves a search page containing exactly one feed edge. */
  const serveSingleListing = (listing: Record<string, unknown>): void => {
    const mockSearchData = {
      require: [
        [
          null,
          null,
          null,
          {
            __bbox: {
              result: {
                data: {
                  marketplace_search: {
                    feed_units: {
                      edges: [{ node: { listing } }],
                    },
                  },
                },
              },
            },
          },
        ],
      ],
    };

    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        text: () =>
          Promise.resolve(
            `<html><body><script>${JSON.stringify(mockSearchData)}</script></body></html>`,
          ),
        headers: {
          get: () => null,
        },
      }),
    );
  };

  test("should handle electronics listings", async () => {
    serveSingleListing({
      id: "1",
      marketplace_listing_title: "Nintendo Switch",
      listing_price: {
        amount: "250.00",
        formatted_amount: "$250.00",
        currency: "CAD",
      },
      location: {
        reverse_geocode: {
          city_page: { display_name: "Toronto" },
        },
      },
      marketplace_listing_category_id: "479353692612078",
      condition: "USED",
      is_live: true,
    });

    const items = await fetchFacebookItems(
      "nintendo switch",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(items).toHaveLength(1);
    expect(items[0].title).toBe("Nintendo Switch");
    expect(items[0].categoryId).toBe("479353692612078");
  });

  test("should handle home goods/furniture listings", async () => {
    serveSingleListing({
      id: "1",
      marketplace_listing_title: "Dining Table",
      listing_price: {
        amount: "150.00",
        formatted_amount: "$150.00",
        currency: "CAD",
      },
      location: {
        reverse_geocode: {
          city_page: { display_name: "Mississauga" },
        },
      },
      marketplace_listing_category_id: "1569171756675761",
      condition: "USED",
      is_live: true,
    });

    const items = await fetchFacebookItems(
      "table",
      1,
      "toronto",
      25,
      mockCookies,
    );

    expect(items).toHaveLength(1);
    expect(items[0].title).toBe("Dining Table");
    expect(items[0].categoryId).toBe("1569171756675761");
  });
});
|
||||
|
||||
describe("Error Scenarios", () => {
  // Serialized cookie list passed to the scraper.
  const mockCookies = JSON.stringify([
    { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
  ]);

  /** Runs the search used by every scenario in this suite. */
  const runSearch = () =>
    fetchFacebookItems("test", 1, "toronto", 25, mockCookies);

  /** Failed fetch response carrying the given HTTP status and plain-text body. */
  const failedResponse = (status: number, body: string) => ({
    ok: false,
    status,
    text: () => Promise.resolve(body),
    headers: {
      get: () => null,
    },
  });

  test("should handle malformed HTML responses", async () => {
    // A 'successful' response whose body contains no embedded JSON payload.
    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        text: () =>
          Promise.resolve(
            "<html><body>Invalid HTML without JSON data</body></html>",
          ),
        headers: {
          get: () => null,
        },
      }),
    );

    const items = await runSearch();

    expect(items).toEqual([]);
  });

  test("should handle 404 errors gracefully", async () => {
    global.fetch = mock(() => Promise.resolve(failedResponse(404, "Not found")));

    const items = await runSearch();

    expect(items).toEqual([]);
  });

  test("should handle 500 errors gracefully", async () => {
    global.fetch = mock(() =>
      Promise.resolve(failedResponse(500, "Internal Server Error")),
    );

    const items = await runSearch();

    expect(items).toEqual([]);
  });
});
|
||||
});
|
||||
166
packages/core/test/kijiji-core.test.ts
Normal file
166
packages/core/test/kijiji-core.test.ts
Normal file
@@ -0,0 +1,166 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import {
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
buildSearchUrl,
|
||||
resolveCategoryId,
|
||||
resolveLocationId,
|
||||
} from "../src/scrapers/kijiji";
|
||||
|
||||
describe("Location and Category Resolution", () => {
  describe("resolveLocationId", () => {
    test("should return numeric IDs as-is", () => {
      for (const id of [1700272, 0]) {
        expect(resolveLocationId(id)).toBe(id);
      }
    });

    test("should resolve string location names", () => {
      // [location name, expected numeric id]
      const knownLocations: Array<[string, number]> = [
        ["canada", 0],
        ["ontario", 9004],
        ["toronto", 1700273],
        ["gta", 1700272],
      ];

      for (const [name, id] of knownLocations) {
        expect(resolveLocationId(name)).toBe(id);
      }
    });

    test("should handle case insensitive matching", () => {
      const mixedCase: Array<[string, number]> = [
        ["Canada", 0],
        ["ONTARIO", 9004],
      ];

      for (const [name, id] of mixedCase) {
        expect(resolveLocationId(name)).toBe(id);
      }
    });

    test("should default to Canada for unknown locations", () => {
      for (const name of ["unknown", ""]) {
        expect(resolveLocationId(name)).toBe(0);
      }
    });

    test("should handle undefined input", () => {
      const resolved = resolveLocationId(undefined);
      expect(resolved).toBe(0);
    });
  });

  describe("resolveCategoryId", () => {
    test("should return numeric IDs as-is", () => {
      for (const id of [132, 0]) {
        expect(resolveCategoryId(id)).toBe(id);
      }
    });

    test("should resolve string category names", () => {
      // [category name, expected numeric id]
      const knownCategories: Array<[string, number]> = [
        ["all", 0],
        ["phones", 132],
        ["electronics", 29659001],
        ["buy-sell", 10],
      ];

      for (const [name, id] of knownCategories) {
        expect(resolveCategoryId(name)).toBe(id);
      }
    });

    test("should handle case insensitive matching", () => {
      const mixedCase: Array<[string, number]> = [
        ["All", 0],
        ["PHONES", 132],
      ];

      for (const [name, id] of mixedCase) {
        expect(resolveCategoryId(name)).toBe(id);
      }
    });

    test("should default to all categories for unknown categories", () => {
      for (const name of ["unknown", ""]) {
        expect(resolveCategoryId(name)).toBe(0);
      }
    });

    test("should handle undefined input", () => {
      const resolved = resolveCategoryId(undefined);
      expect(resolved).toBe(0);
    });
  });
});
|
||||
|
||||
describe("URL Construction", () => {
  describe("buildSearchUrl", () => {
    test("should build basic search URL", () => {
      const searchUrl = buildSearchUrl("iphone", {
        location: 1700272,
        category: 132,
        sortBy: "relevancy",
        sortOrder: "desc",
      });

      // Path segment first, then the two sort-related query parameters.
      const expectedFragments = [
        "b-buy-sell/canada/iphone/k0c132l1700272",
        "sort=relevancyDesc",
        "order=DESC",
      ];
      for (const fragment of expectedFragments) {
        expect(searchUrl).toContain(fragment);
      }
    });

    test("should handle pagination", () => {
      const secondPageUrl = buildSearchUrl("iphone", {
        location: 1700272,
        category: 132,
        page: 2,
      });

      expect(secondPageUrl).toContain("&page=2");
    });

    test("should handle different sort options", () => {
      const byDateAscending = buildSearchUrl("iphone", {
        sortBy: "date",
        sortOrder: "asc",
      });
      expect(byDateAscending).toContain("sort=DATE");
      expect(byDateAscending).toContain("order=ASC");

      const byPriceDescending = buildSearchUrl("iphone", {
        sortBy: "price",
        sortOrder: "desc",
      });
      expect(byPriceDescending).toContain("sort=PRICE");
      expect(byPriceDescending).toContain("order=DESC");
    });

    test("should handle string location/category inputs", () => {
      const namedUrl = buildSearchUrl("iphone", {
        location: "toronto",
        category: "phones",
      });

      // phones + toronto
      expect(namedUrl).toContain("k0c132l1700273");
    });
  });
});
|
||||
|
||||
describe("Error Classes", () => {
  // URL shared by the constructors that accept one.
  const exampleUrl = "https://example.com";

  test("HttpError should store status and URL", () => {
    const httpError = new HttpError("Not found", 404, exampleUrl);

    expect(httpError.message).toBe("Not found");
    expect(httpError.statusCode).toBe(404);
    expect(httpError.url).toBe("https://example.com");
    expect(httpError.name).toBe("HttpError");
  });

  test("NetworkError should store URL and cause", () => {
    const underlying = new Error("Connection failed");
    const networkError = new NetworkError("Network error", exampleUrl, underlying);

    expect(networkError.message).toBe("Network error");
    expect(networkError.url).toBe("https://example.com");
    // Identity check: the very same Error instance must be exposed as the cause.
    expect(networkError.cause).toBe(underlying);
    expect(networkError.name).toBe("NetworkError");
  });

  test("ParseError should store data", () => {
    const payload = { invalid: "json" };
    const parseError = new ParseError("Invalid JSON", payload);

    expect(parseError.message).toBe("Invalid JSON");
    // Identity check: the payload object is stored by reference.
    expect(parseError.data).toBe(payload);
    expect(parseError.name).toBe("ParseError");
  });

  test("RateLimitError should store URL and reset time", () => {
    const rateLimitError = new RateLimitError("Rate limited", exampleUrl, 60);

    expect(rateLimitError.message).toBe("Rate limited");
    expect(rateLimitError.url).toBe("https://example.com");
    expect(rateLimitError.resetTime).toBe(60);
    expect(rateLimitError.name).toBe("RateLimitError");
  });

  test("ValidationError should work without field", () => {
    const validationError = new ValidationError("Invalid value");

    expect(validationError.message).toBe("Invalid value");
    expect(validationError.name).toBe("ValidationError");
  });
});
|
||||
363
packages/core/test/kijiji-integration.test.ts
Normal file
363
packages/core/test/kijiji-integration.test.ts
Normal file
@@ -0,0 +1,363 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import {
|
||||
extractApolloState,
|
||||
parseDetailedListing,
|
||||
parseSearch,
|
||||
} from "../src/scrapers/kijiji";
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
describe("HTML Parsing Integration", () => {
|
||||
// Default stub for all tests: calling fetch without a test-specific mock throws an explicit error.
beforeEach(() => {
  const unmockedFetch = () => {
    throw new Error("fetch should be mocked in individual tests");
  };
  global.fetch = mock(unmockedFetch);
});

// Put back the fetch implementation captured when this module was loaded.
afterEach(() => {
  global.fetch = originalFetch;
});
|
||||
|
||||
describe("extractApolloState", () => {
  test("should extract Apollo state from valid HTML", () => {
    const pageHtml =
      '<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>';

    expect(extractApolloState(pageHtml)).toEqual({
      ROOT_QUERY: { test: "value" },
    });
  });

  test("should return null for HTML without Apollo state", () => {
    const pageHtml = "<html><body>No data here</body></html>";

    expect(extractApolloState(pageHtml)).toBeNull();
  });

  test("should return null for malformed JSON", () => {
    // The script body is not valid JSON (`json` is an unquoted token).
    const pageHtml =
      '<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>';

    expect(extractApolloState(pageHtml)).toBeNull();
  });

  test("should handle missing __NEXT_DATA__ element", () => {
    const pageHtml = "<html><body><div>Content</div></body></html>";

    expect(extractApolloState(pageHtml)).toBeNull();
  });
});
|
||||
|
||||
describe("parseSearch", () => {
  // Base URL passed to parseSearch in every test of this suite.
  const baseUrl = "https://www.kijiji.ca";

  /** Renders a page whose __NEXT_DATA__ script carries `apolloState` under props.pageProps.__APOLLO_STATE__. */
  const pageWithApolloState = (apolloState: Record<string, unknown>) => `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${JSON.stringify({
            props: {
              pageProps: {
                __APOLLO_STATE__: apolloState,
              },
            },
          })}
        </script>
      </html>
    `;

  test("should parse search results from HTML", () => {
    const pageHtml = pageWithApolloState({
      "Listing:123": {
        url: "/v-iphone/k0l0",
        title: "iPhone 13 Pro",
      },
      "Listing:456": {
        url: "/v-samsung/k0l0",
        title: "Samsung Galaxy",
      },
      ROOT_QUERY: { test: "value" },
    });

    const listings = parseSearch(pageHtml, baseUrl);

    expect(listings).toHaveLength(2);
    expect(listings[0]).toEqual({
      name: "iPhone 13 Pro",
      listingLink: "https://www.kijiji.ca/v-iphone/k0l0",
    });
    expect(listings[1]).toEqual({
      name: "Samsung Galaxy",
      listingLink: "https://www.kijiji.ca/v-samsung/k0l0",
    });
  });

  test("should handle absolute URLs", () => {
    // The listing URL already includes the scheme and host.
    const pageHtml = pageWithApolloState({
      "Listing:123": {
        url: "https://www.kijiji.ca/v-iphone/k0l0",
        title: "iPhone 13 Pro",
      },
    });

    const listings = parseSearch(pageHtml, baseUrl);

    expect(listings[0].listingLink).toBe(
      "https://www.kijiji.ca/v-iphone/k0l0",
    );
  });

  test("should filter out invalid listings", () => {
    const pageHtml = pageWithApolloState({
      "Listing:123": {
        url: "/v-iphone/k0l0",
        title: "iPhone 13 Pro",
      },
      "Listing:456": {
        url: "/v-samsung/k0l0",
        // Missing title
      },
      "Other:789": {
        url: "/v-other/k0l0",
        title: "Other Item",
      },
    });

    const listings = parseSearch(pageHtml, baseUrl);

    expect(listings).toHaveLength(1);
    expect(listings[0].name).toBe("iPhone 13 Pro");
  });

  test("should return empty array for invalid HTML", () => {
    const listings = parseSearch("<html><body>Invalid</body></html>", baseUrl);

    expect(listings).toEqual([]);
  });
});
|
||||
|
||||
describe("parseDetailedListing", () => {
|
||||
test("should parse detailed listing with all fields", async () => {
  // Next.js page data holding one fully populated listing in the Apollo state.
  const nextData = {
    props: {
      pageProps: {
        __APOLLO_STATE__: {
          "Listing:123": {
            url: "/v-iphone-13-pro/k0l0",
            title: "iPhone 13 Pro 256GB",
            description: "Excellent condition iPhone 13 Pro",
            price: {
              amount: 80000,
              currency: "CAD",
              type: "FIXED",
            },
            type: "OFFER",
            status: "ACTIVE",
            activationDate: "2024-01-15T10:00:00.000Z",
            endDate: "2025-01-15T10:00:00.000Z",
            metrics: { views: 150 },
            location: {
              address: "Toronto, ON",
              id: 1700273,
              name: "Toronto",
              coordinates: {
                latitude: 43.6532,
                longitude: -79.3832,
              },
            },
            imageUrls: [
              "https://media.kijiji.ca/api/v1/image1.jpg",
              "https://media.kijiji.ca/api/v1/image2.jpg",
            ],
            imageCount: 2,
            categoryId: 132,
            adSource: "ORGANIC",
            flags: {
              topAd: false,
              priceDrop: true,
            },
            posterInfo: {
              posterId: "user123",
              rating: 4.8,
            },
            attributes: [
              {
                canonicalName: "forsaleby",
                canonicalValues: ["ownr"],
              },
              {
                canonicalName: "phonecarrier",
                canonicalValues: ["unlocked"],
              },
            ],
          },
        },
      },
    },
  };

  const pageHtml = `
        <html>
          <script id="__NEXT_DATA__" type="application/json">
            ${JSON.stringify(nextData)}
          </script>
        </html>
      `;

  const parsed = await parseDetailedListing(pageHtml, "https://www.kijiji.ca");

  // Expected shape: renamed fields, absolute URL, and attributes keyed by canonical name.
  const expectedListing = {
    url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0",
    title: "iPhone 13 Pro 256GB",
    description: "Excellent condition iPhone 13 Pro",
    listingPrice: {
      amountFormatted: "$800.00",
      cents: 80000,
      currency: "CAD",
    },
    listingType: "OFFER",
    listingStatus: "ACTIVE",
    creationDate: "2024-01-15T10:00:00.000Z",
    endDate: "2025-01-15T10:00:00.000Z",
    numberOfViews: 150,
    address: "Toronto, ON",
    images: [
      "https://media.kijiji.ca/api/v1/image1.jpg",
      "https://media.kijiji.ca/api/v1/image2.jpg",
    ],
    categoryId: 132,
    adSource: "ORGANIC",
    flags: {
      topAd: false,
      priceDrop: true,
    },
    attributes: {
      forsaleby: ["ownr"],
      phonecarrier: ["unlocked"],
    },
    location: {
      id: 1700273,
      name: "Toronto",
      coordinates: {
        latitude: 43.6532,
        longitude: -79.3832,
      },
    },
    sellerInfo: {
      posterId: "user123",
      rating: 4.8,
    },
  };
  expect(parsed).toEqual(expectedListing);
});
|
||||
|
||||
// A listing whose price is { type: "CONTACT", amount: null } has no numeric
// price. The parser is expected to resolve to null for such a page instead
// of producing a listing object.
test("should return null for contact-based pricing", async () => {
|
||||
// Fixture: only url, title and the contact-style price are supplied.
const mockHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:123": {
|
||||
url: "/v-iphone/k0l0",
|
||||
title: "iPhone for Sale",
|
||||
price: {
|
||||
type: "CONTACT",
|
||||
amount: null,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
const result = await parseDetailedListing(
|
||||
mockHtml,
|
||||
"https://www.kijiji.ca",
|
||||
);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
// Minimal listing: only url, title and price.amount are supplied. The
// expectation below pins the value reported for every omitted field.
test("should handle missing optional fields", async () => {
|
||||
const mockHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:123": {
|
||||
url: "/v-iphone/k0l0",
|
||||
title: "iPhone 13",
|
||||
price: { amount: 50000 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
const result = await parseDetailedListing(
|
||||
mockHtml,
|
||||
"https://www.kijiji.ca",
|
||||
);
|
||||
// NOTE(review): Jest-style `toEqual` treats an undefined-valued property and
// an absent property as equal, so the `undefined` entries below record intent
// but would also pass if the key were missing. Confirm against the Bun
// matcher docs; use `toStrictEqual` if key presence matters.
expect(result).toEqual({
|
||||
url: "https://www.kijiji.ca/v-iphone/k0l0",
|
||||
title: "iPhone 13",
|
||||
description: undefined,
|
||||
// 50000 cents is rendered as "$500.00" even though no currency was supplied.
listingPrice: {
|
||||
amountFormatted: "$500.00",
|
||||
cents: 50000,
|
||||
currency: undefined,
|
||||
},
|
||||
listingType: undefined,
|
||||
listingStatus: undefined,
|
||||
creationDate: undefined,
|
||||
endDate: undefined,
|
||||
numberOfViews: undefined,
|
||||
// Unlike the other omitted scalars, a missing address is reported as null.
address: null,
|
||||
// Concrete fallbacks: empty image list, category 0, "UNKNOWN" ad source,
// both flags false, empty attribute map.
images: [],
|
||||
categoryId: 0,
|
||||
adSource: "UNKNOWN",
|
||||
flags: {
|
||||
topAd: false,
|
||||
priceDrop: false,
|
||||
},
|
||||
attributes: {},
|
||||
// A missing location still yields an object, with placeholder id and name.
location: {
|
||||
id: 0,
|
||||
name: "Unknown",
|
||||
coordinates: undefined,
|
||||
},
|
||||
sellerInfo: undefined,
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
54
packages/core/test/kijiji-utils.test.ts
Normal file
54
packages/core/test/kijiji-utils.test.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
||||
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
|
||||
|
||||
describe("Utility Functions", () => {
|
||||
// Pins the slug format produced by slugify: lower-case words joined by "-".
describe("slugify", () => {
|
||||
test("should convert basic strings to slugs", () => {
|
||||
expect(slugify("Hello World")).toBe("hello-world");
|
||||
expect(slugify("iPhone 13 Pro")).toBe("iphone-13-pro");
|
||||
});
|
||||
|
||||
// Accented letters are reduced to plain ASCII ("é" -> "e"); "&" and "%" are
// dropped without leaving a doubled hyphen behind.
test("should handle special characters", () => {
|
||||
expect(slugify("Café & Restaurant")).toBe("cafe-restaurant");
|
||||
expect(slugify("100% New")).toBe("100-new");
|
||||
});
|
||||
|
||||
// The empty string stays empty. Whitespace-only and hyphen-only inputs both
// come out as a single "-": separators are collapsed but not trimmed away.
test("should handle empty and edge cases", () => {
|
||||
expect(slugify("")).toBe("");
|
||||
expect(slugify(" ")).toBe("-");
|
||||
expect(slugify("---")).toBe("-");
|
||||
});
|
||||
|
||||
// Digits survive, both as separate words and embedded in a word.
test("should preserve numbers and valid characters", () => {
|
||||
expect(slugify("iPhone 13")).toBe("iphone-13");
|
||||
expect(slugify("item123")).toBe("item123");
|
||||
});
|
||||
});
|
||||
|
||||
// Pins formatCentsToCurrency: the input is an amount in cents, the output a
// "$"-prefixed dollar string with two decimals.
describe("formatCentsToCurrency", () => {
|
||||
test("should format valid cent values", () => {
|
||||
expect(formatCentsToCurrency(100)).toBe("$1.00");
|
||||
expect(formatCentsToCurrency(1999)).toBe("$19.99");
|
||||
expect(formatCentsToCurrency(0)).toBe("$0.00");
|
||||
});
|
||||
|
||||
// Numeric strings are accepted and formatted exactly like numbers.
test("should handle string inputs", () => {
|
||||
expect(formatCentsToCurrency("100")).toBe("$1.00");
|
||||
expect(formatCentsToCurrency("1999")).toBe("$19.99");
|
||||
});
|
||||
|
||||
// Missing values produce an empty string rather than "$0.00" or an error.
test("should handle null/undefined inputs", () => {
|
||||
expect(formatCentsToCurrency(null)).toBe("");
|
||||
expect(formatCentsToCurrency(undefined)).toBe("");
|
||||
});
|
||||
|
||||
// Non-numeric strings and NaN are likewise mapped to an empty string.
test("should handle invalid inputs", () => {
|
||||
expect(formatCentsToCurrency("invalid")).toBe("");
|
||||
expect(formatCentsToCurrency(Number.NaN)).toBe("");
|
||||
});
|
||||
|
||||
// Thousands are grouped with "," and decimals use ".".
test("should use en-US locale formatting", () => {
|
||||
expect(formatCentsToCurrency(123456)).toBe("$1,234.56");
|
||||
});
|
||||
});
|
||||
});
|
||||
11
packages/core/test/setup.ts
Normal file
11
packages/core/test/setup.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
// Test setup for Bun test runner
|
||||
// This file is loaded before any tests run due to bunfig.toml preload
|
||||
|
||||
// Fetch fallback (not a mock): if the runtime already exposes a global fetch
// it is kept untouched, so real requests are not intercepted here. Only when
// no fetch exists is a stub installed, and that stub throws as soon as it is
// called so the missing API fails loudly instead of returning undefined.
|
||||
global.fetch =
|
||||
global.fetch ||
|
||||
(() => {
|
||||
throw new Error("fetch is not available in test environment");
|
||||
});
|
||||
|
||||
// Add any global test utilities here
|
||||
Reference in New Issue
Block a user