diff --git a/biome.json b/biome.json index 7d222a6..1a0bf51 100644 --- a/biome.json +++ b/biome.json @@ -1,34 +1,34 @@ { - "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json", - "vcs": { - "enabled": true, - "clientKind": "git", - "useIgnoreFile": true - }, - "files": { - "includes": ["**", "!!**/dist"] - }, - "formatter": { - "enabled": true, - "indentStyle": "space" - }, - "linter": { - "enabled": true, - "rules": { - "recommended": true - } - }, - "javascript": { - "formatter": { - "quoteStyle": "double" - } - }, - "assist": { - "enabled": true, - "actions": { - "source": { - "organizeImports": "on" - } - } - } + "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json", + "vcs": { + "enabled": true, + "clientKind": "git", + "useIgnoreFile": true + }, + "files": { + "includes": ["**", "!!**/dist"] + }, + "formatter": { + "enabled": true, + "indentStyle": "space" + }, + "linter": { + "enabled": true, + "rules": { + "recommended": true + } + }, + "javascript": { + "formatter": { + "quoteStyle": "double" + } + }, + "assist": { + "enabled": true, + "actions": { + "source": { + "organizeImports": "on" + } + } + } } diff --git a/package.json b/package.json index b6c52b9..9eeb531 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,9 @@ }, "private": true, "type": "module", - "workspaces": ["packages/*"], + "workspaces": [ + "packages/*" + ], "devDependencies": { "@biomejs/biome": "2.3.11" } diff --git a/packages/api-server/src/routes/status.ts b/packages/api-server/src/routes/status.ts index 7fb4f7d..d32d946 100644 --- a/packages/api-server/src/routes/status.ts +++ b/packages/api-server/src/routes/status.ts @@ -2,5 +2,5 @@ * Health check endpoint */ export function statusRoute(): Response { - return new Response("OK", { status: 200 }); + return new Response("OK", { status: 200 }); } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2fb9c6f..c743dd4 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -1,45 +1,42 @@ // Export all scrapers -export { - default as fetchKijijiItems, - slugify, - resolveLocationId, - resolveCategoryId, - buildSearchUrl, - extractApolloState, - parseSearch, - parseDetailedListing, - HttpError, - NetworkError, - ParseError, - RateLimitError, - ValidationError, -} from "./scrapers/kijiji"; -export type { - KijijiListingDetails, - DetailedListing, - SearchOptions, - ListingFetchOptions, -} from "./scrapers/kijiji"; -export { - default as fetchFacebookItems, - fetchFacebookItem, - parseFacebookCookieString, - ensureFacebookCookies, - extractFacebookMarketplaceData, - extractFacebookItemData, - parseFacebookAds, - parseFacebookItem, -} from "./scrapers/facebook"; -export type { FacebookListingDetails } from "./scrapers/facebook"; - -export { default as fetchEbayItems } from "./scrapers/ebay"; export type { EbayListingDetails } from "./scrapers/ebay"; - -// Export shared utilities -export * from "./utils/http"; -export * from "./utils/delay"; -export * from "./utils/format"; - +export { default as fetchEbayItems } from "./scrapers/ebay"; +export type { FacebookListingDetails } from "./scrapers/facebook"; +export { + default as fetchFacebookItems, + ensureFacebookCookies, + extractFacebookItemData, + extractFacebookMarketplaceData, + fetchFacebookItem, + parseFacebookAds, + parseFacebookCookieString, + parseFacebookItem, +} from "./scrapers/facebook"; +export type { + DetailedListing, + KijijiListingDetails, + ListingFetchOptions, + SearchOptions, +} from "./scrapers/kijiji"; +export { + buildSearchUrl, + default as fetchKijijiItems, + extractApolloState, + HttpError, + NetworkError, + ParseError, + parseDetailedListing, + parseSearch, + RateLimitError, + resolveCategoryId, + resolveLocationId, + slugify, + ValidationError, +} from "./scrapers/kijiji"; // Export shared types export * from "./types/common"; +export * from "./utils/delay"; +export * from "./utils/format"; +// Export shared utilities +export * from "./utils/http"; diff --git a/packages/core/src/scrapers/ebay.ts b/packages/core/src/scrapers/ebay.ts index 1d1355e..6b3717b 100644 --- a/packages/core/src/scrapers/ebay.ts +++ b/packages/core/src/scrapers/ebay.ts @@ -1,9 +1,4 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { parseHTML } from "linkedom"; -import type { HTMLString } from "../types/common"; -import { delay } from "../utils/delay"; -import { formatCentsToCurrency } from "../utils/format"; -import { isRecord } from "../utils/http"; // ----------------------------- Types ----------------------------- @@ -43,7 +38,7 @@ function parseEbayPrice( const amountStr = numberMatches[0].replace(/,/g, ""); const dollars = parseFloat(amountStr); - if (isNaN(dollars)) return null; + if (Number.isNaN(dollars)) return null; const cents = Math.round(dollars * 100); @@ -185,8 +180,7 @@ function parseEbayListings( const text = el.textContent?.trim(); // Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words if ( - text && - text.includes("$") && + text?.includes("$") && text.length < 100 && !text.includes("laptop") && !text.includes("computer") && diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 73439ec..2614200 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -1,10 +1,11 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { parseHTML } from "linkedom"; + import cliProgress from "cli-progress"; -import { isRecord } from "../utils/http"; +import { parseHTML } from "linkedom"; +import type { HTMLString } from "../types/common"; import { delay } from "../utils/delay"; import { formatCentsToCurrency } from "../utils/format"; -import type { HTMLString } from "../types/common"; +import { isRecord } from "../utils/http"; /** * Facebook Marketplace Scraper @@ -17,189 +18,189 @@ import type { HTMLString } from "../types/common"; // ----------------------------- Types ----------------------------- interface Cookie { - name: string; - value: string; - domain: string; - path: string; - secure?: boolean; - httpOnly?: boolean; - sameSite?: "strict" | "lax" | "none" | "unspecified"; - session?: boolean; - expirationDate?: number; - partitionKey?: Record; - storeId?: string; + name: string; + value: string; + domain: string; + path: string; + secure?: boolean; + httpOnly?: boolean; + sameSite?: "strict" | "lax" | "none" | "unspecified"; + session?: boolean; + expirationDate?: number; + partitionKey?: Record; + storeId?: string; } interface FacebookAdNode { - node: { - listing: { - id: string; - marketplace_listing_title?: string; - listing_price?: { - amount?: string | number; - currency?: string; - amount_with_offset_in_currency?: string | number; - formatted_amount?: string; - }; - location?: { - reverse_geocode?: { - city_page?: { - display_name?: string; - }; - }; - }; - creation_time?: number; - is_sold?: boolean; - is_pending?: boolean; - is_live?: boolean; - is_hidden?: boolean; - primary_listing_photo?: { - image?: { - uri?: string; - }; - }; - listing_video?: { - id?: string; - }; - marketplace_listing_seller?: { - name?: string; - id?: string; - }; - marketplace_listing_category_id?: string; - delivery_types?: string[]; - [k: string]: unknown; - }; - [k: string]: unknown; - }; + node: { + listing: { + id: string; + marketplace_listing_title?: string; + listing_price?: { + amount?: string | number; + currency?: string; + amount_with_offset_in_currency?: string | number; + formatted_amount?: string; + }; + location?: { + reverse_geocode?: { + city_page?: { + display_name?: string; + }; + }; + }; + creation_time?: number; + is_sold?: boolean; + is_pending?: boolean; + is_live?: boolean; + is_hidden?: boolean; + primary_listing_photo?: { + image?: { + uri?: string; + }; + }; + listing_video?: { + id?: string; + }; + marketplace_listing_seller?: { + name?: string; + id?: string; + }; + marketplace_listing_category_id?: string; + delivery_types?: string[]; + [k: string]: unknown; + }; + [k: string]: unknown; + }; } interface FacebookEdge { - node: FacebookAdNode["node"]; - [k: string]: unknown; + node: FacebookAdNode["node"]; + [k: string]: unknown; } interface FacebookMarketplaceSearch { - feed_units?: { - edges?: FacebookEdge[]; - }; - [k: string]: unknown; + feed_units?: { + edges?: FacebookEdge[]; + }; + [k: string]: unknown; } interface FacebookMarketplaceItem { - // Basic identification - id: string; - __typename: "GroupCommerceProductItem"; + // Basic identification + id: string; + __typename: "GroupCommerceProductItem"; - // Listing content - marketplace_listing_title: string; - redacted_description?: { - text: string; - }; - custom_title?: string; + // Listing content + marketplace_listing_title: string; + redacted_description?: { + text: string; + }; + custom_title?: string; - // Pricing - formatted_price?: { - text: string; - }; - listing_price?: { - amount: string; - currency: string; - amount_with_offset: string; - }; + // Pricing + formatted_price?: { + text: string; + }; + listing_price?: { + amount: string; + currency: string; + amount_with_offset: string; + }; - // Location - location_text?: { - text: string; - }; - location?: { - latitude: number; - longitude: number; - reverse_geocode_detailed?: { - country_alpha_two: string; - postal_code_trimmed: string; - }; - }; + // Location + location_text?: { + text: string; + }; + location?: { + latitude: number; + longitude: number; + reverse_geocode_detailed?: { + country_alpha_two: string; + postal_code_trimmed: string; + }; + }; - // Status flags - is_live?: boolean; - is_sold?: boolean; - is_pending?: boolean; - is_hidden?: boolean; - is_draft?: boolean; + // Status flags + is_live?: boolean; + is_sold?: boolean; + is_pending?: boolean; + is_hidden?: boolean; + is_draft?: boolean; - // Timing - creation_time?: number; + // Timing + creation_time?: number; - // Seller information - marketplace_listing_seller?: { - __typename: "User"; - id: string; - name: string; - profile_picture?: { - uri: string; - }; - join_time?: number; - }; + // Seller information + marketplace_listing_seller?: { + __typename: "User"; + id: string; + name: string; + profile_picture?: { + uri: string; + }; + join_time?: number; + }; - // Vehicle-specific fields (for automotive listings) - vehicle_make_display_name?: string; - vehicle_model_display_name?: string; - vehicle_odometer_data?: { - unit: "KILOMETERS" | "MILES"; - value: number; - }; - vehicle_transmission_type?: "AUTOMATIC" | "MANUAL"; - vehicle_exterior_color?: string; - vehicle_interior_color?: string; - vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR"; - vehicle_fuel_type?: string; - vehicle_trim_display_name?: string; + // Vehicle-specific fields (for automotive listings) + vehicle_make_display_name?: string; + vehicle_model_display_name?: string; + vehicle_odometer_data?: { + unit: "KILOMETERS" | "MILES"; + value: number; + }; + vehicle_transmission_type?: "AUTOMATIC" | "MANUAL"; + vehicle_exterior_color?: string; + vehicle_interior_color?: string; + vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR"; + vehicle_fuel_type?: string; + vehicle_trim_display_name?: string; - // Category and commerce - marketplace_listing_category_id?: string; - condition?: string; + // Category and commerce + marketplace_listing_category_id?: string; + condition?: string; - // Commerce features - delivery_types?: string[]; - is_shipping_offered?: boolean; - is_buy_now_enabled?: boolean; - can_buyer_make_checkout_offer?: boolean; + // Commerce features + delivery_types?: string[]; + is_shipping_offered?: boolean; + is_buy_now_enabled?: boolean; + can_buyer_make_checkout_offer?: boolean; - // Communication - messaging_enabled?: boolean; - first_message_suggested_value?: string; + // Communication + messaging_enabled?: boolean; + first_message_suggested_value?: string; - // Metadata - logging_id?: string; - reportable_ent_id?: string; + // Metadata + logging_id?: string; + reportable_ent_id?: string; - [k: string]: unknown; + [k: string]: unknown; } export interface FacebookListingDetails { - url: string; - title: string; - description?: string; - listingPrice?: { - amountFormatted: string; - cents?: number; - currency?: string; - }; - listingType?: string; - listingStatus?: string; - creationDate?: string; - endDate?: string; - numberOfViews?: number; - address?: string | null; - // Facebook-specific fields - imageUrl?: string; - videoUrl?: string; - seller?: { - name?: string; - id?: string; - }; - categoryId?: string; - deliveryTypes?: string[]; + url: string; + title: string; + description?: string; + listingPrice?: { + amountFormatted: string; + cents?: number; + currency?: string; + }; + listingType?: string; + listingStatus?: string; + creationDate?: string; + endDate?: string; + numberOfViews?: number; + address?: string | null; + // Facebook-specific fields + imageUrl?: string; + videoUrl?: string; + seller?: { + name?: string; + id?: string; + }; + categoryId?: string; + deliveryTypes?: string[]; } // ----------------------------- Utilities ----------------------------- @@ -208,171 +209,171 @@ export interface FacebookListingDetails { * Load Facebook cookies from file or string */ async function loadFacebookCookies( - cookiesSource?: string, - cookiePath = "./cookies/facebook.json" + cookiesSource?: string, + cookiePath = "./cookies/facebook.json", ): Promise { - // First try to load from provided string parameter - if (cookiesSource) { - try { - const cookies = JSON.parse(cookiesSource); - if (Array.isArray(cookies)) { - return cookies.filter( - (cookie): cookie is Cookie => - cookie && - typeof cookie.name === "string" && - typeof cookie.value === "string" - ); - } - } catch (e) { - throw new Error(`Invalid cookies JSON provided: ${e}`); - } - } + // First try to load from provided string parameter + if (cookiesSource) { + try { + const cookies = JSON.parse(cookiesSource); + if (Array.isArray(cookies)) { + return cookies.filter( + (cookie): cookie is Cookie => + cookie && + typeof cookie.name === "string" && + typeof cookie.value === "string", + ); + } + } catch (e) { + throw new Error(`Invalid cookies JSON provided: ${e}`); + } + } - // Try to load from specified path - try { - const cookiesPath = cookiePath; - const file = Bun.file(cookiesPath); - if (await file.exists()) { - const content = await file.text(); - const cookies = JSON.parse(content); - if (Array.isArray(cookies)) { - return cookies.filter( - (cookie): cookie is Cookie => - cookie && - typeof cookie.name === "string" && - typeof cookie.value === "string" - ); - } - } - } catch (e) { - console.warn(`Could not load cookies from ${cookiePath}: ${e}`); - } + // Try to load from specified path + try { + const cookiesPath = cookiePath; + const file = Bun.file(cookiesPath); + if (await file.exists()) { + const content = await file.text(); + const cookies = JSON.parse(content); + if (Array.isArray(cookies)) { + return cookies.filter( + (cookie): cookie is Cookie => + cookie && + typeof cookie.name === "string" && + typeof cookie.value === "string", + ); + } + } + } catch (e) { + console.warn(`Could not load cookies from ${cookiePath}: ${e}`); + } - return []; + return []; } /** * Parse Facebook cookie string into Cookie array format */ export function parseFacebookCookieString(cookieString: string): Cookie[] { - if (!cookieString || !cookieString.trim()) { - return []; - } + if (!cookieString || !cookieString.trim()) { + return []; + } - return cookieString - .split(";") - .map((pair) => pair.trim()) - .filter((pair) => pair.includes("=")) - .map((pair) => { - const [name, value] = pair.split("=", 2); - const trimmedName = name.trim(); - const trimmedValue = value.trim(); + return cookieString + .split(";") + .map((pair) => pair.trim()) + .filter((pair) => pair.includes("=")) + .map((pair) => { + const [name, value] = pair.split("=", 2); + const trimmedName = name.trim(); + const trimmedValue = value.trim(); - // Skip empty names or values - if (!trimmedName || !trimmedValue) { - return null; - } + // Skip empty names or values + if (!trimmedName || !trimmedValue) { + return null; + } - return { - name: trimmedName, - value: decodeURIComponent(trimmedValue), - domain: ".facebook.com", - path: "/", - secure: true, - httpOnly: false, - sameSite: "lax" as const, - expirationDate: undefined, // Session cookies - }; - }) - .filter((cookie): cookie is Cookie => cookie !== null); + return { + name: trimmedName, + value: decodeURIComponent(trimmedValue), + domain: ".facebook.com", + path: "/", + secure: true, + httpOnly: false, + sameSite: "lax" as const, + expirationDate: undefined, // Session cookies + }; + }) + .filter((cookie): cookie is Cookie => cookie !== null); } /** * Ensure Facebook cookies are available, parsing from env var if needed */ export async function ensureFacebookCookies( - cookiePath = "./cookies/facebook.json" + cookiePath = "./cookies/facebook.json", ): Promise { - // First try to load existing cookies - try { - const existing = await loadFacebookCookies(undefined, cookiePath); - if (existing.length > 0) { - return existing; - } - } catch { - // File doesn't exist or is invalid, continue to check env var - } + // First try to load existing cookies + try { + const existing = await loadFacebookCookies(undefined, cookiePath); + if (existing.length > 0) { + return existing; + } + } catch { + // File doesn't exist or is invalid, continue to check env var + } - // Try to parse from environment variable - const cookieString = process.env.FACEBOOK_COOKIE; - if (!cookieString || !cookieString.trim()) { - throw new Error( - "No valid Facebook cookies found. Either:\n" + - " 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" + - " 2. Create ./cookies/facebook.json manually with cookie array" - ); - } + // Try to parse from environment variable + const cookieString = process.env.FACEBOOK_COOKIE; + if (!cookieString || !cookieString.trim()) { + throw new Error( + "No valid Facebook cookies found. Either:\n" + + " 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" + + " 2. Create ./cookies/facebook.json manually with cookie array", + ); + } - // Parse the cookie string - const cookies = parseFacebookCookieString(cookieString); - if (cookies.length === 0) { - throw new Error( - "FACEBOOK_COOKIE environment variable contains no valid cookies. " + - 'Expected format: "name1=value1; name2=value2;"' - ); - } + // Parse the cookie string + const cookies = parseFacebookCookieString(cookieString); + if (cookies.length === 0) { + throw new Error( + "FACEBOOK_COOKIE environment variable contains no valid cookies. " + + 'Expected format: "name1=value1; name2=value2;"', + ); + } - // Save to file for future use - try { - await Bun.write(cookiePath, JSON.stringify(cookies, null, 2)); - console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`); - } catch (error) { - console.warn(`Could not save cookies to ${cookiePath}: ${error}`); - // Continue anyway, we have the cookies in memory - } + // Save to file for future use + try { + await Bun.write(cookiePath, JSON.stringify(cookies, null, 2)); + console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`); + } catch (error) { + console.warn(`Could not save cookies to ${cookiePath}: ${error}`); + // Continue anyway, we have the cookies in memory + } - return cookies; + return cookies; } /** * Format cookies array into Cookie header string */ function formatCookiesForHeader(cookies: Cookie[], domain: string): string { - const validCookies = cookies - .filter((cookie) => { - // Check if cookie applies to this domain - if (cookie.domain.startsWith(".")) { - // Domain cookie (applies to subdomains) - return ( - domain.endsWith(cookie.domain.slice(1)) || - domain === cookie.domain.slice(1) - ); - } - // Host-only cookie - return cookie.domain === domain; - }) - .filter((cookie) => { - // Check expiration - if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) { - return false; // Expired - } - return true; - }); + const validCookies = cookies + .filter((cookie) => { + // Check if cookie applies to this domain + if (cookie.domain.startsWith(".")) { + // Domain cookie (applies to subdomains) + return ( + domain.endsWith(cookie.domain.slice(1)) || + domain === cookie.domain.slice(1) + ); + } + // Host-only cookie + return cookie.domain === domain; + }) + .filter((cookie) => { + // Check expiration + if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) { + return false; // Expired + } + return true; + }); - return validCookies - .map((cookie) => `${cookie.name}=${cookie.value}`) - .join("; "); + return validCookies + .map((cookie) => `${cookie.name}=${cookie.value}`) + .join("; "); } class HttpError extends Error { - constructor( - message: string, - public readonly status: number, - public readonly url: string - ) { - super(message); - this.name = "HttpError"; - } + constructor( + message: string, + public readonly status: number, + public readonly url: string, + ) { + super(message); + this.name = "HttpError"; + } } // ----------------------------- Extraction Metrics ----------------------------- @@ -381,40 +382,40 @@ class HttpError extends Error { * Monitor API extraction success/failure for detecting changes */ const extractionStats = { - totalExtractions: 0, - successfulExtractions: 0, - failedExtractions: 0, - lastApiChangeDetected: null as Date | null, + totalExtractions: 0, + successfulExtractions: 0, + failedExtractions: 0, + lastApiChangeDetected: null as Date | null, }; /** * Log extraction metrics for monitoring API stability */ function logExtractionMetrics(success: boolean, itemId?: string) { - extractionStats.totalExtractions++; - if (success) { - extractionStats.successfulExtractions++; - } else { - extractionStats.failedExtractions++; - } + extractionStats.totalExtractions++; + if (success) { + extractionStats.successfulExtractions++; + } else { + extractionStats.failedExtractions++; + } - // Log warning if extraction success rate drops below 80% - const successRate = - extractionStats.successfulExtractions / extractionStats.totalExtractions; - if ( - extractionStats.totalExtractions > 10 && - successRate < 0.8 && - !extractionStats.lastApiChangeDetected - ) { - console.warn( - "Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes." - ); - extractionStats.lastApiChangeDetected = new Date(); - } + // Log warning if extraction success rate drops below 80% + const successRate = + extractionStats.successfulExtractions / extractionStats.totalExtractions; + if ( + extractionStats.totalExtractions > 10 && + successRate < 0.8 && + !extractionStats.lastApiChangeDetected + ) { + console.warn( + "Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.", + ); + extractionStats.lastApiChangeDetected = new Date(); + } - if (!success && itemId) { - console.warn(`Facebook API extraction failed for item ${itemId}`); - } + if (!success && itemId) { + console.warn(`Facebook API extraction failed for item ${itemId}`); + } } // ----------------------------- HTTP Client ----------------------------- @@ -426,93 +427,93 @@ function logExtractionMetrics(success: boolean, itemId?: string) { - Supports custom cookies for Facebook authentication */ async function fetchHtml( - url: string, - DELAY_MS: number, - opts?: { - maxRetries?: number; - retryBaseMs?: number; - onRateInfo?: (remaining: string | null, reset: string | null) => void; - cookies?: string; - } + url: string, + DELAY_MS: number, + opts?: { + maxRetries?: number; + retryBaseMs?: number; + onRateInfo?: (remaining: string | null, reset: string | null) => void; + cookies?: string; + }, ): Promise { - const maxRetries = opts?.maxRetries ?? 3; - const retryBaseMs = opts?.retryBaseMs ?? 500; + const maxRetries = opts?.maxRetries ?? 3; + const retryBaseMs = opts?.retryBaseMs ?? 500; - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - const headers: Record = { - accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", - "accept-encoding": "gzip, deflate, br", - "cache-control": "no-cache", - "upgrade-insecure-requests": "1", - "sec-fetch-dest": "document", - "sec-fetch-mode": "navigate", - "sec-fetch-site": "none", - "sec-fetch-user": "?1", - "user-agent": - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - }; + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const headers: Record = { + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", + "accept-encoding": "gzip, deflate, br", + "cache-control": "no-cache", + "upgrade-insecure-requests": "1", + "sec-fetch-dest": "document", + "sec-fetch-mode": "navigate", + "sec-fetch-site": "none", + "sec-fetch-user": "?1", + "user-agent": + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + }; - // Add cookies if provided - if (opts?.cookies) { - headers.cookie = opts.cookies; - } + // Add cookies if provided + if (opts?.cookies) { + headers.cookie = opts.cookies; + } - const res = await fetch(url, { - method: "GET", - headers, - }); + const res = await fetch(url, { + method: "GET", + headers, + }); - const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); - const rateLimitReset = res.headers.get("X-RateLimit-Reset"); - opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); + const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); + const rateLimitReset = res.headers.get("X-RateLimit-Reset"); + opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); - if (!res.ok) { - // Respect 429 reset if provided - if (res.status === 429) { - const resetSeconds = rateLimitReset - ? Number(rateLimitReset) - : Number.NaN; - const waitMs = Number.isFinite(resetSeconds) - ? Math.max(0, resetSeconds * 1000) - : (attempt + 1) * retryBaseMs; - await delay(waitMs); - continue; - } - // For Facebook, 400 often means authentication required - // Don't retry 4xx client errors except 429 - if (res.status >= 400 && res.status < 500 && res.status !== 429) { - throw new HttpError( - `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`, - res.status, - url - ); - } - // Retry on 5xx - if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { - await delay((attempt + 1) * retryBaseMs); - continue; - } - throw new HttpError( - `Request failed with status ${res.status}`, - res.status, - url - ); - } + if (!res.ok) { + // Respect 429 reset if provided + if (res.status === 429) { + const resetSeconds = rateLimitReset + ? Number(rateLimitReset) + : Number.NaN; + const waitMs = Number.isFinite(resetSeconds) + ? Math.max(0, resetSeconds * 1000) + : (attempt + 1) * retryBaseMs; + await delay(waitMs); + continue; + } + // For Facebook, 400 often means authentication required + // Don't retry 4xx client errors except 429 + if (res.status >= 400 && res.status < 500 && res.status !== 429) { + throw new HttpError( + `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`, + res.status, + url, + ); + } + // Retry on 5xx + if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { + await delay((attempt + 1) * retryBaseMs); + continue; + } + throw new HttpError( + `Request failed with status ${res.status}`, + res.status, + url, + ); + } - const html = await res.text(); - // Respect per-request delay to keep at or under REQUESTS_PER_SECOND - await delay(DELAY_MS); - return html; - } catch (err) { - if (attempt >= maxRetries) throw err; - await delay((attempt + 1) * retryBaseMs); - } - } + const html = await res.text(); + // Respect per-request delay to keep at or under REQUESTS_PER_SECOND + await delay(DELAY_MS); + return html; + } catch (err) { + if (attempt >= maxRetries) throw err; + await delay((attempt + 1) * retryBaseMs); + } + } - throw new Error("Exhausted retries without response"); + throw new Error("Exhausted retries without response"); } // ----------------------------- Parsing ----------------------------- @@ -521,86 +522,85 @@ async function fetchHtml( Extract marketplace search data from Facebook page script tags */ export function extractFacebookMarketplaceData( - htmlString: HTMLString + htmlString: HTMLString, ): FacebookAdNode[] | null { - const { document } = parseHTML(htmlString); - const scripts = document.querySelectorAll("script"); + const { document } = parseHTML(htmlString); + const scripts = document.querySelectorAll("script"); - let marketplaceData: FacebookMarketplaceSearch | null = null; + let marketplaceData: FacebookMarketplaceSearch | null = null; - // Find the script containing the require data with marketplace_search - for (const script of Array.from(scripts) as HTMLScriptElement[]) { - const scriptText = script.textContent; - if (!scriptText) continue; + // Find the script containing the require data with marketplace_search + for (const script of Array.from(scripts) as HTMLScriptElement[]) { + const scriptText = script.textContent; + if (!scriptText) continue; - try { - const parsed = JSON.parse(scriptText); + try { + const parsed = JSON.parse(scriptText); - // First check if this is the direct data structure (like in examples) - if (parsed.require && Array.isArray(parsed.require)) { - // Try multiple navigation paths to find marketplace_search - const paths = [ - // Original path from example - () => - parsed.require[0][3][0].__bbox.require[0][3][1].__bbox.result.data - .marketplace_search, - // Alternative path structure - () => - parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search, - // Another variation - () => parsed.require[0][3][0].__bbox.result.data.marketplace_search, - // Direct access for some responses - () => { - for (const item of parsed.require) { - if (item && item.length >= 4 && item[3]) { - const bbox = item[3]?.__bbox?.result?.data?.marketplace_search; - if (bbox) return bbox; - } - } - return null; - }, - ]; + // First check if this is the direct data structure (like in examples) + if (parsed.require && Array.isArray(parsed.require)) { + // Try multiple navigation paths to find marketplace_search + const paths = [ + // Original path from example + () => + parsed.require[0][3][0].__bbox.require[0][3][1].__bbox.result.data + .marketplace_search, + // Alternative path structure + () => + parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search, + // Another variation + () => parsed.require[0][3][0].__bbox.result.data.marketplace_search, + // Direct access for some responses + () => { + for (const item of parsed.require) { + if (item && item.length >= 4 && item[3]) { + const bbox = item[3]?.__bbox?.result?.data?.marketplace_search; + if (bbox) return bbox; + } + } + return null; + }, + ]; - for (const getData of paths) { - try { - const result = getData(); - if ( - result && - isRecord(result) && - (result as any).feed_units?.edges?.length > 0 - ) { - marketplaceData = result as FacebookMarketplaceSearch; - break; - } - } catch { - } - } + for (const getData of paths) { + try { + const result = getData(); + if ( + result && + isRecord(result) && + (result as Record).feed_units?.edges?.length > 0 + ) { + marketplaceData = result as FacebookMarketplaceSearch; + break; + } + } catch {} + } - if (marketplaceData) break; - } + if (marketplaceData) break; + } - // Also check for direct marketplace_search in the parsed data - if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) { - const searchData = - parsed.marketplace_search as FacebookMarketplaceSearch; - if (searchData.feed_units?.edges?.length ?? 0 > 0) { - marketplaceData = searchData; - break; - } - } - } catch { - } - } + // Also check for direct marketplace_search in the parsed data + if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) { + const searchData = + parsed.marketplace_search as FacebookMarketplaceSearch; + const feedLength = searchData.feed_units?.edges?.length ?? 0; + if (feedLength > 0) { + marketplaceData = searchData; + break; + } + } + } catch {} + } - if (!marketplaceData?.feed_units?.edges?.length) { - console.warn("No marketplace data found in HTML response"); - return null; - } + if (!marketplaceData?.feed_units?.edges?.length) { + console.warn("No marketplace data found in HTML response"); + return null; + } - console.log( - `Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings` - ); - return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node })); + console.log( + `Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`, + ); + return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node })); } /** @@ -608,259 +608,261 @@ export function extractFacebookMarketplaceData( Updated for 2026 Facebook Marketplace API structure with multiple extraction paths */ export function extractFacebookItemData( - htmlString: HTMLString + htmlString: HTMLString, ): FacebookMarketplaceItem | null { - const { document } = parseHTML(htmlString); - const scripts = document.querySelectorAll("script"); + const { document } = parseHTML(htmlString); + const scripts = document.querySelectorAll("script"); - for (const script of scripts) { - const scriptText = script.textContent; - if (!scriptText) continue; + for (const script of scripts) { + const scriptText = script.textContent; + if (!scriptText) continue; - try { - const parsed = JSON.parse(scriptText); + try { + const parsed = JSON.parse(scriptText); - // Check for the require structure with marketplace product details - if (parsed.require && Array.isArray(parsed.require)) { - // Try multiple extraction paths discovered from reverse engineering - const extractionPaths = [ - // Path 1: Primary path from current API structure - () => - parsed.require[0][3].__bbox.result.data.viewer - .marketplace_product_details_page.target, - // Path 2: Alternative path with nested require - () => - parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data - .viewer.marketplace_product_details_page.target, - // Path 3: Variation without the [0] index - () => - parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data - .viewer.marketplace_product_details_page.target, - // Path 4-5: Additional fallback paths for edge cases - () => - parsed.require[0][3][1]?.__bbox?.result?.data?.viewer - ?.marketplace_product_details_page?.target, - () => - parsed.require[0][3][2]?.__bbox?.result?.data?.viewer - ?.marketplace_product_details_page?.target, - ]; + // Check for the require structure with marketplace product details + if (parsed.require && Array.isArray(parsed.require)) { + // Try multiple extraction paths discovered from reverse engineering + const extractionPaths = [ + // Path 1: Primary path from current API structure + () => + parsed.require[0][3].__bbox.result.data.viewer + .marketplace_product_details_page.target, + // Path 2: Alternative path with nested require + () => + parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data + .viewer.marketplace_product_details_page.target, + // Path 3: Variation without the [0] index + () => + parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data + .viewer.marketplace_product_details_page.target, + // Path 4-5: Additional fallback paths for edge cases + () => + parsed.require[0][3][1]?.__bbox?.result?.data?.viewer + ?.marketplace_product_details_page?.target, + () => + parsed.require[0][3][2]?.__bbox?.result?.data?.viewer + ?.marketplace_product_details_page?.target, + ]; - let pathIndex = 0; - for (const getPath of extractionPaths) { - try { - const targetData = getPath(); - if ( - targetData && - typeof targetData === "object" && - targetData.id && - targetData.marketplace_listing_title && - targetData.__typename === "GroupCommerceProductItem" - ) { - console.log( - `Successfully extracted Facebook item data using extraction path ${pathIndex + 1}` - ); - return targetData as FacebookMarketplaceItem; - } - } catch { - // Path not found or invalid, try next path - } - pathIndex++; - } + let pathIndex = 0; + for (const getPath of extractionPaths) { + try { + const targetData = getPath(); + if ( + targetData && + typeof targetData === "object" && + targetData.id && + targetData.marketplace_listing_title && + targetData.__typename === "GroupCommerceProductItem" + ) { + console.log( + `Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`, + ); + return targetData as FacebookMarketplaceItem; + } + } catch { + // Path not found or invalid, try next path + } + pathIndex++; + } - // Fallback: Search recursively for marketplace data in the parsed structure - const findMarketplaceData = ( - obj: unknown, - depth = 0, - maxDepth = 10 - ): FacebookMarketplaceItem | null => { - if (depth > maxDepth) return null; // Prevent infinite recursion - if (isRecord(obj)) { - // Check if this object matches the expected marketplace item structure - if ( - (obj as any).marketplace_listing_title && - (obj as any).id && - (obj as any).__typename === "GroupCommerceProductItem" && - (obj as any).redacted_description - ) { - return obj as unknown as FacebookMarketplaceItem; - } - // Recursively search nested objects and arrays - for (const key in obj) { - const value = obj[key]; - if (isRecord(value) || Array.isArray(value)) { - const result = findMarketplaceData(value, depth + 1, maxDepth); - if (result) return result; - } - } - } else if (Array.isArray(obj)) { - // Search through arrays - for (const item of obj) { - const result = findMarketplaceData(item, depth + 1, maxDepth); - if (result) return result; - } - } - return null; - }; + // Fallback: Search recursively for marketplace data in the parsed structure + const findMarketplaceData = ( + obj: unknown, + depth = 0, + maxDepth = 10, + ): FacebookMarketplaceItem | null => { + if (depth > maxDepth) return null; // Prevent infinite recursion + if (isRecord(obj)) { + // Check if this object matches the expected marketplace item structure + const candidate = obj as Record; + if ( + candidate.marketplace_listing_title && + candidate.id && + candidate.__typename === "GroupCommerceProductItem" && + candidate.redacted_description + ) { + return candidate as unknown as FacebookMarketplaceItem; + } + // Recursively search nested objects and arrays + for (const key in obj) { + const value = obj[key]; + if (isRecord(value) || Array.isArray(value)) { + const result = findMarketplaceData(value, depth + 1, maxDepth); + if (result) return result; + } + } + } else if (Array.isArray(obj)) { + // Search through arrays + for (const item of obj) { + const result = findMarketplaceData(item, depth + 1, maxDepth); + if (result) return result; + } + } + return null; + }; - // Search through the entire require structure - const recursiveResult = findMarketplaceData(parsed.require); - if (recursiveResult) { - console.log( - "Successfully extracted Facebook item data using recursive search" - ); - return recursiveResult; - } + // Search through the entire require structure + const recursiveResult = findMarketplaceData(parsed.require); + if (recursiveResult) { + console.log( + "Successfully extracted Facebook item data using recursive search", + ); + return recursiveResult; + } - // Additional search in other potential locations - if ( - parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page - ?.target - ) { - const bboxData = - parsed.__bbox.result.data.viewer.marketplace_product_details_page - .target; - if ( - bboxData && - typeof bboxData === "object" && - bboxData.id && - bboxData.marketplace_listing_title && - bboxData.__typename === "GroupCommerceProductItem" - ) { - console.log( - "Successfully extracted Facebook item data from __bbox structure" - ); - return bboxData as FacebookMarketplaceItem; - } - } - } - } catch { - } - } + // Additional search in other potential locations + if ( + parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page + ?.target + ) { + const bboxData = + parsed.__bbox.result.data.viewer.marketplace_product_details_page + .target; + if ( + bboxData && + typeof bboxData === "object" && + bboxData.id && + bboxData.marketplace_listing_title && + bboxData.__typename === "GroupCommerceProductItem" + ) { + console.log( + "Successfully extracted Facebook item data from __bbox structure", + ); + return bboxData as FacebookMarketplaceItem; + } + } + } + } catch {} + } - return null; + return null; } /** Parse Facebook marketplace search results into ListingDetails[] */ -export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] { - const results: FacebookListingDetails[] = []; +export function parseFacebookAds( + ads: FacebookAdNode[], +): FacebookListingDetails[] { + const results: FacebookListingDetails[] = []; - for (const adJson of ads) { - try { - const listing = adJson.node.listing; - const title = listing.marketplace_listing_title; - const priceObj = listing.listing_price; + for (const adJson of ads) { + try { + const listing = adJson.node.listing; + const title = listing.marketplace_listing_title; + const priceObj = listing.listing_price; - if (!title || !priceObj) continue; + if (!title || !priceObj) continue; - const id = listing.id; - const url = `https://www.facebook.com/marketplace/item/${id}`; + const id = listing.id; + const url = `https://www.facebook.com/marketplace/item/${id}`; - // Facebook stores price in different fields: - // - amount_with_offset_in_currency: Facebook's internal price encoding (not cents) - // - amount: dollars (like "1.00") - // - formatted_amount: human-readable price (like "CA$1") - let cents: number; - if (priceObj.amount != null) { - const dollars = - typeof priceObj.amount === "string" - ? Number.parseFloat(priceObj.amount) - : priceObj.amount; - cents = Math.round(dollars * 100); - } else if (priceObj.amount_with_offset_in_currency != null) { - // Fallback: try to extract cents from amount_with_offset_in_currency - // This appears to use some exchange rate/multiplier format - const encodedAmount = Number(priceObj.amount_with_offset_in_currency); - if (!Number.isNaN(encodedAmount) && encodedAmount > 0) { - // Estimate roughly - this field doesn't contain real cents - // Use formatted_amount to get the actual dollar amount - if (priceObj.formatted_amount) { - const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/); - if (match) { - const dollars = Number.parseFloat(match[0].replace(",", "")); - if (!Number.isNaN(dollars)) { - cents = Math.round(dollars * 100); - } else { - cents = encodedAmount; // fallback - } - } else { - cents = encodedAmount; // fallback - } - } else { - cents = encodedAmount; // fallback - } - } else { - continue; // Invalid price - } - } else { - continue; // No price available - } + // Facebook stores price in different fields: + // - amount_with_offset_in_currency: Facebook's internal price encoding (not cents) + // - amount: dollars (like "1.00") + // - formatted_amount: human-readable price (like "CA$1") + let cents: number; + if (priceObj.amount != null) { + const dollars = + typeof priceObj.amount === "string" + ? Number.parseFloat(priceObj.amount) + : priceObj.amount; + cents = Math.round(dollars * 100); + } else if (priceObj.amount_with_offset_in_currency != null) { + // Fallback: try to extract cents from amount_with_offset_in_currency + // This appears to use some exchange rate/multiplier format + const encodedAmount = Number(priceObj.amount_with_offset_in_currency); + if (!Number.isNaN(encodedAmount) && encodedAmount > 0) { + // Estimate roughly - this field doesn't contain real cents + // Use formatted_amount to get the actual dollar amount + if (priceObj.formatted_amount) { + const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/); + if (match) { + const dollars = Number.parseFloat(match[0].replace(",", "")); + if (!Number.isNaN(dollars)) { + cents = Math.round(dollars * 100); + } else { + cents = encodedAmount; // fallback + } + } else { + cents = encodedAmount; // fallback + } + } else { + cents = encodedAmount; // fallback + } + } else { + continue; // Invalid price + } + } else { + continue; // No price available + } - if (!Number.isFinite(cents) || cents <= 0) continue; + if (!Number.isFinite(cents) || cents <= 0) continue; - // Extract address from location data if available - const cityName = - listing.location?.reverse_geocode?.city_page?.display_name; - const address = cityName || null; + // Extract address from location data if available + const cityName = + listing.location?.reverse_geocode?.city_page?.display_name; + const address = cityName || null; - // Determine listing status from Facebook flags - let listingStatus: string | undefined ; - if (listing.is_sold) { - listingStatus = "SOLD"; - } else if (listing.is_pending) { - listingStatus = "PENDING"; - } else if (listing.is_live) { - listingStatus = "ACTIVE"; - } else if (listing.is_hidden) { - listingStatus = "HIDDEN"; - } + // Determine listing status from Facebook flags + let listingStatus: string | undefined; + if (listing.is_sold) { + listingStatus = "SOLD"; + } else if (listing.is_pending) { + listingStatus = "PENDING"; + } else if (listing.is_live) { + listingStatus = "ACTIVE"; + } else if (listing.is_hidden) { + listingStatus = "HIDDEN"; + } - // Format creation date if available - const creationDate = listing.creation_time - ? new Date(listing.creation_time * 1000).toISOString() - : undefined; + // Format creation date if available + const creationDate = listing.creation_time + ? new Date(listing.creation_time * 1000).toISOString() + : undefined; - // Extract image and video URLs - const imageUrl = listing.primary_listing_photo?.image?.uri; - const videoUrl = listing.listing_video - ? `https://www.facebook.com/${listing.listing_video.id}/` - : undefined; + // Extract image and video URLs + const imageUrl = listing.primary_listing_photo?.image?.uri; + const videoUrl = listing.listing_video + ? `https://www.facebook.com/${listing.listing_video.id}/` + : undefined; - // Extract seller information - const seller = listing.marketplace_listing_seller - ? { - name: listing.marketplace_listing_seller.name, - id: listing.marketplace_listing_seller.id, - } - : undefined; + // Extract seller information + const seller = listing.marketplace_listing_seller + ? { + name: listing.marketplace_listing_seller.name, + id: listing.marketplace_listing_seller.id, + } + : undefined; - const listingDetails: FacebookListingDetails = { - url, - title, - listingPrice: { - amountFormatted: - priceObj.formatted_amount || formatCentsToCurrency(cents / 100, "en-CA"), - cents, - currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD - }, - address, - creationDate, - listingType: "item", // Default type for marketplace listings - listingStatus, - categoryId: listing.marketplace_listing_category_id, - imageUrl, - videoUrl, - seller, - deliveryTypes: listing.delivery_types, - }; + const listingDetails: FacebookListingDetails = { + url, + title, + listingPrice: { + amountFormatted: + priceObj.formatted_amount || + formatCentsToCurrency(cents / 100, "en-CA"), + cents, + currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD + }, + address, + creationDate, + listingType: "item", // Default type for marketplace listings + listingStatus, + categoryId: listing.marketplace_listing_category_id, + imageUrl, + videoUrl, + seller, + deliveryTypes: listing.delivery_types, + }; - results.push(listingDetails); - } catch { - } - } + results.push(listingDetails); + } catch {} + } - return results; + return results; } /** @@ -868,349 +870,350 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[ Updated for 2026 GroupCommerceProductItem structure */ export function parseFacebookItem( - item: FacebookMarketplaceItem + item: FacebookMarketplaceItem, ): FacebookListingDetails | null { - try { - const title = item.marketplace_listing_title || item.custom_title; - if (!title) return null; + try { + const title = item.marketplace_listing_title || item.custom_title; + if (!title) return null; - const url = `https://www.facebook.com/marketplace/item/${item.id}`; + const url = `https://www.facebook.com/marketplace/item/${item.id}`; - // Extract price information - let cents = 0; - let currency = "CAD"; // Default - let amountFormatted = item.formatted_price?.text || "FREE"; + // Extract price information + let cents = 0; + let currency = "CAD"; // Default + let amountFormatted = item.formatted_price?.text || "FREE"; - if (item.listing_price) { - currency = item.listing_price.currency || "CAD"; - if (item.listing_price.amount && item.listing_price.amount !== "0.00") { - const amount = Number.parseFloat(item.listing_price.amount); - if (!Number.isNaN(amount)) { - cents = Math.round(amount * 100); - amountFormatted = - item.formatted_price?.text || formatCentsToCurrency(cents / 100, "en-CA"); - } - } - } + if (item.listing_price) { + currency = item.listing_price.currency || "CAD"; + if (item.listing_price.amount && item.listing_price.amount !== "0.00") { + const amount = Number.parseFloat(item.listing_price.amount); + if (!Number.isNaN(amount)) { + cents = Math.round(amount * 100); + amountFormatted = + item.formatted_price?.text || + formatCentsToCurrency(cents / 100, "en-CA"); + } + } + } - // Extract description - const description = item.redacted_description?.text; + // Extract description + const description = item.redacted_description?.text; - // Extract location - const address = item.location_text?.text || null; + // Extract location + const address = item.location_text?.text || null; - // Extract seller information - const seller = item.marketplace_listing_seller - ? { - name: item.marketplace_listing_seller.name, - id: item.marketplace_listing_seller.id, - } - : undefined; + // Extract seller information + const seller = item.marketplace_listing_seller + ? { + name: item.marketplace_listing_seller.name, + id: item.marketplace_listing_seller.id, + } + : undefined; - // Determine listing status - let listingStatus: string | undefined; - if (item.is_sold) { - listingStatus = "SOLD"; - } else if (item.is_pending) { - listingStatus = "PENDING"; - } else if (item.is_live) { - listingStatus = "ACTIVE"; - } else if (item.is_hidden) { - listingStatus = "HIDDEN"; - } + // Determine listing status + let listingStatus: string | undefined; + if (item.is_sold) { + listingStatus = "SOLD"; + } else if (item.is_pending) { + listingStatus = "PENDING"; + } else if (item.is_live) { + listingStatus = "ACTIVE"; + } else if (item.is_hidden) { + listingStatus = "HIDDEN"; + } - // Format creation date - const creationDate = item.creation_time - ? new Date(item.creation_time * 1000).toISOString() - : undefined; + // Format creation date + const creationDate = item.creation_time + ? new Date(item.creation_time * 1000).toISOString() + : undefined; - // Determine listing type based on category or vehicle data - let listingType = "item"; - if (item.vehicle_make_display_name || item.vehicle_odometer_data) { - listingType = "vehicle"; - } + // Determine listing type based on category or vehicle data + let listingType = "item"; + if (item.vehicle_make_display_name || item.vehicle_odometer_data) { + listingType = "vehicle"; + } - const listingDetails: FacebookListingDetails = { - url, - title, - description, - listingPrice: { - amountFormatted, - cents, - currency, - }, - address, - creationDate, - listingType, - listingStatus, - categoryId: item.marketplace_listing_category_id, - seller, - deliveryTypes: item.delivery_types, - }; + const listingDetails: FacebookListingDetails = { + url, + title, + description, + listingPrice: { + amountFormatted, + cents, + currency, + }, + address, + creationDate, + listingType, + listingStatus, + categoryId: item.marketplace_listing_category_id, + seller, + deliveryTypes: item.delivery_types, + }; - return listingDetails; - } catch (error) { - console.warn(`Failed to parse Facebook item ${item.id}:`, error); - return null; - } + return listingDetails; + } catch (error) { + console.warn(`Failed to parse Facebook item ${item.id}:`, error); + return null; + } } // ----------------------------- Main ----------------------------- export default async function fetchFacebookItems( - SEARCH_QUERY: string, - REQUESTS_PER_SECOND = 1, - LOCATION = "toronto", - MAX_ITEMS = 25, - cookiesSource?: string, - cookiePath?: string + SEARCH_QUERY: string, + REQUESTS_PER_SECOND = 1, + LOCATION = "toronto", + MAX_ITEMS = 25, + cookiesSource?: string, + cookiePath?: string, ) { - // Load Facebook cookies - required for Facebook Marketplace access - let cookies: Cookie[]; - if (cookiesSource) { - // Use provided cookie source (backward compatibility) - cookies = await loadFacebookCookies(cookiesSource); - } else { - // Auto-load from file or parse from env var - cookies = await ensureFacebookCookies(cookiePath); - } + // Load Facebook cookies - required for Facebook Marketplace access + let cookies: Cookie[]; + if (cookiesSource) { + // Use provided cookie source (backward compatibility) + cookies = await loadFacebookCookies(cookiesSource); + } else { + // Auto-load from file or parse from env var + cookies = await ensureFacebookCookies(cookiePath); + } - if (cookies.length === 0) { - throw new Error( - "Facebook cookies are required for marketplace access. " + - "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies." - ); - } + if (cookies.length === 0) { + throw new Error( + "Facebook cookies are required for marketplace access. " + + "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", + ); + } - // Format cookies for HTTP header - const domain = "www.facebook.com"; - const cookiesHeader = formatCookiesForHeader(cookies, domain); - if (!cookiesHeader) { - throw new Error( - "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain." - ); - } + // Format cookies for HTTP header + const domain = "www.facebook.com"; + const cookiesHeader = formatCookiesForHeader(cookies, domain); + if (!cookiesHeader) { + throw new Error( + "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", + ); + } - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); - // Encode search query for URL - const encodedQuery = encodeURIComponent(SEARCH_QUERY); + // Encode search query for URL + const encodedQuery = encodeURIComponent(SEARCH_QUERY); - // Facebook marketplace URL structure - const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`; + // Facebook marketplace URL structure + const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`; - console.log(`Fetching Facebook marketplace: ${searchUrl}`); - console.log(`Using ${cookies.length} cookies for authentication`); + console.log(`Fetching Facebook marketplace: ${searchUrl}`); + console.log(`Using ${cookies.length} cookies for authentication`); - let searchHtml: string; - try { - searchHtml = await fetchHtml(searchUrl, DELAY_MS, { - maxRetries: 3, - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s` - ); - } - }, - cookies: cookiesHeader, - }); - } catch (err) { - if (err instanceof HttpError) { - console.warn( - `\nFacebook marketplace access failed (${err.status}): ${err.message}` - ); - if (err.status === 400 || err.status === 401 || err.status === 403) { - console.warn( - "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies." - ); - } - return []; - } - throw err; - } + let searchHtml: string; + try { + searchHtml = await fetchHtml(searchUrl, DELAY_MS, { + maxRetries: 3, + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + cookies: cookiesHeader, + }); + } catch (err) { + if (err instanceof HttpError) { + console.warn( + `\nFacebook marketplace access failed (${err.status}): ${err.message}`, + ); + if (err.status === 400 || err.status === 401 || err.status === 403) { + console.warn( + "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.", + ); + } + return []; + } + throw err; + } - const ads = extractFacebookMarketplaceData(searchHtml); - if (!ads || ads.length === 0) { - console.warn("No ads parsed from Facebook marketplace page."); - return []; - } + const ads = extractFacebookMarketplaceData(searchHtml); + if (!ads || ads.length === 0) { + console.warn("No ads parsed from Facebook marketplace page."); + return []; + } - console.log(`\nFound ${ads.length} raw ads. Processing...`); + console.log(`\nFound ${ads.length} raw ads. Processing...`); - const progressBar = new cliProgress.SingleBar( - {}, - cliProgress.Presets.shades_classic - ); - const totalProgress = ads.length; - const currentProgress = 0; - progressBar.start(totalProgress, currentProgress); + const progressBar = new cliProgress.SingleBar( + {}, + cliProgress.Presets.shades_classic, + ); + const totalProgress = ads.length; + const currentProgress = 0; + progressBar.start(totalProgress, currentProgress); - const items = parseFacebookAds(ads); + const items = parseFacebookAds(ads); - // Filter to only priced items (already done in parseFacebookAds) - const pricedItems = items.filter( - (item) => item.listingPrice?.cents && item.listingPrice.cents > 0 - ); + // Filter to only priced items (already done in parseFacebookAds) + const pricedItems = items.filter( + (item) => item.listingPrice?.cents && item.listingPrice.cents > 0, + ); - progressBar.update(totalProgress); - progressBar.stop(); + progressBar.update(totalProgress); + progressBar.stop(); - console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`); - return pricedItems.slice(0, MAX_ITEMS); // Limit results + console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`); + return pricedItems.slice(0, MAX_ITEMS); // Limit results } /** * Fetch individual Facebook marketplace item details with enhanced error handling */ export async function fetchFacebookItem( - itemId: string, - cookiesSource?: string, - cookiePath?: string + itemId: string, + cookiesSource?: string, + cookiePath?: string, ): Promise { - // Load Facebook cookies - required for Facebook Marketplace access - let cookies: Cookie[]; - if (cookiesSource) { - // Use provided cookie source (backward compatibility) - cookies = await loadFacebookCookies(cookiesSource); - } else { - // Auto-load from file or parse from env var - cookies = await ensureFacebookCookies(cookiePath); - } + // Load Facebook cookies - required for Facebook Marketplace access + let cookies: Cookie[]; + if (cookiesSource) { + // Use provided cookie source (backward compatibility) + cookies = await loadFacebookCookies(cookiesSource); + } else { + // Auto-load from file or parse from env var + cookies = await ensureFacebookCookies(cookiePath); + } - if (cookies.length === 0) { - throw new Error( - "Facebook cookies are required for marketplace access. " + - "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies." - ); - } + if (cookies.length === 0) { + throw new Error( + "Facebook cookies are required for marketplace access. " + + "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", + ); + } - // Format cookies for HTTP header - const domain = "www.facebook.com"; - const cookiesHeader = formatCookiesForHeader(cookies, domain); - if (!cookiesHeader) { - throw new Error( - "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain." - ); - } + // Format cookies for HTTP header + const domain = "www.facebook.com"; + const cookiesHeader = formatCookiesForHeader(cookies, domain); + if (!cookiesHeader) { + throw new Error( + "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", + ); + } - const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`; + const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`; - console.log(`Fetching Facebook marketplace item: ${itemUrl}`); + console.log(`Fetching Facebook marketplace item: ${itemUrl}`); - let itemHtml: string; - try { - itemHtml = await fetchHtml(itemUrl, 1000, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s` - ); - } - }, - cookies: cookiesHeader, - }); - } catch (err) { - if (err instanceof HttpError) { - console.warn( - `\nFacebook marketplace item access failed (${err.status}): ${err.message}` - ); + let itemHtml: string; + try { + itemHtml = await fetchHtml(itemUrl, 1000, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + cookies: cookiesHeader, + }); + } catch (err) { + if (err instanceof HttpError) { + console.warn( + `\nFacebook marketplace item access failed (${err.status}): ${err.message}`, + ); - // Enhanced error handling based on status codes - switch (err.status) { - case 400: - case 401: - case 403: - console.warn( - "Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies." - ); - console.warn( - "Try logging out and back into Facebook, then export fresh cookies." - ); - break; - case 404: - console.warn( - "Listing not found: The marketplace item may have been removed, sold, or the URL is invalid." - ); - break; - case 429: - console.warn( - "Rate limited: Too many requests. Facebook is blocking access temporarily." - ); - break; - case 500: - case 502: - case 503: - console.warn( - "Facebook server error: Marketplace may be temporarily unavailable." - ); - break; - default: - console.warn(`Unexpected error status: ${err.status}`); - } - return null; - } - throw err; - } + // Enhanced error handling based on status codes + switch (err.status) { + case 400: + case 401: + case 403: + console.warn( + "Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.", + ); + console.warn( + "Try logging out and back into Facebook, then export fresh cookies.", + ); + break; + case 404: + console.warn( + "Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.", + ); + break; + case 429: + console.warn( + "Rate limited: Too many requests. Facebook is blocking access temporarily.", + ); + break; + case 500: + case 502: + case 503: + console.warn( + "Facebook server error: Marketplace may be temporarily unavailable.", + ); + break; + default: + console.warn(`Unexpected error status: ${err.status}`); + } + return null; + } + throw err; + } - const itemData = extractFacebookItemData(itemHtml); - if (!itemData) { - logExtractionMetrics(false, itemId); - // Enhanced checking for specific failure scenarios - if ( - itemHtml.includes("This listing is no longer available") || - itemHtml.includes("listing has been removed") || - itemHtml.includes("This item has been sold") - ) { - console.warn( - `Item ${itemId} appears to be sold or removed from marketplace.` - ); - return null; - } + const itemData = extractFacebookItemData(itemHtml); + if (!itemData) { + logExtractionMetrics(false, itemId); + // Enhanced checking for specific failure scenarios + if ( + itemHtml.includes("This listing is no longer available") || + itemHtml.includes("listing has been removed") || + itemHtml.includes("This item has been sold") + ) { + console.warn( + `Item ${itemId} appears to be sold or removed from marketplace.`, + ); + return null; + } - if ( - itemHtml.includes("log in to Facebook") || - itemHtml.includes("You must log in") || - itemHtml.includes("authentication required") - ) { - console.warn( - `Authentication failed for item ${itemId}. Cookies may be expired.` - ); - return null; - } + if ( + itemHtml.includes("log in to Facebook") || + itemHtml.includes("You must log in") || + itemHtml.includes("authentication required") + ) { + console.warn( + `Authentication failed for item ${itemId}. Cookies may be expired.`, + ); + return null; + } - console.warn( - `No item data found in Facebook marketplace page for item ${itemId}. This may indicate:` - ); - console.warn(" - The listing was removed or sold"); - console.warn(" - Authentication issues"); - console.warn(" - Facebook changed their API structure"); - console.warn(" - Network or parsing issues"); - return null; - } + console.warn( + `No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`, + ); + console.warn(" - The listing was removed or sold"); + console.warn(" - Authentication issues"); + console.warn(" - Facebook changed their API structure"); + console.warn(" - Network or parsing issues"); + return null; + } - logExtractionMetrics(true, itemId); - console.log(`Successfully extracted data for item ${itemId}`); + logExtractionMetrics(true, itemId); + console.log(`Successfully extracted data for item ${itemId}`); - const parsedItem = parseFacebookItem(itemData); - if (!parsedItem) { - console.warn(`Failed to parse item ${itemId}: Invalid data structure`); - return null; - } + const parsedItem = parseFacebookItem(itemData); + if (!parsedItem) { + console.warn(`Failed to parse item ${itemId}: Invalid data structure`); + return null; + } - // Check for sold/removed status in the parsed data with proper precedence - if (itemData.is_sold) { - console.warn(`Item ${itemId} is marked as sold in the marketplace.`); - // Still return the data but mark it as sold - parsedItem.listingStatus = "SOLD"; - } else if (!itemData.is_live) { - console.warn(`Item ${itemId} is not live/active in the marketplace.`); - parsedItem.listingStatus = itemData.is_hidden - ? "HIDDEN" - : itemData.is_pending - ? "PENDING" - : "INACTIVE"; - } + // Check for sold/removed status in the parsed data with proper precedence + if (itemData.is_sold) { + console.warn(`Item ${itemId} is marked as sold in the marketplace.`); + // Still return the data but mark it as sold + parsedItem.listingStatus = "SOLD"; + } else if (!itemData.is_live) { + console.warn(`Item ${itemId} is not live/active in the marketplace.`); + parsedItem.listingStatus = itemData.is_hidden + ? "HIDDEN" + : itemData.is_pending + ? "PENDING" + : "INACTIVE"; + } - return parsedItem; + return parsedItem; } diff --git a/packages/core/src/scrapers/kijiji.ts b/packages/core/src/scrapers/kijiji.ts index ab77117..8bf0aee 100644 --- a/packages/core/src/scrapers/kijiji.ts +++ b/packages/core/src/scrapers/kijiji.ts @@ -1,188 +1,188 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ + +import cliProgress from "cli-progress"; import { parseHTML } from "linkedom"; import unidecode from "unidecode"; -import cliProgress from "cli-progress"; -import { - fetchHtml, - isRecord, - HttpError, - NetworkError, - ParseError, - RateLimitError, - ValidationError, -} from "../utils/http"; -import { delay } from "../utils/delay"; -import { formatCentsToCurrency } from "../utils/format"; import type { HTMLString } from "../types/common"; +import { formatCentsToCurrency } from "../utils/format"; +import { + fetchHtml, + HttpError, + isRecord, + NetworkError, + ParseError, + RateLimitError, + ValidationError, +} from "../utils/http"; // ----------------------------- Types ----------------------------- type SearchListing = { - name: string; - listingLink: string; + name: string; + listingLink: string; }; type ApolloRecord = Record; interface ApolloSearchItem { - url?: string; - title?: string; - [k: string]: unknown; + url?: string; + title?: string; + [k: string]: unknown; } interface ApolloListingRoot { - url?: string; - title?: string; - description?: string; - price?: { amount?: number | string; currency?: string; type?: string }; - type?: string; - status?: string; - activationDate?: string; - endDate?: string; - metrics?: { views?: number | string }; - location?: { - address?: string | null; - id?: number; - name?: string; - coordinates?: { latitude: number; longitude: number }; - }; - imageUrls?: string[]; - imageCount?: number; - categoryId?: number; - adSource?: string; - flags?: { topAd?: boolean; priceDrop?: boolean }; - posterInfo?: { posterId?: string; rating?: number }; - attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>; - [k: string]: unknown; + url?: string; + title?: string; + description?: string; + price?: { amount?: number | string; currency?: string; type?: string }; + type?: string; + status?: string; + activationDate?: string; + endDate?: string; + metrics?: { views?: number | string }; + location?: { + address?: string | null; + id?: number; + name?: string; + coordinates?: { latitude: number; longitude: number }; + }; + imageUrls?: string[]; + imageCount?: number; + categoryId?: number; + adSource?: string; + flags?: { topAd?: boolean; priceDrop?: boolean }; + posterInfo?: { posterId?: string; rating?: number }; + attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>; + [k: string]: unknown; } // Keep existing interface for backward compatibility export interface KijijiListingDetails { - url: string; - title: string; - description?: string; - listingPrice?: { - amountFormatted: string; - cents?: number; - currency?: string; - }; - listingType?: string; - listingStatus?: string; - creationDate?: string; - endDate?: string; - numberOfViews?: number; - address?: string | null; + url: string; + title: string; + description?: string; + listingPrice?: { + amountFormatted: string; + cents?: number; + currency?: string; + }; + listingType?: string; + listingStatus?: string; + creationDate?: string; + endDate?: string; + numberOfViews?: number; + address?: string | null; } // New comprehensive interface for detailed listings export interface DetailedListing extends KijijiListingDetails { - images: string[]; - categoryId: number; - adSource: string; - flags: { - topAd: boolean; - priceDrop: boolean; - }; - attributes: Record; - location: { - id: number; - name: string; - coordinates?: { - latitude: number; - longitude: number; - }; - }; - sellerInfo?: { - posterId: string; - rating?: number; - accountType?: string; - memberSince?: string; - reviewCount?: number; - reviewScore?: number; - }; + images: string[]; + categoryId: number; + adSource: string; + flags: { + topAd: boolean; + priceDrop: boolean; + }; + attributes: Record; + location: { + id: number; + name: string; + coordinates?: { + latitude: number; + longitude: number; + }; + }; + sellerInfo?: { + posterId: string; + rating?: number; + accountType?: string; + memberSince?: string; + reviewCount?: number; + reviewScore?: number; + }; } // Configuration interfaces export interface SearchOptions { - location?: number | string; // Location ID or name - category?: number | string; // Category ID or name - keywords?: string; - sortBy?: "relevancy" | "date" | "price" | "distance"; - sortOrder?: "desc" | "asc"; - maxPages?: number; // Default: 5 - priceMin?: number; - priceMax?: number; + location?: number | string; // Location ID or name + category?: number | string; // Category ID or name + keywords?: string; + sortBy?: "relevancy" | "date" | "price" | "distance"; + sortOrder?: "desc" | "asc"; + maxPages?: number; // Default: 5 + priceMin?: number; + priceMax?: number; } export interface ListingFetchOptions { - includeImages?: boolean; // Default: true - sellerDataDepth?: "basic" | "detailed" | "full"; // Default: 'detailed' - includeClientSideData?: boolean; // Default: false + includeImages?: boolean; // Default: true + sellerDataDepth?: "basic" | "detailed" | "full"; // Default: 'detailed' + includeClientSideData?: boolean; // Default: false } // ----------------------------- Constants & Mappings ----------------------------- // Location mappings const LOCATION_MAPPINGS: Record = { - canada: 0, - ontario: 9004, - toronto: 1700273, - gta: 1700272, - oshawa: 1700275, - quebec: 9001, - "nova scotia": 9002, - alberta: 9003, - "new brunswick": 9005, - manitoba: 9006, - "british columbia": 9007, - newfoundland: 9008, - saskatchewan: 9009, - territories: 9010, - pei: 9011, - "prince edward island": 9011, + canada: 0, + ontario: 9004, + toronto: 1700273, + gta: 1700272, + oshawa: 1700275, + quebec: 9001, + "nova scotia": 9002, + alberta: 9003, + "new brunswick": 9005, + manitoba: 9006, + "british columbia": 9007, + newfoundland: 9008, + saskatchewan: 9009, + territories: 9010, + pei: 9011, + "prince edward island": 9011, }; // Category mappings (Buy & Sell main categories) const CATEGORY_MAPPINGS: Record = { - all: 0, - "buy-sell": 10, - "arts-collectibles": 12, - audio: 767, - "baby-items": 253, - "bags-luggage": 931, - bikes: 644, - books: 109, - cameras: 103, - cds: 104, - clothing: 274, - computers: 16, - "computer-accessories": 128, - electronics: 29659001, - "free-stuff": 17220001, - furniture: 235, - "garage-sales": 638, - "health-special-needs": 140, - "hobbies-crafts": 139, - "home-appliances": 107, - "home-indoor": 717, - "home-outdoor": 727, - jewellery: 133, - "musical-instruments": 17, - phones: 132, - "sporting-goods": 111, - tools: 110, - "toys-games": 108, - "tvs-video": 15093001, - "video-games": 141, - other: 26, + all: 0, + "buy-sell": 10, + "arts-collectibles": 12, + audio: 767, + "baby-items": 253, + "bags-luggage": 931, + bikes: 644, + books: 109, + cameras: 103, + cds: 104, + clothing: 274, + computers: 16, + "computer-accessories": 128, + electronics: 29659001, + "free-stuff": 17220001, + furniture: 235, + "garage-sales": 638, + "health-special-needs": 140, + "hobbies-crafts": 139, + "home-appliances": 107, + "home-indoor": 717, + "home-outdoor": 727, + jewellery: 133, + "musical-instruments": 17, + phones: 132, + "sporting-goods": 111, + tools: 110, + "toys-games": 108, + "tvs-video": 15093001, + "video-games": 141, + other: 26, }; // Sort parameter mappings const SORT_MAPPINGS: Record = { - relevancy: "MATCH", - date: "DATE", - price: "PRICE", - distance: "DISTANCE", + relevancy: "MATCH", + date: "DATE", + price: "PRICE", + distance: "DISTANCE", }; // ----------------------------- Utilities ----------------------------- @@ -193,104 +193,104 @@ const SEPS = new Set([" ", "–", "—", "/", ":", ";", ",", ".", "-"]); * Resolve location ID from name or return numeric ID */ export function resolveLocationId(location?: number | string): number { - if (typeof location === "number") return location; - if (typeof location === "string") { - const normalized = location.toLowerCase().replace(/\s+/g, "-"); - return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) - } - return 0; // Default to Canada + if (typeof location === "number") return location; + if (typeof location === "string") { + const normalized = location.toLowerCase().replace(/\s+/g, "-"); + return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) + } + return 0; // Default to Canada } /** * Resolve category ID from name or return numeric ID */ export function resolveCategoryId(category?: number | string): number { - if (typeof category === "number") return category; - if (typeof category === "string") { - const normalized = category.toLowerCase().replace(/\s+/g, "-"); - return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories - } - return 0; // Default to all categories + if (typeof category === "number") return category; + if (typeof category === "string") { + const normalized = category.toLowerCase().replace(/\s+/g, "-"); + return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories + } + return 0; // Default to all categories } /** * Build search URL with enhanced parameters */ export function buildSearchUrl( - keywords: string, - options: SearchOptions & { page?: number }, - BASE_URL = "https://www.kijiji.ca" + keywords: string, + options: SearchOptions & { page?: number }, + BASE_URL = "https://www.kijiji.ca", ): string { - const locationId = resolveLocationId(options.location); - const categoryId = resolveCategoryId(options.category); + const locationId = resolveLocationId(options.location); + const categoryId = resolveCategoryId(options.category); - const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell"; - const locationSlug = locationId === 0 ? "canada" : "canada"; + const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell"; + const locationSlug = locationId === 0 ? "canada" : "canada"; - let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`; + let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`; - const sortParam = options.sortBy - ? `&sort=${SORT_MAPPINGS[options.sortBy]}` - : ""; - const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC"; - const pageParam = - options.page && options.page > 1 ? `&page=${options.page}` : ""; + const sortParam = options.sortBy + ? `&sort=${SORT_MAPPINGS[options.sortBy]}` + : ""; + const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC"; + const pageParam = + options.page && options.page > 1 ? `&page=${options.page}` : ""; - url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`; + url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`; - return url; + return url; } /** * Slugifies a string for Kijiji search URLs */ export function slugify(input: string): string { - const s = unidecode(input).toLowerCase(); - const out: string[] = []; - let lastHyphen = false; + const s = unidecode(input).toLowerCase(); + const out: string[] = []; + let lastHyphen = false; - for (let i = 0; i < s.length; i++) { - const ch = s[i]; - if (!ch) continue; - const code = ch.charCodeAt(0); + for (let i = 0; i < s.length; i++) { + const ch = s[i]; + if (!ch) continue; + const code = ch.charCodeAt(0); - // a-z or 0-9 - if ((code >= 97 && code <= 122) || (code >= 48 && code <= 57)) { - out.push(ch); - lastHyphen = false; - } else if (SEPS.has(ch)) { - if (!lastHyphen) { - out.push("-"); - lastHyphen = true; - } - } - // else drop character - } - return out.join(""); + // a-z or 0-9 + if ((code >= 97 && code <= 122) || (code >= 48 && code <= 57)) { + out.push(ch); + lastHyphen = false; + } else if (SEPS.has(ch)) { + if (!lastHyphen) { + out.push("-"); + lastHyphen = true; + } + } + // else drop character + } + return out.join(""); } // ----------------------------- GraphQL Client ----------------------------- // GraphQL response interfaces interface GraphQLReviewResponse { - user?: { - reviewSummary?: { - count?: number; - score?: number; - }; - }; + user?: { + reviewSummary?: { + count?: number; + score?: number; + }; + }; } interface GraphQLProfileResponse { - user?: { - memberSince?: string; - accountType?: string; - }; + user?: { + memberSince?: string; + accountType?: string; + }; } // GraphQL queries const GRAPHQL_QUERIES = { - getReviewSummary: ` + getReviewSummary: ` query GetReviewSummary($userId: String!) { user(id: $userId) { reviewSummary { @@ -302,7 +302,7 @@ const GRAPHQL_QUERIES = { } } `, - getProfileMetrics: ` + getProfileMetrics: ` query GetProfileMetrics($profileId: String!) { user(id: $profileId) { memberSince @@ -317,98 +317,98 @@ const GRAPHQL_QUERIES = { * Fetch additional data via GraphQL API */ async function fetchGraphQLData( - query: string, - variables: Record, - BASE_URL = "https://www.kijiji.ca" + query: string, + variables: Record, + BASE_URL = "https://www.kijiji.ca", ): Promise { - const endpoint = `${BASE_URL}/anvil/api`; + const endpoint = `${BASE_URL}/anvil/api`; - try { - const response = await fetch(endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - "apollo-require-preflight": "true", - }, - body: JSON.stringify({ - query, - variables, - }), - }); + try { + const response = await fetch(endpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + "apollo-require-preflight": "true", + }, + body: JSON.stringify({ + query, + variables, + }), + }); - if (!response.ok) { - throw new HttpError( - `GraphQL request failed with status ${response.status}`, - response.status, - endpoint - ); - } + if (!response.ok) { + throw new HttpError( + `GraphQL request failed with status ${response.status}`, + response.status, + endpoint, + ); + } - const result = await response.json(); + const result = await response.json(); - if (result.errors) { - throw new ParseError( - `GraphQL errors: ${JSON.stringify(result.errors)}`, - result.errors - ); - } + if (result.errors) { + throw new ParseError( + `GraphQL errors: ${JSON.stringify(result.errors)}`, + result.errors, + ); + } - return result.data; - } catch (err) { - if (err instanceof HttpError || err instanceof ParseError) { - throw err; - } - throw new NetworkError( - `Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`, - endpoint, - err instanceof Error ? err : undefined - ); - } + return result.data; + } catch (err) { + if (err instanceof HttpError || err instanceof ParseError) { + throw err; + } + throw new NetworkError( + `Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`, + endpoint, + err instanceof Error ? err : undefined, + ); + } } /** * Fetch additional seller data via GraphQL */ async function fetchSellerDetails( - posterId: string, - BASE_URL = "https://www.kijiji.ca" + posterId: string, + BASE_URL = "https://www.kijiji.ca", ): Promise<{ - reviewCount?: number; - reviewScore?: number; - memberSince?: string; - accountType?: string; + reviewCount?: number; + reviewScore?: number; + memberSince?: string; + accountType?: string; }> { - try { - const [reviewData, profileData] = await Promise.all([ - fetchGraphQLData( - GRAPHQL_QUERIES.getReviewSummary, - { userId: posterId }, - BASE_URL - ), - fetchGraphQLData( - GRAPHQL_QUERIES.getProfileMetrics, - { profileId: posterId }, - BASE_URL - ), - ]); + try { + const [reviewData, profileData] = await Promise.all([ + fetchGraphQLData( + GRAPHQL_QUERIES.getReviewSummary, + { userId: posterId }, + BASE_URL, + ), + fetchGraphQLData( + GRAPHQL_QUERIES.getProfileMetrics, + { profileId: posterId }, + BASE_URL, + ), + ]); - const reviewResponse = reviewData as GraphQLReviewResponse; - const profileResponse = profileData as GraphQLProfileResponse; + const reviewResponse = reviewData as GraphQLReviewResponse; + const profileResponse = profileData as GraphQLProfileResponse; - return { - reviewCount: reviewResponse?.user?.reviewSummary?.count, - reviewScore: reviewResponse?.user?.reviewSummary?.score, - memberSince: profileResponse?.user?.memberSince, - accountType: profileResponse?.user?.accountType, - }; - } catch (err) { - // Silently fail for GraphQL errors - not critical for basic functionality - console.warn( - `Failed to fetch seller details for ${posterId}:`, - err instanceof Error ? err.message : String(err) - ); - return {}; - } + return { + reviewCount: reviewResponse?.user?.reviewSummary?.count, + reviewScore: reviewResponse?.user?.reviewSummary?.score, + memberSince: profileResponse?.user?.memberSince, + accountType: profileResponse?.user?.accountType, + }; + } catch (err) { + // Silently fail for GraphQL errors - not critical for basic functionality + console.warn( + `Failed to fetch seller details for ${posterId}:`, + err instanceof Error ? err.message : String(err), + ); + return {}; + } } // ----------------------------- Parsing ----------------------------- @@ -416,18 +416,20 @@ async function fetchSellerDetails( /** Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML. */ -export function extractApolloState(htmlString: HTMLString): ApolloRecord | null { - const { document } = parseHTML(htmlString); - const nextData = document.getElementById("__NEXT_DATA__"); - if (!nextData || !nextData.textContent) return null; +export function extractApolloState( + htmlString: HTMLString, +): ApolloRecord | null { + const { document } = parseHTML(htmlString); + const nextData = document.getElementById("__NEXT_DATA__"); + if (!nextData || !nextData.textContent) return null; - try { - const jsonData = JSON.parse(nextData.textContent); - const apollo = jsonData?.props?.pageProps?.__APOLLO_STATE__; - return isRecord(apollo) ? apollo : null; - } catch { - return null; - } + try { + const jsonData = JSON.parse(nextData.textContent); + const apollo = jsonData?.props?.pageProps?.__APOLLO_STATE__; + return isRecord(apollo) ? apollo : null; + } catch { + return null; + } } /** @@ -435,384 +437,382 @@ export function extractApolloState(htmlString: HTMLString): ApolloRecord | null Filters keys likely to be listing entities and ensures url/title exist. */ export function parseSearch( - htmlString: HTMLString, - BASE_URL: string + htmlString: HTMLString, + BASE_URL: string, ): SearchListing[] { - const apolloState = extractApolloState(htmlString); - if (!apolloState) return []; + const apolloState = extractApolloState(htmlString); + if (!apolloState) return []; - const results: SearchListing[] = []; - for (const [key, value] of Object.entries(apolloState)) { - // Heuristic: Kijiji listing keys usually contain "Listing" - if (!key.includes("Listing")) continue; - if (!isRecord(value)) continue; + const results: SearchListing[] = []; + for (const [key, value] of Object.entries(apolloState)) { + // Heuristic: Kijiji listing keys usually contain "Listing" + if (!key.includes("Listing")) continue; + if (!isRecord(value)) continue; - const item = value as ApolloSearchItem; - if (typeof item.url === "string" && typeof item.title === "string") { - results.push({ - listingLink: item.url.startsWith("http") - ? item.url - : `${BASE_URL}${item.url}`, - name: item.title, - }); - } - } - return results; + const item = value as ApolloSearchItem; + if (typeof item.url === "string" && typeof item.title === "string") { + results.push({ + listingLink: item.url.startsWith("http") + ? item.url + : `${BASE_URL}${item.url}`, + name: item.title, + }); + } + } + return results; } /** Parse a listing page into a typed object (backward compatible). */ -function parseListing( - htmlString: HTMLString, - BASE_URL: string +function _parseListing( + htmlString: HTMLString, + BASE_URL: string, ): KijijiListingDetails | null { - const apolloState = extractApolloState(htmlString); - if (!apolloState) return null; + const apolloState = extractApolloState(htmlString); + if (!apolloState) return null; - // Find the listing root key - const listingKey = Object.keys(apolloState).find((k) => - k.includes("Listing") - ); - if (!listingKey) return null; + // Find the listing root key + const listingKey = Object.keys(apolloState).find((k) => + k.includes("Listing"), + ); + if (!listingKey) return null; - const root = apolloState[listingKey]; - if (!isRecord(root)) return null; + const root = apolloState[listingKey]; + if (!isRecord(root)) return null; - const { - url, - title, - description, - price, - type, - status, - activationDate, - endDate, - metrics, - location, - } = root as ApolloListingRoot; + const { + url, + title, + description, + price, + type, + status, + activationDate, + endDate, + metrics, + location, + } = root as ApolloListingRoot; - const cents = price?.amount != null ? Number(price.amount) : undefined; - const amountFormatted = - cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined; + const cents = price?.amount != null ? Number(price.amount) : undefined; + const amountFormatted = + cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined; - const numberOfViews = - metrics?.views != null ? Number(metrics.views) : undefined; + const numberOfViews = + metrics?.views != null ? Number(metrics.views) : undefined; - const listingUrl = - typeof url === "string" - ? url.startsWith("http") - ? url - : `${BASE_URL}${url}` - : ""; + const listingUrl = + typeof url === "string" + ? url.startsWith("http") + ? url + : `${BASE_URL}${url}` + : ""; - if (!listingUrl || !title) return null; + if (!listingUrl || !title) return null; - return { - url: listingUrl, - title, - description, - listingPrice: amountFormatted - ? { - amountFormatted, - cents: Number.isFinite(cents!) ? cents : undefined, - currency: price?.currency, - } - : undefined, - listingType: type, - listingStatus: status, - creationDate: activationDate, - endDate, - numberOfViews: Number.isFinite(numberOfViews!) ? numberOfViews : undefined, - address: location?.address ?? null, - }; + return { + url: listingUrl, + title, + description, + listingPrice: amountFormatted + ? { + amountFormatted, + cents: + cents !== undefined && Number.isFinite(cents) ? cents : undefined, + currency: price?.currency, + } + : undefined, + listingType: type, + listingStatus: status, + creationDate: activationDate, + endDate, + numberOfViews: + numberOfViews !== undefined && Number.isFinite(numberOfViews) + ? numberOfViews + : undefined, + address: location?.address ?? null, + }; } /** * Parse a listing page into a detailed object with all available fields */ export async function parseDetailedListing( - htmlString: HTMLString, - BASE_URL: string, - options: ListingFetchOptions = {} + htmlString: HTMLString, + BASE_URL: string, + options: ListingFetchOptions = {}, ): Promise { - const apolloState = extractApolloState(htmlString); - if (!apolloState) return null; + const apolloState = extractApolloState(htmlString); + if (!apolloState) return null; - // Find the listing root key - const listingKey = Object.keys(apolloState).find((k) => - k.includes("Listing") - ); - if (!listingKey) return null; + // Find the listing root key + const listingKey = Object.keys(apolloState).find((k) => + k.includes("Listing"), + ); + if (!listingKey) return null; - const root = apolloState[listingKey]; - if (!isRecord(root)) return null; + const root = apolloState[listingKey]; + if (!isRecord(root)) return null; - const { - url, - title, - description, - price, - type, - status, - activationDate, - endDate, - metrics, - location, - imageUrls, - categoryId, - adSource, - flags, - posterInfo, - attributes, - } = root as ApolloListingRoot; + const { + url, + title, + description, + price, + type, + status, + activationDate, + endDate, + metrics, + location, + imageUrls, + categoryId, + adSource, + flags, + posterInfo, + attributes, + } = root as ApolloListingRoot; - const cents = price?.amount != null ? Number(price.amount) : undefined; - const amountFormatted = - cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined; + const cents = price?.amount != null ? Number(price.amount) : undefined; + const amountFormatted = + cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined; - const numberOfViews = - metrics?.views != null ? Number(metrics.views) : undefined; + const numberOfViews = + metrics?.views != null ? Number(metrics.views) : undefined; - const listingUrl = - typeof url === "string" - ? url.startsWith("http") - ? url - : `${BASE_URL}${url}` - : ""; + const listingUrl = + typeof url === "string" + ? url.startsWith("http") + ? url + : `${BASE_URL}${url}` + : ""; - if (!listingUrl || !title) return null; + if (!listingUrl || !title) return null; - // Only include fixed-price listings - if (!amountFormatted || cents === undefined) return null; + // Only include fixed-price listings + if (!amountFormatted || cents === undefined) return null; - // Extract images if requested - const images = - options.includeImages !== false && Array.isArray(imageUrls) - ? imageUrls.filter((url): url is string => typeof url === "string") - : []; + // Extract images if requested + const images = + options.includeImages !== false && Array.isArray(imageUrls) + ? imageUrls.filter((url): url is string => typeof url === "string") + : []; - // Extract attributes as key-value pairs - const attributeMap: Record = {}; - if (Array.isArray(attributes)) { - for (const attr of attributes) { - if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) { - attributeMap[attr.canonicalName] = attr.canonicalValues; - } - } - } + // Extract attributes as key-value pairs + const attributeMap: Record = {}; + if (Array.isArray(attributes)) { + for (const attr of attributes) { + if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) { + attributeMap[attr.canonicalName] = attr.canonicalValues; + } + } + } - // Extract seller info based on depth setting - let sellerInfo: DetailedListing["sellerInfo"]; - const depth = options.sellerDataDepth ?? "detailed"; + // Extract seller info based on depth setting + let sellerInfo: DetailedListing["sellerInfo"]; + const depth = options.sellerDataDepth ?? "detailed"; - if (posterInfo?.posterId) { - sellerInfo = { - posterId: posterInfo.posterId, - rating: - typeof posterInfo.rating === "number" ? posterInfo.rating : undefined, - }; + if (posterInfo?.posterId) { + sellerInfo = { + posterId: posterInfo.posterId, + rating: + typeof posterInfo.rating === "number" ? posterInfo.rating : undefined, + }; - // Add more detailed info if requested and client-side data is enabled - if ( - (depth === "detailed" || depth === "full") && - options.includeClientSideData - ) { - try { - const additionalData = await fetchSellerDetails( - posterInfo.posterId, - BASE_URL - ); - sellerInfo = { - ...sellerInfo, - ...additionalData, - }; - } catch { - // Silently fail - GraphQL data is optional - console.warn( - `Failed to fetch additional seller data for ${posterInfo.posterId}` - ); - } - } - } + // Add more detailed info if requested and client-side data is enabled + if ( + (depth === "detailed" || depth === "full") && + options.includeClientSideData + ) { + try { + const additionalData = await fetchSellerDetails( + posterInfo.posterId, + BASE_URL, + ); + sellerInfo = { + ...sellerInfo, + ...additionalData, + }; + } catch { + // Silently fail - GraphQL data is optional + console.warn( + `Failed to fetch additional seller data for ${posterInfo.posterId}`, + ); + } + } + } - return { - url: listingUrl, - title, - description, - listingPrice: { - amountFormatted, - cents, - currency: price?.currency, - }, - listingType: type, - listingStatus: status, - creationDate: activationDate, - endDate, - numberOfViews: - numberOfViews !== undefined && Number.isFinite(numberOfViews) - ? numberOfViews - : undefined, - address: location?.address ?? null, - images, - categoryId: typeof categoryId === "number" ? categoryId : 0, - adSource: typeof adSource === "string" ? adSource : "UNKNOWN", - flags: { - topAd: flags?.topAd === true, - priceDrop: flags?.priceDrop === true, - }, - attributes: attributeMap, - location: { - id: typeof location?.id === "number" ? location.id : 0, - name: typeof location?.name === "string" ? location.name : "Unknown", - coordinates: location?.coordinates - ? { - latitude: location.coordinates.latitude, - longitude: location.coordinates.longitude, - } - : undefined, - }, - sellerInfo, - }; + return { + url: listingUrl, + title, + description, + listingPrice: { + amountFormatted, + cents, + currency: price?.currency, + }, + listingType: type, + listingStatus: status, + creationDate: activationDate, + endDate, + numberOfViews: + numberOfViews !== undefined && Number.isFinite(numberOfViews) + ? numberOfViews + : undefined, + address: location?.address ?? null, + images, + categoryId: typeof categoryId === "number" ? categoryId : 0, + adSource: typeof adSource === "string" ? adSource : "UNKNOWN", + flags: { + topAd: flags?.topAd === true, + priceDrop: flags?.priceDrop === true, + }, + attributes: attributeMap, + location: { + id: typeof location?.id === "number" ? location.id : 0, + name: typeof location?.name === "string" ? location.name : "Unknown", + coordinates: location?.coordinates + ? { + latitude: location.coordinates.latitude, + longitude: location.coordinates.longitude, + } + : undefined, + }, + sellerInfo, + }; } // ----------------------------- Main ----------------------------- export default async function fetchKijijiItems( - SEARCH_QUERY: string, - REQUESTS_PER_SECOND = 1, - BASE_URL = "https://www.kijiji.ca", - searchOptions: SearchOptions = {}, - listingOptions: ListingFetchOptions = {} + SEARCH_QUERY: string, + REQUESTS_PER_SECOND = 1, + BASE_URL = "https://www.kijiji.ca", + searchOptions: SearchOptions = {}, + listingOptions: ListingFetchOptions = {}, ) { - const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); - // Set defaults for configuration - const finalSearchOptions: Required = { - location: searchOptions.location ?? 1700272, // Default to GTA - category: searchOptions.category ?? 0, // Default to all categories - keywords: searchOptions.keywords ?? SEARCH_QUERY, - sortBy: searchOptions.sortBy ?? "relevancy", - sortOrder: searchOptions.sortOrder ?? "desc", - maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages - priceMin: searchOptions.priceMin as number, - priceMax: searchOptions.priceMax as number, - }; + // Set defaults for configuration + const finalSearchOptions: Required = { + location: searchOptions.location ?? 1700272, // Default to GTA + category: searchOptions.category ?? 0, // Default to all categories + keywords: searchOptions.keywords ?? SEARCH_QUERY, + sortBy: searchOptions.sortBy ?? "relevancy", + sortOrder: searchOptions.sortOrder ?? "desc", + maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages + priceMin: searchOptions.priceMin as number, + priceMax: searchOptions.priceMax as number, + }; - const finalListingOptions: Required = { - includeImages: listingOptions.includeImages ?? true, - sellerDataDepth: listingOptions.sellerDataDepth ?? "detailed", - includeClientSideData: listingOptions.includeClientSideData ?? false, - }; + const finalListingOptions: Required = { + includeImages: listingOptions.includeImages ?? true, + sellerDataDepth: listingOptions.sellerDataDepth ?? "detailed", + includeClientSideData: listingOptions.includeClientSideData ?? false, + }; - const allListings: DetailedListing[] = []; - const seenUrls = new Set(); + const allListings: DetailedListing[] = []; + const seenUrls = new Set(); - // Fetch multiple pages - for (let page = 1; page <= finalSearchOptions.maxPages; page++) { - const searchUrl = buildSearchUrl( - finalSearchOptions.keywords, - { - ...finalSearchOptions, - // Add page parameter for pagination - ...(page > 1 && { page }), - }, - BASE_URL - ); + // Fetch multiple pages + for (let page = 1; page <= finalSearchOptions.maxPages; page++) { + const searchUrl = buildSearchUrl( + finalSearchOptions.keywords, + { + ...finalSearchOptions, + // Add page parameter for pagination + ...(page > 1 && { page }), + }, + BASE_URL, + ); - console.log(`Fetching search page ${page}: ${searchUrl}`); - const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - `\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s` - ); - } - }, - }); + console.log(`Fetching search page ${page}: ${searchUrl}`); + const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + }); - const searchResults = parseSearch(searchHtml, BASE_URL); - if (searchResults.length === 0) { - console.log( - `No more results found on page ${page}. Stopping pagination.` - ); - break; - } + const searchResults = parseSearch(searchHtml, BASE_URL); + if (searchResults.length === 0) { + console.log( + `No more results found on page ${page}. Stopping pagination.`, + ); + break; + } - // Deduplicate links across pages - const newListingLinks = searchResults - .map((r) => r.listingLink) - .filter((link) => !seenUrls.has(link)); + // Deduplicate links across pages + const newListingLinks = searchResults + .map((r) => r.listingLink) + .filter((link) => !seenUrls.has(link)); - for (const link of newListingLinks) { - seenUrls.add(link); - } + for (const link of newListingLinks) { + seenUrls.add(link); + } - console.log( - `\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}` - ); + console.log( + `\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`, + ); - // Fetch details for this page's listings - const progressBar = new cliProgress.SingleBar( - {}, - cliProgress.Presets.shades_classic - ); - const totalProgress = newListingLinks.length; - let currentProgress = 0; - progressBar.start(totalProgress, currentProgress); + // Fetch details for this page's listings + const progressBar = new cliProgress.SingleBar( + {}, + cliProgress.Presets.shades_classic, + ); + const totalProgress = newListingLinks.length; + let currentProgress = 0; + progressBar.start(totalProgress, currentProgress); - for (const link of newListingLinks) { - try { - const html = await fetchHtml(link, DELAY_MS, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - `\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s` - ); - } - }, - }); - const parsed = await parseDetailedListing( - html, - BASE_URL, - finalListingOptions - ); - if (parsed) { - allListings.push(parsed); - } - } catch (err) { - if (err instanceof HttpError) { - console.error( - `\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}` - ); - } else { - console.error( - `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}` - ); - } - } finally { - currentProgress++; - progressBar.update(currentProgress); - } - } + for (const link of newListingLinks) { + try { + const html = await fetchHtml(link, DELAY_MS, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log( + `\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`, + ); + } + }, + }); + const parsed = await parseDetailedListing( + html, + BASE_URL, + finalListingOptions, + ); + if (parsed) { + allListings.push(parsed); + } + } catch (err) { + if (err instanceof HttpError) { + console.error( + `\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`, + ); + } else { + console.error( + `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`, + ); + } + } finally { + currentProgress++; + progressBar.update(currentProgress); + } + } - progressBar.stop(); + progressBar.stop(); - // If we got fewer results than expected (40 per page), we've reached the end - if (searchResults.length < 40) { - break; - } - } + // If we got fewer results than expected (40 per page), we've reached the end + if (searchResults.length < 40) { + break; + } + } - console.log(`\nParsed ${allListings.length} detailed listings.`); - return allListings; + console.log(`\nParsed ${allListings.length} detailed listings.`); + return allListings; } // Re-export error classes for convenience -export { - HttpError, - NetworkError, - ParseError, - RateLimitError, - ValidationError, -}; +export { HttpError, NetworkError, ParseError, RateLimitError, ValidationError }; diff --git a/packages/core/src/types/common.ts b/packages/core/src/types/common.ts index a39854c..692e1cc 100644 --- a/packages/core/src/types/common.ts +++ b/packages/core/src/types/common.ts @@ -3,18 +3,18 @@ export type HTMLString = string; /** Currency price object with formatting options */ export interface Price { - amountFormatted: string; - cents: number; - currency: string; + amountFormatted: string; + cents: number; + currency: string; } /** Base listing details common across all marketplaces */ export interface ListingDetails { - url: string; - title: string; - listingPrice: Price; - listingType: string; - listingStatus: string; - address?: string | null; - creationDate?: string; + url: string; + title: string; + listingPrice: Price; + listingType: string; + listingStatus: string; + address?: string | null; + creationDate?: string; } diff --git a/packages/core/src/utils/delay.ts b/packages/core/src/utils/delay.ts index a48063e..3de349d 100644 --- a/packages/core/src/utils/delay.ts +++ b/packages/core/src/utils/delay.ts @@ -4,5 +4,5 @@ * @returns A promise that resolves after the specified delay */ export function delay(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); + return new Promise((resolve) => setTimeout(resolve, ms)); } diff --git a/packages/core/src/utils/format.ts b/packages/core/src/utils/format.ts index ca50fb1..f4d0f09 100644 --- a/packages/core/src/utils/format.ts +++ b/packages/core/src/utils/format.ts @@ -4,18 +4,21 @@ * @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US') * @returns Formatted currency string */ -export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string { - try { - const formatter = new Intl.NumberFormat(locale, { - style: "currency", - currency: "CAD", - minimumFractionDigits: 2, - maximumFractionDigits: 2, - }); - return formatter.format(cents / 100); - } catch (error) { - // Fallback if locale is not supported - const dollars = (cents / 100).toFixed(2); - return `$${dollars}`; - } +export function formatCentsToCurrency( + cents: number, + locale: string = "en-CA", +): string { + try { + const formatter = new Intl.NumberFormat(locale, { + style: "currency", + currency: "CAD", + minimumFractionDigits: 2, + maximumFractionDigits: 2, + }); + return formatter.format(cents / 100); + } catch { + // Fallback if locale is not supported + const dollars = (cents / 100).toFixed(2); + return `$${dollars}`; + } } diff --git a/packages/core/src/utils/http.ts b/packages/core/src/utils/http.ts index 8c603dd..c6b75a4 100644 --- a/packages/core/src/utils/http.ts +++ b/packages/core/src/utils/http.ts @@ -1,79 +1,79 @@ /** Custom error class for HTTP-related failures */ export class HttpError extends Error { - constructor( - message: string, - public readonly statusCode: number, - public readonly url?: string - ) { - super(message); - this.name = "HttpError"; - } + constructor( + message: string, + public readonly statusCode: number, + public readonly url?: string, + ) { + super(message); + this.name = "HttpError"; + } } /** Error class for network failures (timeouts, connection issues) */ export class NetworkError extends Error { - constructor( - message: string, - public readonly url: string, - public readonly cause?: Error - ) { - super(message); - this.name = "NetworkError"; - } + constructor( + message: string, + public readonly url: string, + public readonly cause?: Error, + ) { + super(message); + this.name = "NetworkError"; + } } /** Error class for parsing failures */ export class ParseError extends Error { - constructor( - message: string, - public readonly data?: unknown - ) { - super(message); - this.name = "ParseError"; - } + constructor( + message: string, + public readonly data?: unknown, + ) { + super(message); + this.name = "ParseError"; + } } /** Error class for rate limiting */ export class RateLimitError extends Error { - constructor( - message: string, - public readonly url: string, - public readonly resetTime?: number - ) { - super(message); - this.name = "RateLimitError"; - } + constructor( + message: string, + public readonly url: string, + public readonly resetTime?: number, + ) { + super(message); + this.name = "RateLimitError"; + } } /** Error class for validation failures */ export class ValidationError extends Error { - constructor(message: string) { - super(message); - this.name = "ValidationError"; - } + constructor(message: string) { + super(message); + this.name = "ValidationError"; + } } /** Type guard to check if a value is a record (object) */ export function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); + return typeof value === "object" && value !== null && !Array.isArray(value); } /** * Calculate exponential backoff delay with jitter */ function calculateBackoffDelay(attempt: number, baseMs: number): number { - const exponentialDelay = baseMs * 2 ** attempt; - const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter - return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds + const exponentialDelay = baseMs * 2 ** attempt; + const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter + return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds } /** Options for fetchHtml */ export interface FetchHtmlOptions { - maxRetries?: number; - retryBaseMs?: number; - timeoutMs?: number; - onRateInfo?: (remaining: string | null, reset: string | null) => void; - headers?: Record; + maxRetries?: number; + retryBaseMs?: number; + timeoutMs?: number; + onRateInfo?: (remaining: string | null, reset: string | null) => void; + headers?: Record; } /** @@ -85,116 +85,116 @@ export interface FetchHtmlOptions { * @throws HttpError, NetworkError, or RateLimitError on failure */ export async function fetchHtml( - url: string, - delayMs: number, - opts?: FetchHtmlOptions + url: string, + delayMs: number, + opts?: FetchHtmlOptions, ): Promise { - const maxRetries = opts?.maxRetries ?? 3; - const retryBaseMs = opts?.retryBaseMs ?? 1000; - const timeoutMs = opts?.timeoutMs ?? 30000; + const maxRetries = opts?.maxRetries ?? 3; + const retryBaseMs = opts?.retryBaseMs ?? 1000; + const timeoutMs = opts?.timeoutMs ?? 30000; - const defaultHeaders: Record = { - accept: - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", - "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", - "cache-control": "no-cache", - "upgrade-insecure-requests": "1", - "user-agent": - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", - }; + const defaultHeaders: Record = { + accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", + "cache-control": "no-cache", + "upgrade-insecure-requests": "1", + "user-agent": + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", + }; - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), timeoutMs); - const res = await fetch(url, { - method: "GET", - headers: { ...defaultHeaders, ...opts?.headers }, - signal: controller.signal, - }); + const res = await fetch(url, { + method: "GET", + headers: { ...defaultHeaders, ...opts?.headers }, + signal: controller.signal, + }); - clearTimeout(timeoutId); + clearTimeout(timeoutId); - const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); - const rateLimitReset = res.headers.get("X-RateLimit-Reset"); - opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); + const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); + const rateLimitReset = res.headers.get("X-RateLimit-Reset"); + opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); - if (!res.ok) { - // Handle rate limiting - if (res.status === 429) { - const resetSeconds = rateLimitReset - ? Number(rateLimitReset) - : Number.NaN; - const waitMs = Number.isFinite(resetSeconds) - ? Math.max(0, resetSeconds * 1000) - : calculateBackoffDelay(attempt, retryBaseMs); + if (!res.ok) { + // Handle rate limiting + if (res.status === 429) { + const resetSeconds = rateLimitReset + ? Number(rateLimitReset) + : Number.NaN; + const waitMs = Number.isFinite(resetSeconds) + ? Math.max(0, resetSeconds * 1000) + : calculateBackoffDelay(attempt, retryBaseMs); - if (attempt < maxRetries) { - await new Promise((resolve) => setTimeout(resolve, waitMs)); - continue; - } - throw new RateLimitError( - `Rate limit exceeded for ${url}`, - url, - resetSeconds - ); - } + if (attempt < maxRetries) { + await new Promise((resolve) => setTimeout(resolve, waitMs)); + continue; + } + throw new RateLimitError( + `Rate limit exceeded for ${url}`, + url, + resetSeconds, + ); + } - // Retry on server errors - if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { - await new Promise((resolve) => - setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) - ); - continue; - } + // Retry on server errors + if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { + await new Promise((resolve) => + setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), + ); + continue; + } - throw new HttpError( - `Request failed with status ${res.status}`, - res.status, - url - ); - } + throw new HttpError( + `Request failed with status ${res.status}`, + res.status, + url, + ); + } - const html = await res.text(); + const html = await res.text(); - // Respect per-request delay to maintain rate limiting - await new Promise((resolve) => setTimeout(resolve, delayMs)); - return html; - } catch (err) { - // Re-throw known errors - if ( - err instanceof RateLimitError || - err instanceof HttpError || - err instanceof NetworkError - ) { - throw err; - } + // Respect per-request delay to maintain rate limiting + await new Promise((resolve) => setTimeout(resolve, delayMs)); + return html; + } catch (err) { + // Re-throw known errors + if ( + err instanceof RateLimitError || + err instanceof HttpError || + err instanceof NetworkError + ) { + throw err; + } - if (err instanceof Error && err.name === "AbortError") { - if (attempt < maxRetries) { - await new Promise((resolve) => - setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) - ); - continue; - } - throw new NetworkError(`Request timeout for ${url}`, url, err); - } + if (err instanceof Error && err.name === "AbortError") { + if (attempt < maxRetries) { + await new Promise((resolve) => + setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), + ); + continue; + } + throw new NetworkError(`Request timeout for ${url}`, url, err); + } - // Network or other errors - if (attempt < maxRetries) { - await new Promise((resolve) => - setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)) - ); - continue; - } - throw new NetworkError( - `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, - url, - err instanceof Error ? err : undefined - ); - } - } + // Network or other errors + if (attempt < maxRetries) { + await new Promise((resolve) => + setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), + ); + continue; + } + throw new NetworkError( + `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, + url, + err instanceof Error ? err : undefined, + ); + } + } - throw new NetworkError(`Exhausted retries without response for ${url}`, url); + throw new NetworkError(`Exhausted retries without response for ${url}`, url); } diff --git a/packages/mcp-server/src/protocol/handler.ts b/packages/mcp-server/src/protocol/handler.ts index 26399b0..e9525b2 100644 --- a/packages/mcp-server/src/protocol/handler.ts +++ b/packages/mcp-server/src/protocol/handler.ts @@ -1,206 +1,219 @@ -import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core"; +import { + fetchEbayItems, + fetchFacebookItems, + fetchKijijiItems, +} from "@marketplace-scrapers/core"; import { tools } from "./tools"; /** * Handle MCP JSON-RPC 2.0 protocol requests */ export async function handleMcpRequest(req: Request): Promise { - try { - const body = await req.json(); + try { + const body = await req.json(); - // Validate JSON-RPC 2.0 format - if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) { - return Response.json( - { - jsonrpc: "2.0", - error: { code: -32600, message: "Invalid Request" }, - id: body.id, - }, - { status: 400 } - ); - } + // Validate JSON-RPC 2.0 format + if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) { + return Response.json( + { + jsonrpc: "2.0", + error: { code: -32600, message: "Invalid Request" }, + id: body.id, + }, + { status: 400 }, + ); + } - const { method, params, id } = body; + const { method, params, id } = body; - // Handle initialize method - if (method === "initialize") { - return Response.json({ - jsonrpc: "2.0", - id, - result: { - protocolVersion: "2025-06-18", - capabilities: { - tools: { - listChanged: true, - }, - }, - serverInfo: { - name: "marketplace-scrapers", - version: "1.0.0", - }, - instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces", - }, - }); - } + // Handle initialize method + if (method === "initialize") { + return Response.json({ + jsonrpc: "2.0", + id, + result: { + protocolVersion: "2025-06-18", + capabilities: { + tools: { + listChanged: true, + }, + }, + serverInfo: { + name: "marketplace-scrapers", + version: "1.0.0", + }, + instructions: + "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces", + }, + }); + } - // Handle tools/list method - if (method === "tools/list") { - return Response.json({ - jsonrpc: "2.0", - id, - result: { - tools, - }, - }); - } + // Handle tools/list method + if (method === "tools/list") { + return Response.json({ + jsonrpc: "2.0", + id, + result: { + tools, + }, + }); + } - // Handle notifications (messages without id field should not get a response) - if (!id) { - // Notifications don't require a response - if (method === "notifications/initialized") { - // Client initialized successfully, no response needed - return new Response(null, { status: 204 }); - } - if (method === "notifications/progress") { - // Progress notifications, no response needed - return new Response(null, { status: 204 }); - } - // Unknown notification - still no response for notifications - return new Response(null, { status: 204 }); - } + // Handle notifications (messages without id field should not get a response) + if (!id) { + // Notifications don't require a response + if (method === "notifications/initialized") { + // Client initialized successfully, no response needed + return new Response(null, { status: 204 }); + } + if (method === "notifications/progress") { + // Progress notifications, no response needed + return new Response(null, { status: 204 }); + } + // Unknown notification - still no response for notifications + return new Response(null, { status: 204 }); + } - // Handle tools/call method - if (method === "tools/call") { - const { name, arguments: args } = params || {}; + // Handle tools/call method + if (method === "tools/call") { + const { name, arguments: args } = params || {}; - if (!name || !args) { - return Response.json( - { - jsonrpc: "2.0", - id, - error: { code: -32602, message: "Invalid params: name and arguments required" }, - }, - { status: 400 } - ); - } + if (!name || !args) { + return Response.json( + { + jsonrpc: "2.0", + id, + error: { + code: -32602, + message: "Invalid params: name and arguments required", + }, + }, + { status: 400 }, + ); + } - // Route tool calls to appropriate handlers - try { - let result; + // Route tool calls to appropriate handlers + try { + let result: unknown; - if (name === "search_kijiji") { - const query = args.query; - if (!query) { - return Response.json({ - jsonrpc: "2.0", - id, - error: { code: -32602, message: "query parameter is required" }, - }); - } - const searchOptions = { - location: args.location, - category: args.category, - keywords: args.keywords, - sortBy: args.sortBy, - sortOrder: args.sortOrder, - maxPages: args.maxPages || 5, - priceMin: args.priceMin, - priceMax: args.priceMax, - }; - const items = await fetchKijijiItems( - query, - 1, - "https://www.kijiji.ca", - searchOptions, - {} - ); - result = items || []; - } else if (name === "search_facebook") { - const query = args.query; - if (!query) { - return Response.json({ - jsonrpc: "2.0", - id, - error: { code: -32602, message: "query parameter is required" }, - }); - } - const items = await fetchFacebookItems( - query, - 1, - args.location || "toronto", - args.maxItems || 25, - args.cookiesSource, - undefined - ); - result = items || []; - } else if (name === "search_ebay") { - const query = args.query; - if (!query) { - return Response.json({ - jsonrpc: "2.0", - id, - error: { code: -32602, message: "query parameter is required" }, - }); - } - const items = await fetchEbayItems(query, 1, { - minPrice: args.minPrice, - maxPrice: args.maxPrice, - strictMode: args.strictMode || false, - exclusions: args.exclusions || [], - keywords: args.keywords || [query], - buyItNowOnly: args.buyItNowOnly !== false, - canadaOnly: args.canadaOnly !== false, - }); + if (name === "search_kijiji") { + const query = args.query; + if (!query) { + return Response.json({ + jsonrpc: "2.0", + id, + error: { code: -32602, message: "query parameter is required" }, + }); + } + const searchOptions = { + location: args.location, + category: args.category, + keywords: args.keywords, + sortBy: args.sortBy, + sortOrder: args.sortOrder, + maxPages: args.maxPages || 5, + priceMin: args.priceMin, + priceMax: args.priceMax, + }; + const items = await fetchKijijiItems( + query, + 1, + "https://www.kijiji.ca", + searchOptions, + {}, + ); + result = items || []; + } else if (name === "search_facebook") { + const query = args.query; + if (!query) { + return Response.json({ + jsonrpc: "2.0", + id, + error: { code: -32602, message: "query parameter is required" }, + }); + } + const items = await fetchFacebookItems( + query, + 1, + args.location || "toronto", + args.maxItems || 25, + args.cookiesSource, + undefined, + ); + result = items || []; + } else if (name === "search_ebay") { + const query = args.query; + if (!query) { + return Response.json({ + jsonrpc: "2.0", + id, + error: { code: -32602, message: "query parameter is required" }, + }); + } + const items = await fetchEbayItems(query, 1, { + minPrice: args.minPrice, + maxPrice: args.maxPrice, + strictMode: args.strictMode || false, + exclusions: args.exclusions || [], + keywords: args.keywords || [query], + buyItNowOnly: args.buyItNowOnly !== false, + canadaOnly: args.canadaOnly !== false, + }); - const results = args.maxItems ? items.slice(0, args.maxItems) : items; - result = results || []; - } else { - return Response.json({ - jsonrpc: "2.0", - id, - error: { code: -32601, message: `Unknown tool: ${name}` }, - }); - } + const results = args.maxItems ? items.slice(0, args.maxItems) : items; + result = results || []; + } else { + return Response.json({ + jsonrpc: "2.0", + id, + error: { code: -32601, message: `Unknown tool: ${name}` }, + }); + } - return Response.json({ - jsonrpc: "2.0", - id, - result: { - content: [ - { - type: "text", - text: JSON.stringify(result, null, 2), - }, - ], - }, - }); - } catch (error) { - const errorMessage = error instanceof Error ? error.message : "Unknown error"; - return Response.json({ - jsonrpc: "2.0", - id, - error: { code: -32603, message: `Tool execution failed: ${errorMessage}` }, - }); - } - } + return Response.json({ + jsonrpc: "2.0", + id, + result: { + content: [ + { + type: "text", + text: JSON.stringify(result, null, 2), + }, + ], + }, + }); + } catch (error) { + const errorMessage = + error instanceof Error ? error.message : "Unknown error"; + return Response.json({ + jsonrpc: "2.0", + id, + error: { + code: -32603, + message: `Tool execution failed: ${errorMessage}`, + }, + }); + } + } - // Method not found - return Response.json( - { - jsonrpc: "2.0", - id, - error: { code: -32601, message: `Method not found: ${method}` }, - }, - { status: 404 } - ); - } catch (error) { - const errorMessage = error instanceof Error ? error.message : "Unknown error"; - return Response.json( - { - jsonrpc: "2.0", - error: { code: -32700, message: `Parse error: ${errorMessage}` }, - }, - { status: 400 } - ); - } + // Method not found + return Response.json( + { + jsonrpc: "2.0", + id, + error: { code: -32601, message: `Method not found: ${method}` }, + }, + { status: 404 }, + ); + } catch (error) { + const errorMessage = + error instanceof Error ? error.message : "Unknown error"; + return Response.json( + { + jsonrpc: "2.0", + error: { code: -32700, message: `Parse error: ${errorMessage}` }, + }, + { status: 400 }, + ); + } } diff --git a/packages/mcp-server/src/protocol/metadata.ts b/packages/mcp-server/src/protocol/metadata.ts index 24db8d2..5a45efb 100644 --- a/packages/mcp-server/src/protocol/metadata.ts +++ b/packages/mcp-server/src/protocol/metadata.ts @@ -3,23 +3,25 @@ */ export const serverCard = { - $schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json", - version: "1.0", - protocolVersion: "2025-06-18", - serverInfo: { - name: "marketplace-scrapers", - title: "Marketplace Scrapers MCP Server", - version: "1.0.0", - }, - transport: { - type: "streamable-http", - endpoint: "/mcp", - }, - capabilities: { - tools: { - listChanged: true, - }, - }, - description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay", - tools: "dynamic", + $schema: + "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json", + version: "1.0", + protocolVersion: "2025-06-18", + serverInfo: { + name: "marketplace-scrapers", + title: "Marketplace Scrapers MCP Server", + version: "1.0.0", + }, + transport: { + type: "streamable-http", + endpoint: "/mcp", + }, + capabilities: { + tools: { + listChanged: true, + }, + }, + description: + "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay", + tools: "dynamic", }; diff --git a/packages/mcp-server/src/protocol/tools.ts b/packages/mcp-server/src/protocol/tools.ts index 63cc116..512bc72 100644 --- a/packages/mcp-server/src/protocol/tools.ts +++ b/packages/mcp-server/src/protocol/tools.ts @@ -3,135 +3,138 @@ */ export const tools = [ - { - name: "search_kijiji", - description: "Search Kijiji marketplace for listings matching a query", - inputSchema: { - type: "object", - properties: { - query: { - type: "string", - description: "Search query for Kijiji listings", - }, - location: { - type: "string", - description: "Location name or ID (e.g., 'toronto', 'gta', 'ontario')", - }, - category: { - type: "string", - description: "Category name or ID (e.g., 'computers', 'furniture', 'bikes')", - }, - keywords: { - type: "string", - description: "Additional keywords to filter results", - }, - sortBy: { - type: "string", - description: "Sort results by field", - enum: ["relevancy", "date", "price", "distance"], - default: "relevancy", - }, - sortOrder: { - type: "string", - description: "Sort order", - enum: ["asc", "desc"], - default: "desc", - }, - maxPages: { - type: "number", - description: "Maximum pages to fetch (~40 items per page)", - default: 5, - }, - priceMin: { - type: "number", - description: "Minimum price in cents", - }, - priceMax: { - type: "number", - description: "Maximum price in cents", - }, - }, - required: ["query"], - }, - }, - { - name: "search_facebook", - description: "Search Facebook Marketplace for listings matching a query", - inputSchema: { - type: "object", - properties: { - query: { - type: "string", - description: "Search query for Facebook Marketplace listings", - }, - location: { - type: "string", - description: "Location for search (e.g., 'toronto')", - default: "toronto", - }, - maxItems: { - type: "number", - description: "Maximum number of items to return", - default: 5, - }, - cookiesSource: { - type: "string", - description: "Optional Facebook session cookies source", - }, - }, - required: ["query"], - }, - }, - { - name: "search_ebay", - description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)", - inputSchema: { - type: "object", - properties: { - query: { - type: "string", - description: "Search query for eBay listings", - }, - minPrice: { - type: "number", - description: "Minimum price filter", - }, - maxPrice: { - type: "number", - description: "Maximum price filter", - }, - strictMode: { - type: "boolean", - description: "Enable strict search mode", - default: false, - }, - exclusions: { - type: "array", - items: { type: "string" }, - description: "Terms to exclude from results", - }, - keywords: { - type: "array", - items: { type: "string" }, - description: "Keywords to include in search", - }, - buyItNowOnly: { - type: "boolean", - description: "Include only Buy It Now listings (exclude auctions)", - default: true, - }, - canadaOnly: { - type: "boolean", - description: "Include only Canadian sellers/listings", - default: true, - }, - maxItems: { - type: "number", - description: "Maximum number of items to return", - default: 5, - }, - }, - required: ["query"], - }, - }, + { + name: "search_kijiji", + description: "Search Kijiji marketplace for listings matching a query", + inputSchema: { + type: "object", + properties: { + query: { + type: "string", + description: "Search query for Kijiji listings", + }, + location: { + type: "string", + description: + "Location name or ID (e.g., 'toronto', 'gta', 'ontario')", + }, + category: { + type: "string", + description: + "Category name or ID (e.g., 'computers', 'furniture', 'bikes')", + }, + keywords: { + type: "string", + description: "Additional keywords to filter results", + }, + sortBy: { + type: "string", + description: "Sort results by field", + enum: ["relevancy", "date", "price", "distance"], + default: "relevancy", + }, + sortOrder: { + type: "string", + description: "Sort order", + enum: ["asc", "desc"], + default: "desc", + }, + maxPages: { + type: "number", + description: "Maximum pages to fetch (~40 items per page)", + default: 5, + }, + priceMin: { + type: "number", + description: "Minimum price in cents", + }, + priceMax: { + type: "number", + description: "Maximum price in cents", + }, + }, + required: ["query"], + }, + }, + { + name: "search_facebook", + description: "Search Facebook Marketplace for listings matching a query", + inputSchema: { + type: "object", + properties: { + query: { + type: "string", + description: "Search query for Facebook Marketplace listings", + }, + location: { + type: "string", + description: "Location for search (e.g., 'toronto')", + default: "toronto", + }, + maxItems: { + type: "number", + description: "Maximum number of items to return", + default: 5, + }, + cookiesSource: { + type: "string", + description: "Optional Facebook session cookies source", + }, + }, + required: ["query"], + }, + }, + { + name: "search_ebay", + description: + "Search eBay for listings matching a query (default: Buy It Now only, Canada only)", + inputSchema: { + type: "object", + properties: { + query: { + type: "string", + description: "Search query for eBay listings", + }, + minPrice: { + type: "number", + description: "Minimum price filter", + }, + maxPrice: { + type: "number", + description: "Maximum price filter", + }, + strictMode: { + type: "boolean", + description: "Enable strict search mode", + default: false, + }, + exclusions: { + type: "array", + items: { type: "string" }, + description: "Terms to exclude from results", + }, + keywords: { + type: "array", + items: { type: "string" }, + description: "Keywords to include in search", + }, + buyItNowOnly: { + type: "boolean", + description: "Include only Buy It Now listings (exclude auctions)", + default: true, + }, + canadaOnly: { + type: "boolean", + description: "Include only Canadian sellers/listings", + default: true, + }, + maxItems: { + type: "number", + description: "Maximum number of items to return", + default: 5, + }, + }, + required: ["query"], + }, + }, ];