/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";

/**
 * Facebook Marketplace Scraper
 *
 * Note: Facebook Marketplace requires authentication cookies for full access.
 * This implementation will return limited or no results without proper authentication.
 * This is by design to respect Facebook's authentication requirements.
 */

// ----------------------------- Types -----------------------------

type HTMLString = string;

/** Browser-export style cookie record (the shape stored in ./cookies/facebook.json). */
interface Cookie {
  name: string;
  value: string;
  domain: string;
  path: string;
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  session?: boolean;
  /** Expiry as seconds since the Unix epoch (compared against Date.now() / 1000). */
  expirationDate?: number;
  partitionKey?: Record<string, unknown>;
  storeId?: string;
}

/** Price payload attached to a search-result listing. */
interface FacebookListingPrice {
  /** Dollar amount, e.g. "1.00". */
  amount?: string | number;
  currency?: string;
  /** Facebook's internal price encoding (not cents). */
  amount_with_offset_in_currency?: string | number;
  /** Human-readable price, e.g. "CA$1". */
  formatted_amount?: string;
}

/** One search result as found under feed_units.edges[].node. */
interface FacebookAdNode {
  node: {
    listing: {
      id: string;
      marketplace_listing_title?: string;
      listing_price?: FacebookListingPrice;
      location?: {
        reverse_geocode?: {
          city_page?: {
            display_name?: string;
          };
        };
      };
      creation_time?: number;
      is_sold?: boolean;
      is_pending?: boolean;
      is_live?: boolean;
      is_hidden?: boolean;
      primary_listing_photo?: { image?: { uri?: string } };
      listing_video?: { id?: string };
      marketplace_listing_seller?: { name?: string; id?: string };
      marketplace_listing_category_id?: string;
      delivery_types?: string[];
      [k: string]: unknown;
    };
    [k: string]: unknown;
  };
}

interface FacebookEdge {
  node: FacebookAdNode["node"];
  [k: string]: unknown;
}

interface FacebookMarketplaceSearch {
  feed_units?: {
    edges?: FacebookEdge[];
  };
  [k: string]: unknown;
}

interface FacebookRequireData {
  require?: [number, number, number, FacebookMarketplaceSearch, number][];
  [k: string]: unknown;
}

/** Item payload as found on a single listing's detail page. */
interface FacebookMarketplaceItem {
  // Basic identification
  id: string;
  __typename: "GroupCommerceProductItem";

  // Listing content
  marketplace_listing_title: string;
  redacted_description?: {
    text: string;
  };
  custom_title?: string;

  // Pricing
  formatted_price?: {
    text: string;
  };
  listing_price?: {
    amount: string;
    currency: string;
    amount_with_offset: string;
  };

  // Location
  location_text?: {
    text: string;
  };
  location?: {
    latitude: number;
    longitude: number;
    reverse_geocode_detailed?: {
      country_alpha_two: string;
      postal_code_trimmed: string;
    };
  };

  // Status flags
  is_live?: boolean;
  is_sold?: boolean;
  is_pending?: boolean;
  is_hidden?: boolean;
  is_draft?: boolean;

  // Timing
  creation_time?: number;

  // Seller information
  marketplace_listing_seller?: {
    __typename: "User";
    id: string;
    name: string;
    profile_picture?: {
      uri: string;
    };
    join_time?: number;
  };

  // Vehicle-specific fields (for automotive listings)
  vehicle_make_display_name?: string;
  vehicle_model_display_name?: string;
  vehicle_odometer_data?: {
    unit: "KILOMETERS" | "MILES";
    value: number;
  };
  vehicle_transmission_type?: "AUTOMATIC" | "MANUAL";
  vehicle_exterior_color?: string;
  vehicle_interior_color?: string;
  vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR";
  vehicle_fuel_type?: string;
  vehicle_trim_display_name?: string;

  // Category and commerce
  marketplace_listing_category_id?: string;
  condition?: string;

  // Commerce features
  delivery_types?: string[];
  is_shipping_offered?: boolean;
  is_buy_now_enabled?: boolean;
  can_buyer_make_checkout_offer?: boolean;

  // Communication
  messaging_enabled?: boolean;
  first_message_suggested_value?: string;

  // Metadata
  logging_id?: string;
  reportable_ent_id?: string;

  // Related listings (for part-out sellers)
  marketplace_listing_sets?: {
    edges: Array<{
      node: {
        canonical_listing: {
          id: string;
          marketplace_listing_title: string;
          is_live: boolean;
          is_sold: boolean;
          formatted_price: { text: string };
        };
      };
    }>;
  };

  [k: string]: unknown;
}

/** Normalized listing record returned to callers. */
type ListingDetails = {
  url: string;
  title: string;
  description?: string;
  listingPrice?: {
    amountFormatted: string;
    cents?: number;
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  address?: string | null;
  // Facebook-specific fields
  imageUrl?: string;
  videoUrl?: string;
  seller?: {
    name?: string;
    id?: string;
  };
  categoryId?: string;
  deliveryTypes?: string[];
};

// ----------------------------- Utilities -----------------------------

/** True for any non-null object. Note that arrays also satisfy this guard. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/** Resolve after `ms` milliseconds. */
async function delay(ms: number): Promise<void> {
  await new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Minimal structural check applied to cookies read from JSON: only `name` and
 * `value` are verified, so other fields (e.g. `domain`) may still be missing.
 */
function isCookieLike(value: unknown): value is Cookie {
  return (
    isRecord(value) &&
    typeof value.name === "string" &&
    typeof value.value === "string"
  );
}

/** decodeURIComponent that returns the input unchanged when it is malformed. */
function safeDecodeURIComponent(value: string): string {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
}

/**
 * Load Facebook cookies from a JSON string or, failing that, from a file.
 *
 * @param cookiesSource Optional JSON text containing an array of cookies.
 * @param cookiePath File consulted when `cookiesSource` is absent or is not an array.
 * @returns The cookies that have string `name` and `value`; `[]` when none are found.
 * @throws Error when `cookiesSource` is provided but is not valid JSON.
 */
async function loadFacebookCookies(
  cookiesSource?: string,
  cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
  // First try to load from the provided string parameter
  if (cookiesSource) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(cookiesSource);
    } catch (e) {
      throw new Error(`Invalid cookies JSON provided: ${e}`);
    }
    if (Array.isArray(parsed)) {
      return parsed.filter(isCookieLike);
    }
  }

  // Then try the file; any failure here is reported and treated as "no cookies"
  try {
    const file = Bun.file(cookiePath);
    if (await file.exists()) {
      const parsed: unknown = JSON.parse(await file.text());
      if (Array.isArray(parsed)) {
        return parsed.filter(isCookieLike);
      }
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
  }

  return [];
}

/**
 * Parse a `name1=value1; name2=value2` cookie string into Cookie records
 * scoped to `.facebook.com`.
 *
 * Pairs without `=`, or with an empty name or value, are skipped. Only the
 * first `=` separates name from value, so values that themselves contain `=`
 * are preserved in full.
 */
function parseFacebookCookieString(cookieString: string): Cookie[] {
  if (!cookieString || !cookieString.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];
  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) continue;

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();

    // Skip empty names or values
    if (!name || !rawValue) continue;

    cookies.push({
      name,
      value: safeDecodeURIComponent(rawValue),
      domain: ".facebook.com",
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined, // Session cookies
    });
  }
  return cookies;
}

/**
 * Ensure Facebook cookies are available, parsing them from the
 * FACEBOOK_COOKIE environment variable when the cookie file yields none.
 * Cookies parsed from the environment are written to `cookiePath` (best effort).
 *
 * @throws Error when neither the file nor the environment variable provides cookies.
 */
async function ensureFacebookCookies(
  cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
  // loadFacebookCookies reports file problems itself and returns [] in that case
  const existing = await loadFacebookCookies(undefined, cookiePath);
  if (existing.length > 0) {
    return existing;
  }

  // Try to parse from environment variable
  const cookieString = process.env.FACEBOOK_COOKIE;
  if (!cookieString || !cookieString.trim()) {
    throw new Error(
      "No valid Facebook cookies found. Either:\n" +
        "  1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
        `  2. Create ${cookiePath} manually with cookie array`,
    );
  }

  const cookies = parseFacebookCookieString(cookieString);
  if (cookies.length === 0) {
    throw new Error(
      "FACEBOOK_COOKIE environment variable contains no valid cookies. " +
        'Expected format: "name1=value1; name2=value2;"',
    );
  }

  // Save to file for future use
  try {
    await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
    console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
  } catch (error) {
    // Continue anyway, we have the cookies in memory
    console.warn(`⚠️  Could not save cookies to ${cookiePath}: ${error}`);
  }

  return cookies;
}

/**
 * Build a `Cookie` request-header value from the cookies that apply to `domain`.
 *
 * A cookie whose domain starts with "." applies to that domain and its
 * subdomains; any other cookie applies only to an exactly matching host.
 * Cookies whose `expirationDate` (epoch seconds) is in the past, or that have
 * no string `domain`, are left out.
 */
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
  const host = domain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  return cookies
    .filter((cookie) => {
      // Loaded cookies are only validated for name/value, so domain may be absent
      if (typeof cookie.domain !== "string") return false;
      const cookieDomain = cookie.domain.toLowerCase();

      if (cookieDomain.startsWith(".")) {
        // Match on a label boundary: ".facebook.com" must not match "notfacebook.com"
        return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
      }
      // Host-only cookie
      return cookieDomain === host;
    })
    .filter(
      (cookie) => !(cookie.expirationDate && cookie.expirationDate < nowSeconds),
    )
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}

/** Error carrying the HTTP status and URL of a failed request. */
class HttpError extends Error {
  constructor(
    message: string,
    public readonly status: number,
    public readonly url: string,
  ) {
    super(message);
    this.name = "HttpError";
  }
}

// ----------------------------- HTTP Client -----------------------------

/**
 * Fetch HTML with a basic retry strategy and a fixed delay after each success.
 *
 * - Retries on 429, on 5xx and on network/body-read failures, up to `maxRetries`
 * - On 429, waits X-RateLimit-Reset seconds when that header is numeric
 * - Other 4xx responses fail immediately (no retry)
 * - Sends `opts.cookies` as the Cookie header when provided
 *
 * @param url Page to request.
 * @param DELAY_MS Delay applied after a successful response, in milliseconds.
 * @param opts Retry tuning, rate-limit callback and cookie header.
 * @returns The response body text.
 * @throws HttpError for non-OK responses that are not (or no longer) retried.
 */
async function fetchHtml(
  url: string,
  DELAY_MS: number,
  opts?: {
    maxRetries?: number;
    retryBaseMs?: number;
    onRateInfo?: (remaining: string | null, reset: string | null) => void;
    cookies?: string;
  },
): Promise<string> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;

  // Headers do not change between attempts, so build them once
  const headers: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "accept-encoding": "gzip, deflate, br",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };
  if (opts?.cookies) {
    headers["cookie"] = opts.cookies;
  }

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const backoffMs = (attempt + 1) * retryBaseMs;
    const isLastAttempt = attempt >= maxRetries;

    try {
      const res = await fetch(url, { method: "GET", headers });

      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (res.ok) {
        const html = await res.text();
        // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
        await delay(DELAY_MS);
        return html;
      }

      if (res.status === 429) {
        if (isLastAttempt) {
          throw new HttpError(
            `Request failed with status ${res.status} (rate limited)`,
            res.status,
            url,
          );
        }
        // Respect 429 reset if provided
        const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
        const waitMs = Number.isFinite(resetSeconds)
          ? Math.max(0, resetSeconds * 1000)
          : backoffMs;
        await delay(waitMs);
        continue;
      }

      // For Facebook, 400 often means authentication required.
      // Don't retry 4xx client errors except 429.
      if (res.status >= 400 && res.status < 500) {
        throw new HttpError(
          `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
          res.status,
          url,
        );
      }

      // Retry on 5xx
      if (res.status >= 500 && res.status < 600 && !isLastAttempt) {
        await delay(backoffMs);
        continue;
      }

      throw new HttpError(
        `Request failed with status ${res.status}`,
        res.status,
        url,
      );
    } catch (err) {
      // An HttpError is a final verdict from the status handling above;
      // only transport-level failures are retried here.
      if (err instanceof HttpError || isLastAttempt) throw err;
      await delay(backoffMs);
    }
  }

  throw new Error("Exhausted retries without response");
}

// ----------------------------- Parsing -----------------------------

/** True when `value` is a search payload whose feed_units.edges is a non-empty array. */
function hasFeedEdges(value: unknown): value is FacebookMarketplaceSearch {
  if (!isRecord(value)) return false;
  const feedUnits = value.feed_units;
  return (
    isRecord(feedUnits) &&
    Array.isArray(feedUnits.edges) &&
    feedUnits.edges.length > 0
  );
}

/**
 * Look for a marketplace_search payload inside one parsed script body, trying
 * each known location in order. Returns null when none holds listing edges.
 */
function findMarketplaceSearch(parsed: any): FacebookMarketplaceSearch | null {
  if (Array.isArray(parsed?.require)) {
    // Each accessor may throw when an intermediate level is missing
    const candidatePaths: Array<() => unknown> = [
      // Original path from example
      () =>
        parsed.require[0][3][0]["__bbox"]["require"][0][3][1]["__bbox"][
          "result"
        ]["data"]["marketplace_search"],
      // Alternative path structure
      () => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
      // Another variation
      () =>
        parsed.require[0][3][0]["__bbox"]["result"]["data"][
          "marketplace_search"
        ],
      // Direct access for some responses
      () => {
        for (const item of parsed.require) {
          if (item && item.length >= 4 && item[3]) {
            const search = item[3]?.["__bbox"]?.result?.data?.marketplace_search;
            if (search) return search;
          }
        }
        return null;
      },
    ];

    for (const readPath of candidatePaths) {
      try {
        const candidate = readPath();
        if (hasFeedEdges(candidate)) return candidate;
      } catch {
        // Path not present in this payload; try the next one
      }
    }
  }

  // Also check for direct marketplace_search in the parsed data
  const direct: unknown = parsed?.marketplace_search;
  return hasFeedEdges(direct) ? direct : null;
}

/**
 * Extract marketplace search data from Facebook page script tags.
 *
 * Every <script> whose text parses as JSON is inspected; the first one that
 * yields listing edges wins.
 *
 * @returns One entry per edge, or null when no script contains search results.
 */
function extractFacebookMarketplaceData(
  htmlString: HTMLString,
): FacebookAdNode[] | null {
  const { document } = parseHTML(htmlString);
  const scripts = document.querySelectorAll("script");

  let marketplaceData: FacebookMarketplaceSearch | null = null;

  for (const script of Array.from(scripts)) {
    const scriptText = script.textContent;
    if (!scriptText) continue;

    try {
      marketplaceData = findMarketplaceSearch(JSON.parse(scriptText));
    } catch {
      // Ignore parsing errors for other scripts
      continue;
    }
    if (marketplaceData) break;
  }

  const edges = marketplaceData?.feed_units?.edges;
  if (!edges?.length) {
    console.warn("No marketplace data found in HTML response");
    return null;
  }

  console.log(
    `Successfully parsed ${edges.length} Facebook marketplace listings`,
  );

  return edges.map((edge) => ({ node: edge?.node }));
}

/**
 * Monitor API extraction success/failure for detecting changes
 */
const extractionStats = {
  totalExtractions: 0,
  successfulExtractions: 0,
  failedExtractions: 0,
  lastApiChangeDetected: null as Date | null,
};

/**
 * Log extraction metrics for monitoring API stability.
 *
 * Updates the module-level counters and warns once (the first time) when,
 * after more than 10 extractions, the success rate is below 80%.
 */
function logExtractionMetrics(success: boolean, itemId?: string) {
  extractionStats.totalExtractions++;
  if (success) {
    extractionStats.successfulExtractions++;
  } else {
    extractionStats.failedExtractions++;
  }

  // Log warning if extraction success rate drops below 80%
  const successRate =
    extractionStats.successfulExtractions / extractionStats.totalExtractions;
  if (
    extractionStats.totalExtractions > 10 &&
    successRate < 0.8 &&
    !extractionStats.lastApiChangeDetected
  ) {
    console.warn(
      "⚠️  Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
    );
    extractionStats.lastApiChangeDetected = new Date();
  }

  if (success) {
    console.log(
      `📊 Facebook API extraction stats: ${extractionStats.successfulExtractions}/${extractionStats.totalExtractions} successful`,
    );
  } else {
    console.warn(
      `❌ Facebook API extraction failed for item ${itemId || "unknown"}`,
    );
  }
}

/**
 * Turns cents into a localized currency string.
 *
 * @param num Amount in cents; strings are parsed as base-10 integers.
 * @param locale BCP 47 locale used for formatting.
 * @param currency ISO 4217 code used for formatting (defaults to USD).
 * @returns The formatted amount, or "" when `num` is missing or not a finite number.
 */
function formatCentsToCurrency(
  num: number | string | undefined,
  locale = "en-US",
  currency = "USD",
): string {
  if (num == null) return "";
  const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
  if (!Number.isFinite(cents)) return "";

  const dollars = cents / 100;
  try {
    return new Intl.NumberFormat(locale, {
      style: "currency",
      currency,
      minimumFractionDigits: 2,
      maximumFractionDigits: 2,
      useGrouping: true,
    }).format(dollars);
  } catch {
    // Intl rejects unknown locale/currency codes; fall back to a plain rendering
    return `${dollars.toFixed(2)} ${currency}`;
  }
}

/** True when `value` has an id, a title and the GroupCommerceProductItem typename. */
function isMarketplaceItem(value: unknown): value is FacebookMarketplaceItem {
  return (
    isRecord(value) &&
    Boolean(value.id) &&
    Boolean(value.marketplace_listing_title) &&
    value.__typename === "GroupCommerceProductItem"
  );
}

/**
 * Depth-limited search through nested objects and arrays for the first value
 * that looks like a marketplace item and also carries a redacted_description.
 */
function findMarketplaceItem(
  value: unknown,
  depth = 0,
  maxDepth = 10,
): FacebookMarketplaceItem | null {
  // The depth cap bounds the work on very deep payloads
  if (depth > maxDepth || !isRecord(value)) return null;

  if (
    !Array.isArray(value) &&
    isMarketplaceItem(value) &&
    value.redacted_description
  ) {
    return value;
  }

  // Object.values covers both plain objects and arrays
  for (const child of Object.values(value)) {
    const found = findMarketplaceItem(child, depth + 1, maxDepth);
    if (found) return found;
  }
  return null;
}

/**
 * Extract marketplace item details from Facebook item page HTML.
 *
 * For each <script> that parses as JSON this tries, in order: a list of fixed
 * paths under `require`, a recursive search of `require`, and a top-level
 * `__bbox` payload. The fixed paths were found by inspecting real responses
 * and may stop matching when Facebook changes its page structure.
 *
 * @returns The first matching item, or null when no script contains one.
 */
function extractFacebookItemData(
  htmlString: HTMLString,
): FacebookMarketplaceItem | null {
  const { document } = parseHTML(htmlString);
  const scripts = document.querySelectorAll("script");

  for (const script of Array.from(scripts)) {
    const scriptText = script.textContent;
    if (!scriptText) continue;

    try {
      const parsed = JSON.parse(scriptText);

      if (Array.isArray(parsed?.require)) {
        const extractionPaths: Array<() => unknown> = [
          // Path 1: Primary path from current API structure
          () =>
            parsed.require[0][3].__bbox.result.data.viewer
              .marketplace_product_details_page.target,
          // Path 2: Alternative path with nested require
          () =>
            parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data
              .viewer.marketplace_product_details_page.target,
          // Path 3: Variation without the [0] index
          () =>
            parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data
              .viewer.marketplace_product_details_page.target,
          // Path 4-5: Additional fallback paths for edge cases
          () =>
            parsed.require[0][3][1]?.__bbox?.result?.data?.viewer
              ?.marketplace_product_details_page?.target,
          () =>
            parsed.require[0][3][2]?.__bbox?.result?.data?.viewer
              ?.marketplace_product_details_page?.target,
        ];

        for (const [pathIndex, readPath] of extractionPaths.entries()) {
          try {
            const target = readPath();
            if (isMarketplaceItem(target)) {
              console.log(
                `Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
              );
              return target;
            }
          } catch {
            // Path not found or invalid, try next path
          }
        }

        // Fallback: search the entire require structure recursively
        const recursiveResult = findMarketplaceItem(parsed.require);
        if (recursiveResult) {
          console.log(
            "Successfully extracted Facebook item data using recursive search",
          );
          return recursiveResult;
        }
      }

      // Payloads that expose __bbox at the top level (with or without `require`)
      const bboxTarget: unknown =
        parsed?.__bbox?.result?.data?.viewer?.marketplace_product_details_page
          ?.target;
      if (isMarketplaceItem(bboxTarget)) {
        console.log(
          "Successfully extracted Facebook item data from __bbox structure",
        );
        return bboxTarget;
      }
    } catch (error) {
      // Log parsing errors for debugging but continue to next script
      console.debug(`Failed to parse script for Facebook item data: ${error}`);
    }
  }

  return null;
}

/**
 * Work out a listing price in cents.
 *
 * Facebook stores price in different fields:
 * - amount: dollars (like "1.00"), used when present
 * - formatted_amount: human-readable price (like "CA$1"), parsed as a fallback
 * - amount_with_offset_in_currency: internal encoding (not cents), used as-is
 *   only when nothing better can be parsed
 *
 * @returns The price in cents, or null when no usable price field exists.
 */
function resolvePriceCents(price: FacebookListingPrice): number | null {
  if (price.amount != null) {
    const dollars =
      typeof price.amount === "string"
        ? Number.parseFloat(price.amount)
        : price.amount;
    return Math.round(dollars * 100);
  }

  if (price.amount_with_offset_in_currency == null) return null;

  const encodedAmount = Number(price.amount_with_offset_in_currency);
  if (Number.isNaN(encodedAmount) || encodedAmount <= 0) return null;

  if (typeof price.formatted_amount === "string") {
    const match = price.formatted_amount.match(/[\d,]+\.?\d*/);
    if (match) {
      // Strip every thousands separator, not just the first one
      const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
      if (!Number.isNaN(dollars)) return Math.round(dollars * 100);
    }
  }

  return encodedAmount; // fallback
}

/**
 * Parse Facebook marketplace search results into ListingDetails[].
 *
 * Ads without a title, without a price object, or whose price does not resolve
 * to a positive finite number of cents are skipped, as are malformed ads.
 */
function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
  const results: ListingDetails[] = [];

  for (const adJson of ads) {
    try {
      const listing = adJson.node.listing;
      const title = listing.marketplace_listing_title;
      const priceObj = listing.listing_price;

      if (!title || !priceObj) continue;

      const cents = resolvePriceCents(priceObj);
      if (cents === null || !Number.isFinite(cents) || cents <= 0) continue;

      const url = `https://www.facebook.com/marketplace/item/${listing.id}`;

      // Extract address from location data if available
      const cityName =
        listing.location?.reverse_geocode?.city_page?.display_name;
      const address = cityName || null;

      // Determine listing status from Facebook flags
      let listingStatus: string | undefined = undefined;
      if (listing.is_sold) {
        listingStatus = "SOLD";
      } else if (listing.is_pending) {
        listingStatus = "PENDING";
      } else if (listing.is_live) {
        listingStatus = "ACTIVE";
      } else if (listing.is_hidden) {
        listingStatus = "HIDDEN";
      }

      // creation_time is multiplied by 1000 to obtain milliseconds for Date
      const creationDate = listing.creation_time
        ? new Date(listing.creation_time * 1000).toISOString()
        : undefined;

      // Extract image and video URLs
      const imageUrl = listing.primary_listing_photo?.image?.uri;
      const videoUrl = listing.listing_video
        ? `https://www.facebook.com/${listing.listing_video.id}/`
        : undefined;

      // Extract seller information
      const seller = listing.marketplace_listing_seller
        ? {
            name: listing.marketplace_listing_seller.name,
            id: listing.marketplace_listing_seller.id,
          }
        : undefined;

      // Facebook marketplace often uses CAD
      const currency = priceObj.currency || "CAD";

      results.push({
        url,
        title,
        listingPrice: {
          // Format the fallback in the same currency that is reported below
          amountFormatted:
            priceObj.formatted_amount ||
            formatCentsToCurrency(cents, "en-US", currency),
          cents,
          currency,
        },
        address,
        creationDate,
        listingType: "item", // Default type for marketplace listings
        listingStatus,
        categoryId: listing.marketplace_listing_category_id,
        imageUrl,
        videoUrl,
        seller,
        deliveryTypes: listing.delivery_types,
      });
    } catch {
      // Skip malformed ads
      continue;
    }
  }

  return results;
}

// ----------------------------- Main -----------------------------

/**
 * Search Facebook Marketplace and return normalized, priced listings.
 *
 * @param SEARCH_QUERY Free-text search query.
 * @param REQUESTS_PER_SECOND Throttle used to derive the post-request delay;
 *   non-positive or non-finite values are treated as 1.
 * @param LOCATION Location slug placed in the marketplace URL path.
 * @param MAX_ITEMS Maximum number of listings returned.
 * @param cookiesSource Optional JSON array of cookies; when omitted, cookies
 *   are read from ./cookies/facebook.json.
 * @returns Up to MAX_ITEMS listings; `[]` when the request fails with an HTTP
 *   error or no ads can be parsed.
 * @throws Error when no cookies are available or none apply to www.facebook.com.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  LOCATION = "toronto",
  MAX_ITEMS = 25,
  cookiesSource?: string,
): Promise<ListingDetails[]> {
  // Load Facebook cookies - required for Facebook Marketplace access
  const cookies = await loadFacebookCookies(cookiesSource);

  if (cookies.length === 0) {
    throw new Error(
      "Facebook cookies are required for marketplace access. " +
        "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
    );
  }

  // Format cookies for HTTP header
  const domain = "www.facebook.com";
  const cookiesHeader = formatCookiesForHeader(cookies, domain);

  if (!cookiesHeader) {
    throw new Error(
      "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
    );
  }

  // A zero, negative or NaN rate would otherwise produce an Infinity/NaN delay
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0
      ? REQUESTS_PER_SECOND
      : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  // Encode user-supplied parts of the URL
  const encodedQuery = encodeURIComponent(SEARCH_QUERY);
  const encodedLocation = encodeURIComponent(LOCATION);

  // Facebook marketplace URL structure
  const searchUrl = `https://www.facebook.com/marketplace/${encodedLocation}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;

  console.log(`Fetching Facebook marketplace: ${searchUrl}`);
  console.log(`Using ${cookies.length} cookies for authentication`);

  let searchHtml: string;
  try {
    searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
      onRateInfo: (remaining, reset) => {
        if (remaining && reset) {
          console.log(
            "\n" +
              `Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
          );
        }
      },
      cookies: cookiesHeader,
    });
  } catch (err) {
    if (err instanceof HttpError) {
      console.warn(
        `\nFacebook marketplace access failed (${err.status}): ${err.message}`,
      );
      if (err.status === 400 || err.status === 401 || err.status === 403) {
        console.warn(
          "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
        );
      }
      return [];
    }
    throw err;
  }

  const ads = extractFacebookMarketplaceData(searchHtml);
  if (!ads || ads.length === 0) {
    console.warn("No ads parsed from Facebook marketplace page.");
    return [];
  }

  console.log(`\nFound ${ads.length} raw ads. Processing...`);

  // Parsing happens in one synchronous call, so the bar jumps from 0 to done
  const progressBar = new cliProgress.SingleBar(
    {},
    cliProgress.Presets.shades_classic,
  );
  const totalProgress = ads.length;
  progressBar.start(totalProgress, 0);

  const items = parseFacebookAds(ads);

  // Filter to only priced items (already done in parseFacebookAds)
  const pricedItems = items.filter(
    (item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
  );

  progressBar.update(totalProgress);
  progressBar.stop();

  console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
  return pricedItems.slice(0, Math.max(0, MAX_ITEMS)); // Limit results
}