Files
ca-marketplace-scraper/src/facebook.ts

928 lines
26 KiB
TypeScript

/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
/**
* Facebook Marketplace Scraper
*
* Note: Facebook Marketplace requires authentication cookies for full access.
* This implementation will return limited or no results without proper authentication.
* This is by design to respect Facebook's authentication requirements.
*/
// ----------------------------- Types -----------------------------
/** Raw HTML document text: what `fetchHtml` resolves to and what the `extract*` parsers take as input. */
type HTMLString = string;
/**
* A single cookie record, either loaded from the cookies JSON or synthesized
* from a `name=value; ...` cookie string.
*
* Within this section only `name`, `value`, `domain` and `expirationDate` are
* read (when building the Cookie request header); the other fields are carried
* along untouched.
* NOTE(review): the field set resembles a browser-extension cookie export — confirm the source format.
*/
interface Cookie {
// Cookie name; emitted as the left-hand side of `name=value` in the request header.
name: string;
// Cookie value; emitted verbatim as the right-hand side of `name=value`.
value: string;
// A leading "." marks a domain cookie that also applies to subdomains; otherwise the host must match exactly.
domain: string;
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
// Expiry in seconds since the Unix epoch (it is compared against `Date.now() / 1000`); absent means no expiry check.
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
/**
 * One search-result entry as handed to `parseFacebookAds`.
 *
 * Every field that `parseFacebookAds` reads is declared here (all optional,
 * since the payload is unvalidated JSON). Undeclared extra keys are still
 * accepted through the index signatures.
 */
interface FacebookAdNode {
  node: {
    listing: {
      id: string;
      marketplace_listing_title?: string;
      listing_price?: {
        // Price in major currency units (e.g. "1.00"); preferred source for the cents value.
        amount?: string | number;
        currency?: string;
        // Internal encoded amount; only used as a last-resort fallback.
        amount_with_offset_in_currency?: string | number;
        // Human-readable price such as "CA$1".
        formatted_amount?: string;
      };
      location?: {
        reverse_geocode?: {
          city_page?: {
            display_name?: string;
          };
        };
      };
      // Seconds since the Unix epoch (multiplied by 1000 before building a Date).
      creation_time?: number;
      // Status flags, checked in the order sold, pending, live, hidden.
      is_sold?: boolean;
      is_pending?: boolean;
      is_live?: boolean;
      is_hidden?: boolean;
      primary_listing_photo?: {
        image?: {
          uri?: string;
        };
      };
      listing_video?: {
        id?: string;
      };
      marketplace_listing_seller?: {
        name?: string;
        id?: string;
      };
      marketplace_listing_category_id?: string;
      delivery_types?: string[];
      [k: string]: unknown;
    };
    [k: string]: unknown;
  };
}
/**
* One entry of `feed_units.edges` in a marketplace search payload.
* `node` has the same shape as `FacebookAdNode["node"]`; any other keys on the edge are tolerated but untyped.
*/
interface FacebookEdge {
node: FacebookAdNode["node"];
[k: string]: unknown;
}
/**
* Shape of the `marketplace_search` object that `extractFacebookMarketplaceData` looks for in page scripts.
* A payload is only accepted when `feed_units.edges` is present and non-empty.
*/
interface FacebookMarketplaceSearch {
feed_units?: {
// Search results; each edge's `node` is later handed to `parseFacebookAds`.
edges?: FacebookEdge[];
};
[k: string]: unknown;
}
/**
* Declared shape of a script payload with a top-level `require` array of 5-tuples,
* where tuple index 3 is typed as the marketplace search data.
* Not referenced by the code visible in this section.
* NOTE(review): the parsers index `require[0][3]` further (e.g. `[0].__bbox...`), which does
* not match this tuple type — confirm against a real payload before relying on it.
*/
interface FacebookRequireData {
require?: [number, number, number, FacebookMarketplaceSearch, number][];
[k: string]: unknown;
}
/**
* Detail-page record for a single marketplace listing, as returned by `extractFacebookItemData`.
*
* At runtime the extractor only verifies `id`, `marketplace_listing_title` and
* `__typename === "GroupCommerceProductItem"` (the recursive fallback also requires
* `redacted_description`); every other field is taken on trust from the parsed JSON.
*/
interface FacebookMarketplaceItem {
// Basic identification
id: string;
// Discriminator checked by the extractor before a candidate object is accepted.
__typename: "GroupCommerceProductItem";
// Listing content
marketplace_listing_title: string;
redacted_description?: {
text: string;
};
custom_title?: string;
// Pricing
formatted_price?: {
text: string;
};
listing_price?: {
amount: string;
currency: string;
amount_with_offset: string;
};
// Location
location_text?: {
text: string;
};
location?: {
latitude: number;
longitude: number;
reverse_geocode_detailed?: {
country_alpha_two: string;
postal_code_trimmed: string;
};
};
// Status flags
is_live?: boolean;
is_sold?: boolean;
is_pending?: boolean;
is_hidden?: boolean;
is_draft?: boolean;
// Timing
// NOTE(review): presumably seconds since the Unix epoch, like the search listing's creation_time — confirm.
creation_time?: number;
// Seller information
marketplace_listing_seller?: {
__typename: "User";
id: string;
name: string;
profile_picture?: {
uri: string;
};
join_time?: number;
};
// Vehicle-specific fields (for automotive listings)
vehicle_make_display_name?: string;
vehicle_model_display_name?: string;
vehicle_odometer_data?: {
unit: "KILOMETERS" | "MILES";
value: number;
};
vehicle_transmission_type?: "AUTOMATIC" | "MANUAL";
vehicle_exterior_color?: string;
vehicle_interior_color?: string;
vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR";
vehicle_fuel_type?: string;
vehicle_trim_display_name?: string;
// Category and commerce
marketplace_listing_category_id?: string;
condition?: string;
// Commerce features
delivery_types?: string[];
is_shipping_offered?: boolean;
is_buy_now_enabled?: boolean;
can_buyer_make_checkout_offer?: boolean;
// Communication
messaging_enabled?: boolean;
first_message_suggested_value?: string;
// Metadata
logging_id?: string;
reportable_ent_id?: string;
// Related listings (for part-out sellers)
marketplace_listing_sets?: {
edges: Array<{
node: {
canonical_listing: {
id: string;
marketplace_listing_title: string;
is_live: boolean;
is_sold: boolean;
formatted_price: { text: string };
};
};
}>;
};
// Any additional keys present in the payload are allowed but untyped.
[k: string]: unknown;
}
/**
* Normalized listing record produced by `parseFacebookAds` and returned from `fetchFacebookItems`.
* Fields that `parseFacebookAds` does not populate (`description`, `endDate`, `numberOfViews`)
* are simply left absent.
*/
type ListingDetails = {
// Item page URL, built as https://www.facebook.com/marketplace/item/<id>.
url: string;
title: string;
description?: string;
listingPrice?: {
// Display string: the payload's formatted amount when present, otherwise formatted from `cents`.
amountFormatted: string;
// Price in hundredths of the currency unit.
cents?: number;
// Currency code from the payload; `parseFacebookAds` falls back to "CAD".
currency?: string;
};
// `parseFacebookAds` always sets this to "item".
listingType?: string;
// One of "SOLD", "PENDING", "ACTIVE", "HIDDEN", or undefined when no status flag is set.
listingStatus?: string;
// ISO-8601 timestamp derived from the listing's creation time.
creationDate?: string;
endDate?: string;
numberOfViews?: number;
// City display name from the listing's location, or null when unavailable.
address?: string | null;
// Facebook-specific fields
imageUrl?: string;
videoUrl?: string;
seller?: {
name?: string;
id?: string;
};
categoryId?: string;
deliveryTypes?: string[];
};
// ----------------------------- Utilities -----------------------------
/**
 * Type guard for "some non-null object".
 *
 * Returns true for anything whose `typeof` is "object" except `null`. Arrays
 * therefore pass as well, since they are objects.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Resolve (with no value) after roughly `ms` milliseconds, using `setTimeout`.
 */
async function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Load Facebook cookies from a JSON string or, failing that, from a JSON file.
 *
 * Resolution order:
 *  1. `cookiesSource`, when given: must be valid JSON. If it parses to an array,
 *     the validated entries are returned (possibly an empty list). If it parses
 *     to something else, the file is tried next.
 *  2. The file at `cookiePath` (read through `Bun.file`), under the same rule.
 *  3. Otherwise an empty array.
 *
 * Only entries with string `name` and `value` are kept; other fields are not validated.
 *
 * @param cookiesSource Optional JSON text containing an array of cookie objects.
 * @param cookiePath Path of the cookies JSON file to fall back to.
 * @returns The validated cookies, or `[]` when none could be loaded.
 * @throws Error when `cookiesSource` is provided but is not valid JSON.
 */
async function loadFacebookCookies(cookiesSource?: string, cookiePath = './cookies/facebook.json'): Promise<Cookie[]> {
  // Shared validation for both sources: null means "not an array, keep looking".
  const pickCookies = (parsed: unknown): Cookie[] | null => {
    if (!Array.isArray(parsed)) return null;
    return parsed.filter(
      (cookie): cookie is Cookie =>
        Boolean(cookie) &&
        typeof cookie.name === "string" &&
        typeof cookie.value === "string",
    );
  };

  // First try the explicitly provided JSON string.
  if (cookiesSource) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(cookiesSource);
    } catch (e) {
      throw new Error(`Invalid cookies JSON provided: ${e}`);
    }
    const fromString = pickCookies(parsed);
    if (fromString) return fromString;
  }

  // Then try the cookie file. This is best-effort: any failure is reported and ignored.
  try {
    const file = Bun.file(cookiePath);
    if (await file.exists()) {
      const fromFile = pickCookies(JSON.parse(await file.text()));
      if (fromFile) return fromFile;
    }
  } catch (e) {
    // Report the path that was actually attempted, not the default one.
    console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
  }
  return [];
}
/**
 * Parse a `name1=value1; name2=value2` cookie string into Cookie records.
 *
 * - Pairs are separated by `;`. Each pair is split on its FIRST `=` only, so
 *   values that themselves contain `=` (for example base64 padding) stay intact.
 * - Pairs without `=`, or with an empty name or value, are skipped.
 * - Values are percent-decoded. A value with a malformed escape sequence is
 *   kept as-is instead of aborting the whole parse.
 * - Every cookie is given the fixed attributes `.facebook.com`, `/`, secure,
 *   non-httpOnly, `lax`, and no expiration (a session cookie).
 *
 * @param cookieString Raw cookie header-style string.
 * @returns Parsed cookies in input order; `[]` for empty or blank input.
 */
function parseFacebookCookieString(cookieString: string): Cookie[] {
  if (!cookieString || !cookieString.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];
  for (const rawPair of cookieString.split(';')) {
    const pair = rawPair.trim();
    const separatorAt = pair.indexOf('=');
    if (separatorAt === -1) continue;

    const name = pair.slice(0, separatorAt).trim();
    const rawValue = pair.slice(separatorAt + 1).trim();
    // Skip empty names or values
    if (!name || !rawValue) continue;

    let value: string;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent-encoding: keep the raw text rather than throwing URIError.
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain: '.facebook.com',
      path: '/',
      secure: true,
      httpOnly: false,
      sameSite: 'lax',
      expirationDate: undefined, // Session cookies
    });
  }
  return cookies;
}
/**
 * Ensure Facebook cookies are available, bootstrapping them from the
 * `FACEBOOK_COOKIE` environment variable when the cookie file yields nothing.
 *
 * Steps:
 *  1. Try `loadFacebookCookies` against `cookiePath`; return its result if non-empty.
 *  2. Otherwise parse `process.env.FACEBOOK_COOKIE` with `parseFacebookCookieString`.
 *  3. Write the parsed cookies to `cookiePath` for future runs (best-effort) and return them.
 *
 * @param cookiePath Cookie JSON file to read from and to write parsed cookies to.
 * @returns A non-empty list of cookies.
 * @throws Error when no cookies are on file and the environment variable is
 *         unset, blank, or contains no valid `name=value` pairs.
 */
async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Promise<Cookie[]> {
  // First try to load existing cookies
  try {
    const existing = await loadFacebookCookies(undefined, cookiePath);
    if (existing.length > 0) {
      return existing;
    }
  } catch {
    // Deliberately ignored: an unreadable cookie file just means we fall back to the env var.
  }

  // Try to parse from environment variable
  const cookieString = process.env.FACEBOOK_COOKIE;
  if (!cookieString || !cookieString.trim()) {
    // Name the path actually in use so the hint is right for non-default locations.
    throw new Error(
      'No valid Facebook cookies found. Either:\n' +
      ' 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n' +
      ` 2. Create ${cookiePath} manually with cookie array`
    );
  }

  // Parse the cookie string
  const cookies = parseFacebookCookieString(cookieString);
  if (cookies.length === 0) {
    throw new Error(
      'FACEBOOK_COOKIE environment variable contains no valid cookies. ' +
      'Expected format: "name1=value1; name2=value2;"'
    );
  }

  // Save to file for future use
  try {
    await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
    console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
  } catch (error) {
    console.warn(`⚠️ Could not save cookies to ${cookiePath}: ${error}`);
    // Continue anyway, we have the cookies in memory
  }
  return cookies;
}
/**
 * Build the value of a `Cookie` request header for `domain`.
 *
 * A cookie is included when:
 *  - its domain applies to `domain`. A cookie domain with a leading "." matches
 *    that domain itself and any subdomain of it; any other cookie domain must
 *    equal the host exactly. Matching is case-insensitive and respects label
 *    boundaries, so ".book.com" does not match "www.facebook.com".
 *  - it has not expired. `expirationDate` is in seconds since the Unix epoch;
 *    cookies without one never expire here.
 *
 * Cookies lacking a usable `domain` are skipped rather than crashing.
 *
 * @param cookies Candidate cookies.
 * @param domain Request host, e.g. "www.facebook.com".
 * @returns `name=value` pairs joined with "; ", or "" when nothing applies.
 */
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
  const host = domain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    if (typeof cookie.domain !== "string" || cookie.domain === "") return false;
    const cookieDomain = cookie.domain.toLowerCase();
    if (!cookieDomain.startsWith(".")) {
      // Host-only cookie
      return cookieDomain === host;
    }
    // Domain cookie: exact match or a true subdomain (note the "." boundary).
    const bareDomain = cookieDomain.slice(1);
    return host === bareDomain || host.endsWith(`.${bareDomain}`);
  };

  const isUnexpired = (cookie: Cookie): boolean =>
    !(cookie.expirationDate && cookie.expirationDate < nowSeconds);

  return cookies
    .filter((cookie) => appliesToHost(cookie) && isUnexpired(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
/**
 * Error describing a failed HTTP request.
 *
 * Carries the response `status` and the requested `url` alongside the message,
 * and sets `name` to "HttpError" so it is recognizable in logs.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
// ----------------------------- HTTP Client -----------------------------
/**
 * Fetch a page as HTML text with a basic retry strategy and a fixed pause after success.
 *
 * Behavior per attempt (at most `maxRetries + 1` attempts):
 *  - 2xx: read the body, wait `DELAY_MS`, return the text.
 *  - 429: wait for `X-RateLimit-Reset` seconds when the header is numeric,
 *    otherwise `(attempt + 1) * retryBaseMs`, then retry.
 *  - 5xx: wait `(attempt + 1) * retryBaseMs`, then retry.
 *  - Any other non-OK status (including other 4xx): fail immediately with
 *    `HttpError` and do not retry.
 *  - Thrown errors from `fetch` itself, such as network failures, are retried
 *    with the same linear backoff.
 * Once attempts are exhausted, the last HTTP failure is thrown as `HttpError`
 * and the last network failure is rethrown as-is.
 *
 * @param url Absolute URL to GET.
 * @param DELAY_MS Pause after a successful response, used for rate limiting.
 * @param opts.maxRetries Number of retries after the first attempt (default 3).
 * @param opts.retryBaseMs Base backoff in milliseconds (default 500).
 * @param opts.onRateInfo Called on every response with the raw
 *        `X-RateLimit-Remaining` / `X-RateLimit-Reset` header values.
 * @param opts.cookies Value for the `cookie` request header (Facebook authentication).
 * @returns The response body text.
 * @throws HttpError for a non-OK response that is not, or can no longer be, retried.
 */
async function fetchHtml(
  url: string,
  DELAY_MS: number,
  opts?: {
    maxRetries?: number;
    retryBaseMs?: number;
    onRateInfo?: (remaining: string | null, reset: string | null) => void;
    cookies?: string;
  },
): Promise<HTMLString> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;

  // Headers do not change between attempts, so build them once.
  const headers: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "accept-encoding": "gzip, deflate, br",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };
  // Add cookies if provided
  if (opts?.cookies) {
    headers["cookie"] = opts.cookies;
  }

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const isLastAttempt = attempt >= maxRetries;
    try {
      const res = await fetch(url, {
        method: "GET",
        headers,
      });
      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (res.ok) {
        const html = await res.text();
        // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
        await delay(DELAY_MS);
        return html;
      }

      // Rate limited: honor the advertised reset when available, unless out of attempts.
      if (res.status === 429 && !isLastAttempt) {
        const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
        const waitMs = Number.isFinite(resetSeconds)
          ? Math.max(0, resetSeconds * 1000)
          : (attempt + 1) * retryBaseMs;
        await delay(waitMs);
        continue;
      }

      // Server error: linear backoff, unless out of attempts.
      if (res.status >= 500 && res.status < 600 && !isLastAttempt) {
        await delay((attempt + 1) * retryBaseMs);
        continue;
      }

      // For Facebook, 400 often means authentication is required; other 4xx are not retryable either.
      const isClientError =
        res.status >= 400 && res.status < 500 && res.status !== 429;
      throw new HttpError(
        isClientError
          ? `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`
          : `Request failed with status ${res.status}`,
        res.status,
        url,
      );
    } catch (err) {
      // An HttpError is a final verdict from the branches above. Without this
      // check the catch would swallow it and retry 4xx responses anyway.
      if (err instanceof HttpError || isLastAttempt) throw err;
      await delay((attempt + 1) * retryBaseMs);
    }
  }
  throw new Error("Exhausted retries without response");
}
// ----------------------------- Parsing -----------------------------
/**
 * Pull marketplace search results out of the JSON embedded in a search page.
 *
 * Each `<script>` whose text parses as JSON is inspected in document order:
 *  - if it has a `require` array, several known navigation paths are probed
 *    for a `marketplace_search` object;
 *  - otherwise (or if none of those matched) a top-level `marketplace_search`
 *    key is checked.
 * The first candidate with a non-empty `feed_units.edges` wins.
 *
 * @param htmlString Search page HTML.
 * @returns One `{ node }` wrapper per edge, or `null` (after a warning) when
 *          no usable search data is found.
 */
function extractFacebookMarketplaceData(
  htmlString: HTMLString,
): FacebookAdNode[] | null {
  const { document } = parseHTML(htmlString);
  const scriptElements = Array.from(
    document.querySelectorAll("script"),
  ) as HTMLScriptElement[];

  // True when the candidate carries at least one search edge.
  const hasEdges = (search: any): boolean =>
    search.feed_units?.edges?.length > 0;

  let found: FacebookMarketplaceSearch | null = null;

  for (const scriptEl of scriptElements) {
    const raw = scriptEl.textContent;
    if (!raw) continue;

    try {
      const payload = JSON.parse(raw);

      // `require`-style payload: probe the known locations in order.
      if (payload.require && Array.isArray(payload.require)) {
        const probes: Array<() => any> = [
          // Original path from example
          () => payload.require[0][3][0]['__bbox']['require'][0][3][1]['__bbox']['result']['data']['marketplace_search'],
          // Alternative path structure
          () => payload.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
          // Another variation
          () => payload.require[0][3][0]['__bbox']['result']['data']['marketplace_search'],
          // Direct access for some responses
          () => {
            for (const entry of payload.require) {
              if (entry && entry.length >= 4 && entry[3]) {
                const search = entry[3]?.['__bbox']?.result?.data?.marketplace_search;
                if (search) return search;
              }
            }
            return null;
          },
        ];

        for (const probe of probes) {
          try {
            const candidate = probe();
            if (candidate && isRecord(candidate) && hasEdges(candidate)) {
              found = candidate as FacebookMarketplaceSearch;
              break;
            }
          } catch {
            // This path does not exist in the payload; try the next one.
            continue;
          }
        }
        if (found) break;
      }

      // Payload exposing `marketplace_search` directly at the top level.
      const direct = payload.marketplace_search;
      if (direct && isRecord(direct)) {
        if (hasEdges(direct)) {
          found = direct as FacebookMarketplaceSearch;
          break;
        }
      }
    } catch {
      // Not JSON (or not the shape we want): ignore this script.
    }
  }

  const edges = found?.feed_units?.edges;
  if (!edges?.length) {
    console.warn("No marketplace data found in HTML response");
    return null;
  }

  console.log(`Successfully parsed ${edges.length} Facebook marketplace listings`);
  return edges.map((edge) => ({ node: edge.node }));
}
/**
* Module-level counters for item-data extraction outcomes, used to detect changes.
* Updated in place by `logExtractionMetrics`; the values accumulate for the
* lifetime of the module and are never reset in the code visible here.
*/
let extractionStats = {
// Number of outcomes recorded so far (successful + failed).
totalExtractions: 0,
successfulExtractions: 0,
failedExtractions: 0,
// Set once, the first time the low-success-rate warning fires; null until then.
lastApiChangeDetected: null as Date | null,
};
/**
 * Record one extraction outcome in `extractionStats` and log it.
 *
 * After updating the counters, a one-time warning is emitted when more than 10
 * outcomes have been recorded and the success rate is below 80%. The timestamp
 * of that warning is stored so it is not repeated. Each call then logs either
 * the running success tally or a failure line naming the item.
 *
 * @param success Whether the extraction succeeded.
 * @param itemId Identifier used in the failure message ("unknown" when omitted).
 */
function logExtractionMetrics(success: boolean, itemId?: string) {
  const stats = extractionStats;

  stats.totalExtractions += 1;
  if (success) {
    stats.successfulExtractions += 1;
  } else {
    stats.failedExtractions += 1;
  }

  // Log warning if extraction success rate drops below 80%
  const ratio = stats.successfulExtractions / stats.totalExtractions;
  const alreadyFlagged = Boolean(stats.lastApiChangeDetected);
  if (!alreadyFlagged && stats.totalExtractions > 10 && ratio < 0.8) {
    console.warn("⚠️ Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.");
    stats.lastApiChangeDetected = new Date();
  }

  if (!success) {
    console.warn(`❌ Facebook API extraction failed for item ${itemId || 'unknown'}`);
    return;
  }
  console.log(`📊 Facebook API extraction stats: ${stats.successfulExtractions}/${stats.totalExtractions} successful`);
}
/**
 * Format an amount given in cents as a localized currency string.
 *
 * String input is parsed as a base-10 integer. Missing, unparsable or
 * non-finite input yields an empty string.
 *
 * @param num Amount in cents (number or numeric string).
 * @param locale BCP 47 locale used for formatting (default "en-US").
 * @param currency ISO 4217 currency code (default "USD", which was the
 *        previously hard-coded value).
 * @returns e.g. "$123.45" for `12345`; "" when `num` is unusable.
 * @throws RangeError from `Intl.NumberFormat` when `locale` or `currency` is invalid.
 */
function formatCentsToCurrency(
  num: number | string | undefined,
  locale = "en-US",
  currency = "USD",
): string {
  if (num == null) return "";
  const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
  // Covers NaN as well as ±Infinity, neither of which formats to a meaningful price.
  if (!Number.isFinite(cents)) return "";
  const formatter = new Intl.NumberFormat(locale, {
    style: 'currency',
    currency,
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
    useGrouping: true,
  });
  return formatter.format(cents / 100);
}
/**
 * Extract the marketplace item record from an item page's embedded JSON.
 *
 * For every `<script>` whose text parses as JSON and has a `require` array,
 * three strategies are tried in order:
 *  1. a list of fixed navigation paths to
 *     `viewer.marketplace_product_details_page.target`;
 *  2. a depth-limited recursive search of `require` for an object that looks
 *     like a listing (title, id, `GroupCommerceProductItem` typename and a
 *     `redacted_description`);
 *  3. a top-level `__bbox...target` lookup.
 * Scripts that fail to parse are reported with `console.debug` and skipped.
 *
 * @param htmlString Item page HTML.
 * @returns The first matching item object, or `null` when none is found.
 */
function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceItem | null {
  const { document } = parseHTML(htmlString);

  // Acceptance test shared by the fixed-path probes and the `__bbox` lookup.
  const looksLikeItem = (candidate: any): boolean =>
    Boolean(
      candidate &&
        typeof candidate === 'object' &&
        candidate.id &&
        candidate.marketplace_listing_title &&
        candidate.__typename === 'GroupCommerceProductItem',
    );

  // Depth-limited walk over nested objects/arrays looking for a listing-shaped object.
  // `isRecord` is true for arrays as well, so a single branch covers both containers.
  const deepSearch = (value: unknown, depth = 0, maxDepth = 10): FacebookMarketplaceItem | null => {
    if (depth > maxDepth) return null; // Prevent runaway recursion
    if (!isRecord(value)) return null;

    if (
      value.marketplace_listing_title &&
      value.id &&
      value.__typename === 'GroupCommerceProductItem' &&
      value.redacted_description
    ) {
      return value as FacebookMarketplaceItem;
    }

    for (const key in value) {
      const child = value[key];
      if (isRecord(child) || Array.isArray(child)) {
        const hit = deepSearch(child, depth + 1, maxDepth);
        if (hit) return hit;
      }
    }
    return null;
  };

  for (const scriptEl of document.querySelectorAll("script")) {
    const raw = scriptEl.textContent;
    if (!raw) continue;

    try {
      const payload = JSON.parse(raw);
      // Only payloads with a `require` array are of interest.
      if (!(payload.require && Array.isArray(payload.require))) continue;

      // Strategy 1: fixed extraction paths, tried in order.
      const probes: Array<() => any> = [
        // Path 1: Primary path from current API structure
        () => payload.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target,
        // Path 2: Alternative path with nested require
        () => payload.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target,
        // Path 3: Variation without the [0] index
        () => payload.require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target,
        // Path 4-5: Additional fallback paths for edge cases
        () => payload.require[0][3][1]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target,
        () => payload.require[0][3][2]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target,
      ];
      for (let i = 0; i < probes.length; i++) {
        try {
          const target = probes[i]();
          if (looksLikeItem(target)) {
            console.log(`Successfully extracted Facebook item data using extraction path ${i + 1}`);
            return target as FacebookMarketplaceItem;
          }
        } catch {
          // Path not found or invalid, try next path
        }
      }

      // Strategy 2: recursive search through the entire require structure.
      const viaSearch = deepSearch(payload.require);
      if (viaSearch) {
        console.log('Successfully extracted Facebook item data using recursive search');
        return viaSearch;
      }

      // Strategy 3: top-level `__bbox` location.
      const bboxTarget = payload.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target;
      if (bboxTarget && looksLikeItem(bboxTarget)) {
        console.log('Successfully extracted Facebook item data from __bbox structure');
        return bboxTarget as FacebookMarketplaceItem;
      }
    } catch (error) {
      // Log parsing errors for debugging but continue to next script
      console.debug(`Failed to parse script for Facebook item data: ${error}`);
    }
  }
  return null;
}
/**
 * Derive a price in cents from a raw search-result price object.
 *
 * Preference order:
 *  1. `amount` (major units, e.g. "1.00"), multiplied by 100 and rounded;
 *  2. when only `amount_with_offset_in_currency` is present and positive: the
 *     number embedded in `formatted_amount` (e.g. "CA$1,234" gives 123400),
 *     falling back to the encoded amount itself when no number can be read.
 *
 * @returns Cents (possibly NaN for an unparsable `amount`), or `null` when no
 *          usable price field exists.
 */
function facebookPriceToCents(price: {
  amount?: string | number;
  amount_with_offset_in_currency?: string | number;
  formatted_amount?: string;
}): number | null {
  if (price.amount != null) {
    const dollars =
      typeof price.amount === 'string'
        ? Number.parseFloat(price.amount)
        : Number(price.amount);
    return Math.round(dollars * 100);
  }

  if (price.amount_with_offset_in_currency == null) return null;
  const encodedAmount = Number(price.amount_with_offset_in_currency);
  if (Number.isNaN(encodedAmount) || encodedAmount <= 0) return null;

  // The encoded field is not real cents, so prefer the human-readable amount.
  if (typeof price.formatted_amount === 'string') {
    const numeric = price.formatted_amount.match(/[\d,]+\.?\d*/);
    if (numeric) {
      // Strip ALL thousands separators; removing only the first one would
      // turn "1,234,567" into 1234.
      const dollars = Number.parseFloat(numeric[0].replace(/,/g, ''));
      if (!Number.isNaN(dollars)) return Math.round(dollars * 100);
    }
  }
  return encodedAmount; // fallback
}

/**
 * Parse Facebook marketplace search results into ListingDetails[].
 *
 * An ad is skipped when it has no title, no price object, or a price that does
 * not resolve to a positive finite number of cents. Ads whose processing throws
 * are skipped as malformed.
 *
 * @param ads Search-result nodes as returned by `extractFacebookMarketplaceData`.
 * @returns One `ListingDetails` per usable ad, in input order.
 */
function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
  // Fields read below that the declared listing type may not spell out.
  type SearchListing = FacebookAdNode["node"]["listing"] & {
    listing_price?: {
      amount_with_offset_in_currency?: string | number;
      formatted_amount?: string;
    };
    is_sold?: boolean;
    is_pending?: boolean;
    is_live?: boolean;
    is_hidden?: boolean;
    primary_listing_photo?: { image?: { uri?: string } };
    listing_video?: { id?: string };
    marketplace_listing_seller?: { name?: string; id?: string };
    marketplace_listing_category_id?: string;
    delivery_types?: string[];
  };

  const results: ListingDetails[] = [];
  for (const adJson of ads) {
    try {
      const listing = adJson.node.listing as SearchListing;
      const title = listing.marketplace_listing_title;
      const priceObj = listing.listing_price;
      if (!title || !priceObj) continue;

      const cents = facebookPriceToCents(priceObj);
      if (cents === null || !Number.isFinite(cents) || cents <= 0) continue;

      const url = `https://www.facebook.com/marketplace/item/${listing.id}`;

      // Extract address from location data if available
      const cityName =
        listing.location?.reverse_geocode?.city_page?.display_name;
      const address = cityName || null;

      // Determine listing status from Facebook flags (first matching flag wins)
      let listingStatus: string | undefined = undefined;
      if (listing.is_sold) {
        listingStatus = "SOLD";
      } else if (listing.is_pending) {
        listingStatus = "PENDING";
      } else if (listing.is_live) {
        listingStatus = "ACTIVE";
      } else if (listing.is_hidden) {
        listingStatus = "HIDDEN";
      }

      // Format creation date if available. An out-of-range timestamp leaves the
      // date undefined instead of throwing and dropping the whole listing.
      let creationDate: string | undefined = undefined;
      if (listing.creation_time) {
        const created = new Date(listing.creation_time * 1000);
        if (!Number.isNaN(created.getTime())) {
          creationDate = created.toISOString();
        }
      }

      // Extract image and video URLs
      const imageUrl = listing.primary_listing_photo?.image?.uri;
      const videoUrl = listing.listing_video
        ? `https://www.facebook.com/${listing.listing_video.id}/`
        : undefined;

      // Extract seller information
      const seller = listing.marketplace_listing_seller
        ? {
            name: listing.marketplace_listing_seller.name,
            id: listing.marketplace_listing_seller.id,
          }
        : undefined;

      results.push({
        url,
        title,
        listingPrice: {
          amountFormatted: priceObj.formatted_amount || formatCentsToCurrency(cents),
          cents,
          currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
        },
        address,
        creationDate,
        listingType: "item", // Default type for marketplace listings
        listingStatus,
        categoryId: listing.marketplace_listing_category_id,
        imageUrl,
        videoUrl,
        seller,
        deliveryTypes: listing.delivery_types,
      });
    } catch {
      // Skip malformed ads
      continue;
    }
  }
  return results;
}
// ----------------------------- Main -----------------------------
/**
 * Search Facebook Marketplace and return the priced listings found on the
 * first results page.
 *
 * Flow: load cookies, build the Cookie header for www.facebook.com, fetch the
 * search page, extract the embedded search data, normalize it, then keep
 * listings with a positive price and cap the result at `MAX_ITEMS`.
 *
 * @param SEARCH_QUERY Free-text query (URL-encoded before use).
 * @param REQUESTS_PER_SECOND Request rate used to derive the post-request delay.
 * @param LOCATION Marketplace location slug placed in the URL path.
 * @param MAX_ITEMS Maximum number of listings to return.
 * @param cookiesSource Optional JSON string of cookies; otherwise the default cookie file is used.
 * @returns Up to `MAX_ITEMS` listings; `[]` when the request fails with an
 *          HTTP error or no ads can be parsed.
 * @throws Error when no cookies are available or none apply to the domain;
 *         non-HTTP fetch errors are rethrown.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  LOCATION = "toronto",
  MAX_ITEMS = 25,
  cookiesSource?: string,
) {
  // Facebook Marketplace is not accessible without session cookies.
  const cookies = await loadFacebookCookies(cookiesSource);
  if (cookies.length === 0) {
    throw new Error(
      "Facebook cookies are required for marketplace access. " +
        "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
    );
  }

  // Keep only cookies that are unexpired and valid for the request host.
  const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
  if (!cookiesHeader) {
    throw new Error(
      "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
    );
  }

  const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));

  // Facebook marketplace URL structure, with the query URL-encoded.
  const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodeURIComponent(SEARCH_QUERY)}&sortBy=creation_time_descend&exact=false`;
  console.log(`Fetching Facebook marketplace: ${searchUrl}`);
  console.log(`Using ${cookies.length} cookies for authentication`);

  const reportRateInfo = (remaining: string | null, reset: string | null) => {
    if (!remaining || !reset) return;
    console.log(
      "\n" +
        `Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
    );
  };

  let searchHtml: string;
  try {
    searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
      onRateInfo: reportRateInfo,
      cookies: cookiesHeader,
    });
  } catch (err) {
    // Only HTTP-level failures are downgraded to "no results".
    if (!(err instanceof HttpError)) throw err;
    console.warn(
      `\nFacebook marketplace access failed (${err.status}): ${err.message}`,
    );
    if ([400, 401, 403].includes(err.status)) {
      console.warn(
        "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
      );
    }
    return [];
  }

  const ads = extractFacebookMarketplaceData(searchHtml);
  if (!ads || ads.length === 0) {
    console.warn("No ads parsed from Facebook marketplace page.");
    return [];
  }
  console.log(`\nFound ${ads.length} raw ads. Processing...`);

  const progressBar = new cliProgress.SingleBar(
    {},
    cliProgress.Presets.shades_classic,
  );
  const totalProgress = ads.length;
  progressBar.start(totalProgress, 0);

  const items = parseFacebookAds(ads);
  // Filter to only priced items (already done in parseFacebookAds)
  const pricedItems = items.filter((item) => {
    const cents = item.listingPrice?.cents;
    return Boolean(cents && cents > 0);
  });

  progressBar.update(totalProgress);
  progressBar.stop();

  console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
  return pricedItems.slice(0, MAX_ITEMS); // Limit results
}