928 lines
26 KiB
TypeScript
928 lines
26 KiB
TypeScript
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
import { parseHTML } from "linkedom";
|
|
import cliProgress from "cli-progress";
|
|
|
|
/**
|
|
* Facebook Marketplace Scraper
|
|
*
|
|
* Note: Facebook Marketplace requires authentication cookies for full access.
|
|
* This implementation will return limited or no results without proper authentication.
|
|
* This is by design to respect Facebook's authentication requirements.
|
|
*/
|
|
|
|
// ----------------------------- Types -----------------------------
|
|
|
|
/** Raw HTML text, i.e. the body of an HTTP response read as a string. */
type HTMLString = string;
|
|
|
|
/**
 * A single cookie as stored in the cookies JSON file or built from a
 * `name=value` cookie string.
 */
interface Cookie {
  // --- Required identity ---
  name: string;
  value: string;
  /** A leading "." marks a domain cookie that also applies to subdomains. */
  domain: string;
  path: string;

  // --- Optional attributes ---
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  session?: boolean;
  /** Expiry as seconds since the Unix epoch (it is compared against `Date.now() / 1000`). */
  expirationDate?: number;
  partitionKey?: Record<string, unknown>;
  storeId?: string;
}
|
|
|
|
/**
 * One search-result entry as found under `feed_units.edges[].node` of the
 * marketplace search payload.
 *
 * Only the fields this module actually reads are declared; everything else
 * is reachable through the index signatures as `unknown`.
 */
interface FacebookAdNode {
  node: {
    listing: {
      id: string;
      marketplace_listing_title?: string;
      listing_price?: {
        /** Price in major currency units, e.g. "1.00". */
        amount?: string | number;
        currency?: string;
        /** Human-readable price, e.g. "CA$1". */
        formatted_amount?: string;
        /** Encoded price value; not plain cents. */
        amount_with_offset_in_currency?: string | number;
      };
      location?: {
        reverse_geocode?: {
          city_page?: {
            display_name?: string;
          };
        };
      };
      /** Creation time in seconds since the Unix epoch. */
      creation_time?: number;

      // Status flags used to derive the listing status.
      is_live?: boolean;
      is_sold?: boolean;
      is_pending?: boolean;
      is_hidden?: boolean;

      // Media.
      primary_listing_photo?: {
        image?: {
          uri?: string;
        };
      };
      listing_video?: {
        id?: string;
      };

      // Seller and commerce metadata.
      marketplace_listing_seller?: {
        name?: string;
        id?: string;
      };
      marketplace_listing_category_id?: string;
      delivery_types?: string[];

      [k: string]: unknown;
    };
    [k: string]: unknown;
  };
}
|
|
|
|
/**
 * One element of `feed_units.edges`: wraps a listing node and may carry
 * additional, untyped sibling properties.
 */
interface FacebookEdge {
  [k: string]: unknown;
  node: FacebookAdNode["node"];
}
|
|
|
|
/**
 * The `marketplace_search` object of a search payload. The listings live in
 * `feed_units.edges`; any other property is left untyped.
 */
interface FacebookMarketplaceSearch {
  [k: string]: unknown;
  feed_units?: {
    edges?: Array<FacebookEdge>;
  };
}
|
|
|
|
/**
 * Top-level shape of a script payload that carries a `require` array.
 * Each entry is a 5-tuple whose fourth element holds the search data.
 */
interface FacebookRequireData {
  [k: string]: unknown;
  require?: Array<[number, number, number, FacebookMarketplaceSearch, number]>;
}
|
|
|
|
/**
 * Detail-page representation of a single marketplace listing
 * (`__typename === "GroupCommerceProductItem"`).
 *
 * Unknown extra properties are permitted through the index signature.
 */
interface FacebookMarketplaceItem {
  /* ---- Identification ---- */
  id: string;
  __typename: "GroupCommerceProductItem";

  /* ---- Listing content ---- */
  marketplace_listing_title: string;
  redacted_description?: { text: string };
  custom_title?: string;

  /* ---- Pricing ---- */
  formatted_price?: { text: string };
  listing_price?: {
    amount: string;
    currency: string;
    amount_with_offset: string;
  };

  /* ---- Location ---- */
  location_text?: { text: string };
  location?: {
    latitude: number;
    longitude: number;
    reverse_geocode_detailed?: {
      country_alpha_two: string;
      postal_code_trimmed: string;
    };
  };

  /* ---- Status flags ---- */
  is_live?: boolean;
  is_sold?: boolean;
  is_pending?: boolean;
  is_hidden?: boolean;
  is_draft?: boolean;

  /* ---- Timing ---- */
  creation_time?: number;

  /* ---- Seller ---- */
  marketplace_listing_seller?: {
    __typename: "User";
    id: string;
    name: string;
    profile_picture?: { uri: string };
    join_time?: number;
  };

  /* ---- Vehicle-specific fields (automotive listings) ---- */
  vehicle_make_display_name?: string;
  vehicle_model_display_name?: string;
  vehicle_odometer_data?: {
    unit: "KILOMETERS" | "MILES";
    value: number;
  };
  vehicle_transmission_type?: "AUTOMATIC" | "MANUAL";
  vehicle_exterior_color?: string;
  vehicle_interior_color?: string;
  vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR";
  vehicle_fuel_type?: string;
  vehicle_trim_display_name?: string;

  /* ---- Category and condition ---- */
  marketplace_listing_category_id?: string;
  condition?: string;

  /* ---- Commerce features ---- */
  delivery_types?: Array<string>;
  is_shipping_offered?: boolean;
  is_buy_now_enabled?: boolean;
  can_buyer_make_checkout_offer?: boolean;

  /* ---- Communication ---- */
  messaging_enabled?: boolean;
  first_message_suggested_value?: string;

  /* ---- Metadata ---- */
  logging_id?: string;
  reportable_ent_id?: string;

  /* ---- Related listings (e.g. part-out sellers) ---- */
  marketplace_listing_sets?: {
    edges: {
      node: {
        canonical_listing: {
          id: string;
          marketplace_listing_title: string;
          is_live: boolean;
          is_sold: boolean;
          formatted_price: { text: string };
        };
      };
    }[];
  };

  [k: string]: unknown;
}
|
|
|
|
/**
 * Normalized listing record produced by this module from raw search results.
 */
type ListingDetails = {
  /* ---- Core ---- */
  url: string;
  title: string;
  description?: string;
  listingPrice?: {
    /** Display string for the price. */
    amountFormatted: string;
    /** Price in minor units (dollars * 100). */
    cents?: number;
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  /** ISO-8601 timestamp string. */
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  address?: string | null;

  /* ---- Facebook-specific ---- */
  imageUrl?: string;
  videoUrl?: string;
  seller?: { name?: string; id?: string };
  categoryId?: string;
  deliveryTypes?: Array<string>;
};
|
|
|
|
// ----------------------------- Utilities -----------------------------
|
|
|
|
/**
 * Type guard for "any non-null object". Note that arrays also satisfy it,
 * because `typeof [] === "object"`.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
|
|
/**
 * Resolve after roughly `ms` milliseconds (backed by `setTimeout`).
 */
async function delay(ms: number): Promise<void> {
  const timer = new Promise((wake) => {
    setTimeout(wake, ms);
  });
  await timer;
}
|
|
|
|
/**
 * Load Facebook cookies from a JSON string or, failing that, from a JSON file.
 *
 * Resolution order:
 *  1. `cookiesSource`, when given and it parses to a JSON array.
 *  2. The file at `cookiePath` (read through `Bun.file`), when it exists and
 *     contains a JSON array.
 *  3. An empty array.
 *
 * Array entries are kept only if they are objects with string `name` and
 * `value` properties; no other cookie field is validated here.
 *
 * @param cookiesSource Optional JSON text holding an array of cookie objects.
 * @param cookiePath Path of the cookie JSON file used as a fallback.
 * @returns The cookies that passed validation (possibly empty).
 * @throws Error when `cookiesSource` is provided but is not valid JSON.
 */
async function loadFacebookCookies(cookiesSource?: string, cookiePath = './cookies/facebook.json'): Promise<Cookie[]> {
  // Shared validation for both sources: returns null when the parsed JSON is
  // not an array so the caller can fall through to the next source.
  const pickCookies = (parsed: unknown): Cookie[] | null => {
    if (!Array.isArray(parsed)) return null;
    return parsed.filter(
      (entry): entry is Cookie =>
        typeof entry === "object" &&
        entry !== null &&
        typeof (entry as { name?: unknown }).name === "string" &&
        typeof (entry as { value?: unknown }).value === "string",
    );
  };

  // 1. Explicit JSON string supplied by the caller.
  if (cookiesSource) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(cookiesSource);
    } catch (e) {
      throw new Error(`Invalid cookies JSON provided: ${e}`);
    }
    const fromSource = pickCookies(parsed);
    if (fromSource) return fromSource;
    // Valid JSON that is not an array: fall back to the cookie file.
  }

  // 2. Cookie file on disk.
  try {
    const file = Bun.file(cookiePath);
    if (await file.exists()) {
      const fromFile = pickCookies(JSON.parse(await file.text()));
      if (fromFile) return fromFile;
    }
  } catch (e) {
    // Report the path that was actually attempted, not the default one.
    console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
  }

  return [];
}
|
|
|
|
/**
 * Parse a `Cookie`-header style string ("name1=value1; name2=value2") into
 * cookie objects scoped to `.facebook.com`.
 *
 * - Pairs are split on the FIRST "=" only, so values that themselves contain
 *   "=" (for example base64 padding) are preserved intact.
 * - Pairs without "=", or with an empty name or value, are skipped.
 * - Values are percent-decoded; a value that is not valid percent-encoding is
 *   kept verbatim instead of aborting the whole parse.
 *
 * @param cookieString Raw cookie string; empty or whitespace-only yields `[]`.
 * @returns One cookie per valid pair, in input order.
 */
function parseFacebookCookieString(cookieString: string): Cookie[] {
  if (!cookieString || !cookieString.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(';')) {
    const pair = rawPair.trim();
    const separatorIndex = pair.indexOf('=');
    if (separatorIndex === -1) continue;

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();

    // Skip empty names or values
    if (!name || !rawValue) continue;

    let value: string;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent sequence (URIError): keep the raw text.
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain: '.facebook.com',
      path: '/',
      secure: true,
      httpOnly: false,
      sameSite: 'lax',
      expirationDate: undefined, // Session cookies
    });
  }

  return cookies;
}
|
|
|
|
/**
 * Return usable Facebook cookies, bootstrapping the cookie file from the
 * `FACEBOOK_COOKIE` environment variable when the file yields nothing.
 *
 * Steps:
 *  1. Try `loadFacebookCookies(undefined, cookiePath)`; return its result if
 *     it is non-empty.
 *  2. Otherwise parse `process.env.FACEBOOK_COOKIE` with
 *     `parseFacebookCookieString`.
 *  3. Best-effort write of the parsed cookies to `cookiePath` via `Bun.write`
 *     (a write failure only logs a warning).
 *
 * @param cookiePath Cookie JSON file to read from and write to.
 * @returns A non-empty cookie array.
 * @throws Error when no cookies are found in the file and the environment
 *   variable is unset, blank, or contains no valid `name=value` pairs.
 */
async function ensureFacebookCookies(cookiePath = './cookies/facebook.json'): Promise<Cookie[]> {
  // First try to load existing cookies
  try {
    const existing = await loadFacebookCookies(undefined, cookiePath);
    if (existing.length > 0) {
      return existing;
    }
  } catch {
    // Deliberately ignored: an unreadable or invalid file just means we fall
    // back to the environment variable below.
  }

  // Try to parse from environment variable
  const cookieString = process.env.FACEBOOK_COOKIE;
  if (!cookieString || !cookieString.trim()) {
    throw new Error(
      'No valid Facebook cookies found. Either:\n' +
        ' 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n' +
        // Name the path that was actually checked, not the default.
        ` 2. Create ${cookiePath} manually with cookie array`,
    );
  }

  const cookies = parseFacebookCookieString(cookieString);
  if (cookies.length === 0) {
    throw new Error(
      'FACEBOOK_COOKIE environment variable contains no valid cookies. ' +
        'Expected format: "name1=value1; name2=value2;"',
    );
  }

  // Save to file for future use
  try {
    await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
    console.log(`✅ Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
  } catch (error) {
    console.warn(`⚠️ Could not save cookies to ${cookiePath}: ${error}`);
    // Continue anyway, we have the cookies in memory
  }

  return cookies;
}
|
|
|
|
/**
 * Build the value of a `Cookie` request header for `domain`.
 *
 * A cookie is included when:
 *  - its `domain` is a non-empty string that matches the host: a leading "."
 *    matches the bare domain and any subdomain on a label boundary
 *    (".facebook.com" matches "facebook.com" and "www.facebook.com" but NOT
 *    "notfacebook.com"); without a leading "." the match must be exact; and
 *  - it has no `expirationDate`, or the date (epoch seconds) is not in the past.
 *
 * Cookies whose `domain` is missing or not a string are skipped rather than
 * causing a TypeError (the loader only validates `name` and `value`).
 *
 * @param cookies Candidate cookies.
 * @param domain Request host name, e.g. "www.facebook.com".
 * @returns "name=value" pairs joined by "; ", or "" when nothing applies.
 */
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookieDomain: unknown): boolean => {
    if (typeof cookieDomain !== "string" || cookieDomain === "") return false;
    if (!cookieDomain.startsWith(".")) {
      // Host-only cookie
      return cookieDomain === domain;
    }
    // Domain cookie: exact bare-domain match, or a subdomain. The suffix test
    // keeps the leading dot so the match lands on a label boundary.
    return domain === cookieDomain.slice(1) || domain.endsWith(cookieDomain);
  };

  return cookies
    .filter((cookie) => appliesToHost(cookie.domain))
    .filter((cookie) => !(cookie.expirationDate && cookie.expirationDate < nowSeconds))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
|
|
|
/**
 * Error raised for a non-successful HTTP response; carries the response
 * status code and the requested URL.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
|
|
|
|
// ----------------------------- HTTP Client -----------------------------
|
|
|
|
/**
 * GET `url` and return the response body as text, with retries and a
 * post-request pause for simple rate limiting.
 *
 * Retry policy (at most `maxRetries` retries after the first attempt):
 *  - 429: wait `X-RateLimit-Reset` seconds when that header is numeric,
 *    otherwise a linear backoff, then retry.
 *  - 5xx: linear backoff `(attempt + 1) * retryBaseMs`, then retry.
 *  - Network/body-read failures: same linear backoff, then retry.
 *  - Any other non-OK status: fails immediately with `HttpError`.
 *
 * An `HttpError` is never retried from the catch block, so the "don't retry
 * 4xx" rule actually holds, and exhausting retries on 429/5xx surfaces as an
 * `HttpError` carrying the status instead of a generic error.
 *
 * @param url Absolute URL to fetch.
 * @param DELAY_MS Pause applied after a successful response, in milliseconds.
 * @param opts.maxRetries Maximum number of retries (default 3).
 * @param opts.retryBaseMs Base backoff in milliseconds (default 500).
 * @param opts.onRateInfo Called after every response with the raw
 *   `X-RateLimit-Remaining` and `X-RateLimit-Reset` header values.
 * @param opts.cookies Ready-made `Cookie` header value.
 * @returns The response body text.
 * @throws HttpError for non-OK responses that are not (or no longer) retried.
 * @throws The underlying error when the final attempt fails at network level.
 */
async function fetchHtml(
  url: string,
  DELAY_MS: number,
  opts?: {
    maxRetries?: number;
    retryBaseMs?: number;
    onRateInfo?: (remaining: string | null, reset: string | null) => void;
    cookies?: string;
  },
): Promise<HTMLString> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;

  // Headers do not change between attempts, so build them once.
  const headers: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "accept-encoding": "gzip, deflate, br",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };

  // Add cookies if provided
  if (opts?.cookies) {
    headers["cookie"] = opts.cookies;
  }

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const isLastAttempt = attempt >= maxRetries;
    const backoffMs = (attempt + 1) * retryBaseMs;

    try {
      const res = await fetch(url, { method: "GET", headers });

      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (res.ok) {
        const html = await res.text();
        // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
        await delay(DELAY_MS);
        return html;
      }

      // Rate limited: honour the advertised reset when it is numeric.
      if (res.status === 429 && !isLastAttempt) {
        const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
        const waitMs = Number.isFinite(resetSeconds)
          ? Math.max(0, resetSeconds * 1000)
          : backoffMs;
        await delay(waitMs);
        continue;
      }

      // Server error: retry with backoff while attempts remain.
      if (res.status >= 500 && res.status < 600 && !isLastAttempt) {
        await delay(backoffMs);
        continue;
      }

      // For Facebook, 400 often means authentication required.
      const isOtherClientError =
        res.status >= 400 && res.status < 500 && res.status !== 429;
      throw new HttpError(
        isOtherClientError
          ? `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`
          : `Request failed with status ${res.status}`,
        res.status,
        url,
      );
    } catch (err) {
      // HTTP-level failures were already classified above; only transient
      // (network / body-read) errors are retried from here.
      if (err instanceof HttpError || isLastAttempt) throw err;
      await delay(backoffMs);
    }
  }

  throw new Error("Exhausted retries without response");
}
|
|
|
|
// ----------------------------- Parsing -----------------------------
|
|
|
|
/**
 * Extract marketplace search results from the JSON `<script>` payloads of a
 * Facebook Marketplace search page.
 *
 * Every script whose text parses as JSON is probed for a `marketplace_search`
 * object, first through several known positions inside `require`, then as a
 * direct top-level property. The first object whose `feed_units.edges` is a
 * non-empty array wins.
 *
 * @param htmlString Full HTML of the search page.
 * @returns One `{ node }` wrapper per edge, or `null` (with a console warning)
 *   when no script contains usable search data.
 */
function extractFacebookMarketplaceData(
  htmlString: HTMLString,
): FacebookAdNode[] | null {
  const { document } = parseHTML(htmlString);
  const scripts = Array.from(document.querySelectorAll("script")) as HTMLScriptElement[];

  // True only for objects that actually carry a non-empty edges ARRAY, so the
  // `.map` at the end cannot hit a non-array value.
  const hasEdges = (candidate: unknown): candidate is FacebookMarketplaceSearch => {
    if (!isRecord(candidate)) return false;
    const feedUnits = candidate.feed_units;
    return isRecord(feedUnits) && Array.isArray(feedUnits.edges) && feedUnits.edges.length > 0;
  };

  // Probe one parsed JSON payload for search data.
  const locateSearchData = (parsed: any): FacebookMarketplaceSearch | null => {
    if (Array.isArray(parsed?.require)) {
      // Each lookup may throw on a shape mismatch; that just means "try the next one".
      const lookups: Array<() => unknown> = [
        // Original path from example
        () => parsed.require[0][3][0]['__bbox']['require'][0][3][1]['__bbox']['result']['data']['marketplace_search'],
        // Alternative path structure
        () => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
        // Another variation
        () => parsed.require[0][3][0]['__bbox']['result']['data']['marketplace_search'],
        // Direct access for some responses
        () => {
          for (const item of parsed.require) {
            if (item && item.length >= 4 && item[3]) {
              const found = item[3]?.['__bbox']?.result?.data?.marketplace_search;
              if (found) return found;
            }
          }
          return null;
        },
      ];

      for (const lookup of lookups) {
        try {
          const candidate = lookup();
          if (hasEdges(candidate)) return candidate;
        } catch {
          // Path does not exist in this payload.
        }
      }
    }

    // Also check for direct marketplace_search in the parsed data
    const direct: unknown = parsed?.marketplace_search;
    return hasEdges(direct) ? direct : null;
  };

  let marketplaceData: FacebookMarketplaceSearch | null = null;

  for (const script of scripts) {
    const scriptText = script.textContent;
    if (!scriptText) continue;

    try {
      marketplaceData = locateSearchData(JSON.parse(scriptText));
    } catch {
      // Ignore parsing errors for other scripts
      continue;
    }

    if (marketplaceData) break;
  }

  const edges = marketplaceData?.feed_units?.edges;
  if (!edges || edges.length === 0) {
    console.warn("No marketplace data found in HTML response");
    return null;
  }

  console.log(`Successfully parsed ${edges.length} Facebook marketplace listings`);
  return edges.map((edge) => ({ node: edge.node }));
}
|
|
|
|
/**
 * Running counters for extraction attempts, kept for the lifetime of the
 * module. `lastApiChangeDetected` records when the low-success-rate warning
 * was emitted and doubles as the "already warned" latch.
 */
let extractionStats = {
  totalExtractions: 0,
  successfulExtractions: 0,
  failedExtractions: 0,
  lastApiChangeDetected: null as Date | null,
};

/**
 * Record the outcome of one extraction attempt and log it.
 *
 * Emits a one-time warning once more than 10 attempts have been recorded and
 * the overall success rate is below 80%.
 *
 * @param success Whether the extraction succeeded.
 * @param itemId Identifier used in the failure log line ("unknown" if absent).
 */
function logExtractionMetrics(success: boolean, itemId?: string) {
  const stats = extractionStats;

  stats.totalExtractions += 1;
  if (success) {
    stats.successfulExtractions += 1;
  } else {
    stats.failedExtractions += 1;
  }

  // Warn once when the success rate drops below 80% on a meaningful sample.
  const ratio = stats.successfulExtractions / stats.totalExtractions;
  const enoughSamples = stats.totalExtractions > 10;
  const alreadyWarned = Boolean(stats.lastApiChangeDetected);
  if (enoughSamples && ratio < 0.8 && !alreadyWarned) {
    console.warn("⚠️ Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.");
    stats.lastApiChangeDetected = new Date();
  }

  if (!success) {
    console.warn(`❌ Facebook API extraction failed for item ${itemId || 'unknown'}`);
    return;
  }
  console.log(`📊 Facebook API extraction stats: ${stats.successfulExtractions}/${stats.totalExtractions} successful`);
}
|
|
|
|
/**
 * Format an amount given in cents as a localized currency string.
 *
 * @param num Amount in cents. Strings are parsed as base-10 integers.
 * @param locale BCP 47 locale used for formatting (default "en-US").
 * @param currency ISO 4217 currency code (default "USD", the value that was
 *   previously hard-coded).
 * @returns The formatted amount with exactly two fraction digits, or "" when
 *   `num` is null/undefined or does not yield a finite number.
 */
function formatCentsToCurrency(
  num: number | string | undefined,
  locale = "en-US",
  currency = "USD",
): string {
  if (num == null) return "";

  const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
  // Rejects NaN and +/-Infinity, which would otherwise format as "$NaN" / "$∞".
  if (!Number.isFinite(cents)) return "";

  const formatter = new Intl.NumberFormat(locale, {
    style: 'currency',
    currency,
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
    useGrouping: true,
  });
  return formatter.format(cents / 100);
}
|
|
|
|
/**
 * Extract the listing object from the JSON `<script>` payloads of a Facebook
 * Marketplace item page.
 *
 * For each script whose text parses as JSON, in order:
 *  1. If it has a `require` array, try a list of known fixed paths to
 *     `marketplace_product_details_page.target`.
 *  2. Still within `require`, fall back to a depth-limited recursive search
 *     for an item object that also carries `redacted_description`.
 *  3. Check a top-level `__bbox.result.data.viewer...target`, whether or not
 *     the payload has a `require` array.
 *
 * A candidate counts as an item when it is an object with truthy `id` and
 * `marketplace_listing_title` and
 * `__typename === "GroupCommerceProductItem"`.
 *
 * @param htmlString Full HTML of the item page.
 * @returns The first matching item, or `null` when none is found.
 */
function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceItem | null {
  const { document } = parseHTML(htmlString);
  const scripts = document.querySelectorAll("script");

  // Single definition of "looks like a marketplace item", shared by all paths.
  const isItem = (candidate: unknown): candidate is FacebookMarketplaceItem =>
    isRecord(candidate) &&
    Boolean(candidate.id) &&
    Boolean(candidate.marketplace_listing_title) &&
    candidate.__typename === 'GroupCommerceProductItem';

  // Depth-limited search through nested objects and arrays. Arrays are tested
  // first because `isRecord` is also true for them.
  const searchTree = (value: unknown, depth = 0, maxDepth = 10): FacebookMarketplaceItem | null => {
    if (depth > maxDepth) return null; // Guard against runaway recursion

    if (Array.isArray(value)) {
      for (const element of value) {
        const hit = searchTree(element, depth + 1, maxDepth);
        if (hit) return hit;
      }
      return null;
    }

    if (!isRecord(value)) return null;

    // The recursive fallback is stricter: it also requires a description.
    if (isItem(value) && value.redacted_description) {
      return value;
    }

    for (const child of Object.values(value)) {
      if (isRecord(child)) {
        const hit = searchTree(child, depth + 1, maxDepth);
        if (hit) return hit;
      }
    }
    return null;
  };

  for (const script of scripts) {
    const scriptText = script.textContent;
    if (!scriptText) continue;

    try {
      const parsed = JSON.parse(scriptText);

      if (Array.isArray(parsed?.require)) {
        // Known fixed locations; each may throw when the shape differs.
        const extractionPaths: Array<() => unknown> = [
          // Path 1: Primary path from current API structure
          () => parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target,
          // Path 2: Alternative path with nested require
          () => parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target,
          // Path 3: Variation without the [0] index
          () => parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target,
          // Path 4-5: Additional fallback paths for edge cases
          () => parsed.require[0][3][1]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target,
          () => parsed.require[0][3][2]?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target,
        ];

        for (const [pathIndex, getPath] of extractionPaths.entries()) {
          try {
            const targetData = getPath();
            if (isItem(targetData)) {
              console.log(`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`);
              return targetData;
            }
          } catch {
            // Path not found or invalid, try next path
          }
        }

        // Fallback: search the entire require structure.
        const recursiveResult = searchTree(parsed.require);
        if (recursiveResult) {
          console.log('Successfully extracted Facebook item data using recursive search');
          return recursiveResult;
        }
      }

      // Top-level __bbox payloads, checked regardless of `require`.
      const bboxData: unknown =
        parsed?.__bbox?.result?.data?.viewer?.marketplace_product_details_page?.target;
      if (isItem(bboxData)) {
        console.log('Successfully extracted Facebook item data from __bbox structure');
        return bboxData;
      }
    } catch (error) {
      // Log parsing errors for debugging but continue to next script
      console.debug(`Failed to parse script for Facebook item data: ${error}`);
    }
  }

  return null;
}
|
|
|
|
/**
 * Derive a price in cents from a raw `listing_price` object.
 *
 * Order of preference:
 *  1. `amount` (major units, e.g. "1.00") multiplied by 100.
 *  2. When only `amount_with_offset_in_currency` is present and is a positive
 *     number: the numeric part of `formatted_amount` (ALL thousands separators
 *     removed) multiplied by 100, falling back to the encoded value itself.
 *
 * @returns The price in cents, or `null` when no usable price is present.
 */
function resolveListingPriceCents(price: Record<string, unknown>): number | null {
  const amount = price.amount;
  if (amount != null) {
    const dollars =
      typeof amount === 'string'
        ? Number.parseFloat(amount)
        : typeof amount === 'number'
          ? amount
          : NaN;
    return Math.round(dollars * 100);
  }

  const encoded = price.amount_with_offset_in_currency;
  if (encoded == null) return null; // No price available

  const encodedAmount = Number(encoded);
  if (Number.isNaN(encodedAmount) || encodedAmount <= 0) return null; // Invalid price

  // The encoded field does not hold real cents; prefer the display string.
  const formatted = price.formatted_amount;
  if (typeof formatted === 'string' && formatted) {
    const match = formatted.match(/[\d,]+\.?\d*/);
    if (match) {
      const dollars = Number.parseFloat(match[0].replace(/,/g, ''));
      if (!Number.isNaN(dollars)) return Math.round(dollars * 100);
    }
  }
  return encodedAmount; // fallback
}

/**
 * Convert raw marketplace search nodes into `ListingDetails` records.
 *
 * An ad is skipped when it has no title, no price object, no resolvable
 * positive finite price, or when reading it throws (malformed node).
 *
 * @param ads Raw nodes as returned by the search-page extraction.
 * @returns Listings in input order; never throws for malformed entries.
 */
function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
  // Small narrowing helpers for fields that are only reachable as `unknown`.
  const asRecord = (value: unknown): Record<string, unknown> | undefined =>
    typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : undefined;
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  const results: ListingDetails[] = [];

  for (const adJson of ads) {
    try {
      const listing = adJson.node.listing;
      const title = listing.marketplace_listing_title;
      const priceObj = asRecord(listing.listing_price);

      if (!title || !priceObj) continue;

      const cents = resolveListingPriceCents(priceObj);
      if (cents === null || !Number.isFinite(cents) || cents <= 0) continue;

      const url = `https://www.facebook.com/marketplace/item/${listing.id}`;

      // Extract address from location data if available
      const cityName = listing.location?.reverse_geocode?.city_page?.display_name;
      const address = cityName || null;

      // Determine listing status from Facebook flags (first match wins)
      let listingStatus: string | undefined;
      if (listing.is_sold) {
        listingStatus = "SOLD";
      } else if (listing.is_pending) {
        listingStatus = "PENDING";
      } else if (listing.is_live) {
        listingStatus = "ACTIVE";
      } else if (listing.is_hidden) {
        listingStatus = "HIDDEN";
      }

      // creation_time is in seconds; Date expects milliseconds.
      const creationDate = listing.creation_time
        ? new Date(listing.creation_time * 1000).toISOString()
        : undefined;

      // Image and video URLs. The video URL is only built when an id exists,
      // so a video object without id no longer yields ".../undefined/".
      const imageUrl = asString(asRecord(asRecord(listing.primary_listing_photo)?.image)?.uri);
      const videoId = asRecord(listing.listing_video)?.id;
      const videoUrl =
        typeof videoId === 'string' || typeof videoId === 'number'
          ? `https://www.facebook.com/${videoId}/`
          : undefined;

      // Seller information
      const sellerRecord = asRecord(listing.marketplace_listing_seller);
      const seller = sellerRecord
        ? { name: asString(sellerRecord.name), id: asString(sellerRecord.id) }
        : undefined;

      const rawDeliveryTypes = listing.delivery_types;
      const deliveryTypes = Array.isArray(rawDeliveryTypes)
        ? rawDeliveryTypes.filter((entry): entry is string => typeof entry === 'string')
        : undefined;

      results.push({
        url,
        title,
        listingPrice: {
          amountFormatted: asString(priceObj.formatted_amount) || formatCentsToCurrency(cents),
          cents,
          currency: asString(priceObj.currency) || "CAD", // Facebook marketplace often uses CAD
        },
        address,
        creationDate,
        listingType: "item", // Default type for marketplace listings
        listingStatus,
        categoryId: asString(listing.marketplace_listing_category_id),
        imageUrl,
        videoUrl,
        seller,
        deliveryTypes,
      });
    } catch {
      // Skip malformed ads
      continue;
    }
  }

  return results;
}
|
|
|
|
// ----------------------------- Main -----------------------------
|
|
|
|
/**
 * Search Facebook Marketplace and return normalized, priced listings.
 *
 * Flow: load cookies -> build the `Cookie` header for www.facebook.com ->
 * fetch the search page -> extract the embedded search data -> convert it to
 * `ListingDetails` -> keep priced items -> truncate to `MAX_ITEMS`.
 *
 * @param SEARCH_QUERY Free-text query; URL-encoded before use.
 * @param REQUESTS_PER_SECOND Request-rate ceiling used to derive the
 *   post-request delay. Non-finite or non-positive values fall back to 1.
 * @param LOCATION Location slug placed in the marketplace URL path.
 * @param MAX_ITEMS Maximum number of listings returned; negative values are
 *   treated as 0.
 * @param cookiesSource Optional JSON string with a cookie array; when absent
 *   the default cookie file is used.
 * @returns Up to `MAX_ITEMS` listings. Returns `[]` when the search request
 *   fails with an `HttpError` or when no listings can be parsed.
 * @throws Error when no cookies are available or none apply to
 *   www.facebook.com; non-HTTP fetch failures are rethrown.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  LOCATION = "toronto",
  MAX_ITEMS = 25,
  cookiesSource?: string,
): Promise<ListingDetails[]> {
  // Load Facebook cookies - required for Facebook Marketplace access
  const cookies = await loadFacebookCookies(cookiesSource);
  if (cookies.length === 0) {
    throw new Error(
      "Facebook cookies are required for marketplace access. " +
        "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
    );
  }

  // Format cookies for HTTP header
  const domain = "www.facebook.com";
  const cookiesHeader = formatCookiesForHeader(cookies, domain);
  if (!cookiesHeader) {
    throw new Error(
      "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
    );
  }

  // Guard the divisor: 0, negative, NaN or Infinity would otherwise produce a
  // meaningless (Infinity/NaN) delay.
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  // Facebook marketplace URL structure
  const encodedQuery = encodeURIComponent(SEARCH_QUERY);
  const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;

  console.log(`Fetching Facebook marketplace: ${searchUrl}`);
  console.log(`Using ${cookies.length} cookies for authentication`);

  let searchHtml: string;
  try {
    searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
      onRateInfo: (remaining, reset) => {
        if (remaining && reset) {
          console.log(
            `\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
          );
        }
      },
      cookies: cookiesHeader,
    });
  } catch (err) {
    // HTTP failures are reported and turned into an empty result; anything
    // else (e.g. a network error after retries) propagates.
    if (!(err instanceof HttpError)) throw err;

    console.warn(
      `\nFacebook marketplace access failed (${err.status}): ${err.message}`,
    );
    if (err.status === 400 || err.status === 401 || err.status === 403) {
      console.warn(
        "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
      );
    }
    return [];
  }

  const ads = extractFacebookMarketplaceData(searchHtml);
  if (!ads || ads.length === 0) {
    console.warn("No ads parsed from Facebook marketplace page.");
    return [];
  }

  console.log(`\nFound ${ads.length} raw ads. Processing...`);

  const progressBar = new cliProgress.SingleBar(
    {},
    cliProgress.Presets.shades_classic,
  );
  progressBar.start(ads.length, 0);

  let items: ListingDetails[];
  try {
    items = parseFacebookAds(ads);
    progressBar.update(ads.length);
  } finally {
    // Always release the terminal, even if parsing throws unexpectedly.
    progressBar.stop();
  }

  // Filter to only priced items (already done in parseFacebookAds)
  const pricedItems = items.filter(
    (item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
  );

  console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);

  // A negative limit would make slice() drop items from the end instead.
  return pricedItems.slice(0, Math.max(0, MAX_ITEMS));
}
|