feat: facebook scraping
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -41,3 +41,4 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
||||||
examples/*
|
examples/*
|
||||||
|
cookies/*.json
|
||||||
|
|||||||
52
cookies/README.md
Normal file
52
cookies/README.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# Facebook Marketplace Cookies Setup
|
||||||
|
|
||||||
|
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies.
|
||||||
|
|
||||||
|
## Option 1: Cookies File (`facebook.json`)
|
||||||
|
|
||||||
|
1. Log into Facebook in your browser
|
||||||
|
2. Open Developer Tools → Network tab
|
||||||
|
3. Visit facebook.com/marketplace (ensure you're logged in)
|
||||||
|
4. Look for any marketplace-related requests in the Network tab
|
||||||
|
5. Export cookies from the browser's Application/Storage → Cookies section
|
||||||
|
6. Save the cookies as a JSON array to `facebook.json`
|
||||||
|
|
||||||
|
The `facebook.json` file should contain Facebook session cookies, particularly:
|
||||||
|
- `c_user`: Your Facebook user ID
|
||||||
|
- `xs`: Facebook session token
|
||||||
|
- `fr`: Facebook request token
|
||||||
|
- `datr`: Data attribution token
|
||||||
|
- `sb`: Session browser token
|
||||||
|
|
||||||
|
Example structure:
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "c_user",
|
||||||
|
"value": "123456789",
|
||||||
|
"domain": ".facebook.com",
|
||||||
|
"path": "/",
|
||||||
|
"secure": true
|
||||||
|
},
|
||||||
|
// ... other cookies
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Option 2: URL Parameter
|
||||||
|
|
||||||
|
You can pass cookies directly via the `cookies` URL parameter, as a URL-encoded JSON array:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Important Notes
|
||||||
|
|
||||||
|
- Cookies must be from an active Facebook session
|
||||||
|
- Cookies expire, so you may need to refresh them periodically
|
||||||
|
- Never share real cookies or commit them to version control
|
||||||
|
- Facebook may block automated scraping even with valid cookies
|
||||||
|
|
||||||
|
## Security
|
||||||
|
|
||||||
|
The cookies file is intentionally left out of version control for security reasons.
|
||||||
562
src/facebook.ts
Normal file
562
src/facebook.ts
Normal file
@@ -0,0 +1,562 @@
|
|||||||
|
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||||
|
import { parseHTML } from "linkedom";
|
||||||
|
import cliProgress from "cli-progress";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Facebook Marketplace Scraper
|
||||||
|
*
|
||||||
|
* Note: Facebook Marketplace requires authentication cookies for full access.
|
||||||
|
* This implementation will return limited or no results without proper authentication.
|
||||||
|
* This is by design to respect Facebook's authentication requirements.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
|
type HTMLString = string;
|
||||||
|
|
||||||
|
/**
 * A browser cookie in the shape this module reads from
 * `./cookies/facebook.json` or from an inline JSON string.
 */
interface Cookie {
  /** Cookie name; emitted as the left side of `name=value` in the header. */
  name: string;
  /** Cookie value; emitted verbatim as the right side of `name=value`. */
  value: string;
  /**
   * Domain the cookie applies to. A leading "." marks a domain cookie
   * (also valid for subdomains); anything else is treated as host-only.
   */
  domain: string;
  path: string;
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  session?: boolean;
  /** Expiry as Unix time in seconds (it is compared against `Date.now() / 1000`). */
  expirationDate?: number;
  /** Never read by this module, so it is kept opaque instead of `any`. */
  partitionKey?: unknown;
  storeId?: string;
}
|
||||||
|
|
||||||
|
/**
 * One marketplace search result as it appears in Facebook's embedded JSON.
 * Only the fields this module reads are typed; everything else is reachable
 * through the index signatures.
 */
interface FacebookAdNode {
  node: {
    listing: {
      /** Listing id; used to build the public item URL. */
      id: string;
      marketplace_listing_title?: string;
      listing_price?: {
        /** Price in whole currency units, e.g. "1.00". */
        amount?: string | number;
        currency?: string;
        /** Human-readable price, e.g. "CA$1". */
        formatted_amount?: string;
        /** Facebook's internal price encoding; not plain cents. */
        amount_with_offset_in_currency?: string | number;
      };
      location?: {
        reverse_geocode?: {
          city_page?: {
            display_name?: string;
          };
        };
      };
      /** Creation time in Unix seconds (multiplied by 1000 before building a Date). */
      creation_time?: number;
      [k: string]: unknown;
    };
    [k: string]: unknown;
  };
}
|
||||||
|
|
||||||
|
/** One entry of `feed_units.edges`: a wrapper around a listing node. */
interface FacebookEdge {
  node: FacebookAdNode["node"];
  [k: string]: unknown;
}
|
||||||
|
|
||||||
|
/** The `marketplace_search` payload; listings live under `feed_units.edges`. */
interface FacebookMarketplaceSearch {
  feed_units?: {
    edges?: FacebookEdge[];
  };
  [k: string]: unknown;
}
|
||||||
|
|
||||||
|
/**
 * Top-level shape of a script payload that carries a `require` array.
 *
 * NOTE(review): this interface is not referenced in the visible code, and the
 * tuple shape below does not obviously match the nested `__bbox` paths that
 * extractFacebookMarketplaceData probes. Confirm before relying on it.
 */
interface FacebookRequireData {
  require?: [number, number, number, FacebookMarketplaceSearch, number][];
  [k: string]: unknown;
}
|
||||||
|
|
||||||
|
/** Normalized listing returned to API callers. */
type ListingDetails = {
  /** Public URL of the listing. */
  url: string;
  title: string;
  description?: string;
  listingPrice?: {
    /** Display string for the price. */
    amountFormatted: string;
    /** Price in cents. */
    cents?: number;
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  /** ISO-8601 timestamp string. */
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  /** City display name, or null when the listing has no location data. */
  address?: string | null;
};
|
||||||
|
|
||||||
|
// ----------------------------- Utilities -----------------------------
|
||||||
|
|
||||||
|
/**
 * Type guard for "any non-null object".
 * Arrays pass as well, because `typeof [] === "object"`.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}
|
||||||
|
|
||||||
|
/** Resolve after roughly `ms` milliseconds (backed by `setTimeout`). */
async function delay(ms: number): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Load Facebook cookies from an inline JSON string or, failing that, from
 * `./cookies/facebook.json`.
 *
 * Resolution order:
 * 1. `cookiesSource`, when provided, is parsed as JSON. If it is an array,
 *    its usable entries are returned. If it is valid JSON but not an array,
 *    the function falls through to the file.
 * 2. `./cookies/facebook.json`, when it exists and contains a JSON array.
 * 3. Otherwise an empty array.
 *
 * An entry is usable when `name`, `value` and `domain` are all strings.
 * `domain` is required because formatCookiesForHeader calls string methods on
 * it, so an entry without one would otherwise throw a TypeError later.
 *
 * @param cookiesSource Optional JSON text holding an array of cookie objects.
 * @returns The usable cookies; possibly an empty array.
 * @throws Error when `cookiesSource` is provided but is not valid JSON.
 */
async function loadFacebookCookies(cookiesSource?: string): Promise<Cookie[]> {
  // Shared validation for both sources. Returns null when the JSON is not an array.
  // NOTE(review): `path` is declared on Cookie but is neither validated nor read here.
  const pickUsableCookies = (parsed: unknown): Cookie[] | null => {
    if (!Array.isArray(parsed)) return null;
    return parsed.filter(
      (cookie): cookie is Cookie =>
        typeof cookie === "object" &&
        cookie !== null &&
        typeof cookie.name === "string" &&
        typeof cookie.value === "string" &&
        typeof cookie.domain === "string",
    );
  };

  // First try the inline string parameter.
  if (cookiesSource) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(cookiesSource);
    } catch (e) {
      throw new Error(`Invalid cookies JSON provided: ${e}`);
    }
    const fromParameter = pickUsableCookies(parsed);
    if (fromParameter) return fromParameter;
  }

  // Then try ./cookies/facebook.json. Failures here are non-fatal by design.
  try {
    const cookiesPath = "./cookies/facebook.json";
    const file = Bun.file(cookiesPath);
    if (await file.exists()) {
      const fromFile = pickUsableCookies(JSON.parse(await file.text()));
      if (fromFile) return fromFile;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ./cookies/facebook.json: ${e}`);
  }

  return [];
}
|
||||||
|
|
||||||
|
/**
 * Build the value of a `Cookie` request header for `domain`.
 *
 * A cookie is included when:
 * - its domain starts with "." and `domain` equals the remainder or is a
 *   subdomain of it. The match is on a label boundary, so ".facebook.com"
 *   applies to "facebook.com" and "www.facebook.com" but not to
 *   "evilfacebook.com"; or
 * - its domain has no leading "." and equals `domain` exactly (host-only); and
 * - it has no `expirationDate`, or that date (Unix seconds) is not in the past.
 *
 * Domains are compared case-insensitively. Entries whose `domain` is not a
 * string are skipped instead of throwing.
 *
 * @param cookies Candidate cookies.
 * @param domain Host name the request is sent to, e.g. "www.facebook.com".
 * @returns `name=value` pairs joined with "; ", or "" when nothing applies.
 */
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
  const host = domain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  return cookies
    .filter((cookie) => {
      if (typeof cookie.domain !== "string") return false;
      const cookieDomain = cookie.domain.toLowerCase();

      if (cookieDomain.startsWith(".")) {
        // Domain cookie. Matching against the dotted form keeps the label
        // boundary, so a bare suffix such as "evilfacebook.com" cannot match.
        return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
      }

      // Host-only cookie.
      return cookieDomain === host;
    })
    .filter(
      (cookie) =>
        !(cookie.expirationDate && cookie.expirationDate < nowSeconds),
    )
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||||
|
|
||||||
|
/** Error for a non-successful HTTP response; carries the status code and the requested URL. */
class HttpError extends Error {
  /**
   * @param message Human-readable description.
   * @param status HTTP status code of the response.
   * @param url URL that was requested.
   */
  constructor(
    message: string,
    public readonly status: number,
    public readonly url: string,
  ) {
    super(message);
    this.name = "HttpError";
  }
}
|
||||||
|
|
||||||
|
// ----------------------------- HTTP Client -----------------------------
|
||||||
|
|
||||||
|
/**
 * Fetch a page as text with a basic retry strategy and a fixed delay after a
 * successful call.
 *
 * - Retries on 429, on 5xx, and on errors thrown by `fetch` itself, up to
 *   `maxRetries` extra attempts with linear backoff (`(attempt + 1) * retryBaseMs`).
 * - On 429, waits `X-RateLimit-Reset` seconds when that header is a finite number.
 * - Never retries other non-OK statuses: the HttpError is thrown immediately.
 *   This includes 4xx, which for Facebook usually means the cookies are
 *   missing or expired, so repeating the request cannot help.
 * - When retries run out on 429 or 5xx, an HttpError carrying that status is thrown.
 *
 * @param url Absolute URL to request.
 * @param DELAY_MS Milliseconds to wait after a successful response (rate limiting).
 * @param opts.maxRetries Extra attempts after the first one (default 3).
 * @param opts.retryBaseMs Base backoff in milliseconds (default 500).
 * @param opts.onRateInfo Called after every response with the raw
 *   `X-RateLimit-Remaining` and `X-RateLimit-Reset` header values.
 * @param opts.cookies Ready-made `Cookie` header value.
 * @returns The response body as text.
 * @throws HttpError for a non-OK response that is not, or can no longer be, retried.
 * @throws The underlying error when `fetch` keeps failing after all retries.
 */
async function fetchHtml(
  url: string,
  DELAY_MS: number,
  opts?: {
    maxRetries?: number;
    retryBaseMs?: number;
    onRateInfo?: (remaining: string | null, reset: string | null) => void;
    cookies?: string;
  },
): Promise<HTMLString> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;

  // The headers are identical for every attempt, so build them once.
  const headers: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "accept-encoding": "gzip, deflate, br",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };
  if (opts?.cookies) {
    headers["cookie"] = opts.cookies;
  }

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const backoffMs = (attempt + 1) * retryBaseMs;

    try {
      const res = await fetch(url, { method: "GET", headers });

      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (res.ok) {
        const html = await res.text();
        // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
        await delay(DELAY_MS);
        return html;
      }

      const isRateLimited = res.status === 429;
      const isServerError = res.status >= 500 && res.status < 600;
      const canRetry = (isRateLimited || isServerError) && attempt < maxRetries;

      if (!canRetry) {
        const isOtherClientError =
          res.status >= 400 && res.status < 500 && !isRateLimited;
        const message = isOtherClientError
          ? `Request failed with status ${res.status} (Facebook may require authentication cookies for access)`
          : `Request failed with status ${res.status}`;
        throw new HttpError(message, res.status, url);
      }

      let waitMs = backoffMs;
      if (isRateLimited) {
        // Prefer the server-provided reset window (seconds) when it is usable.
        const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
        if (Number.isFinite(resetSeconds)) {
          waitMs = Math.max(0, resetSeconds * 1000);
        }
      }
      await delay(waitMs);
    } catch (err) {
      // An HttpError is a final verdict on the response. Retrying it here
      // would repeat the very 4xx requests the logic above refuses to retry.
      if (err instanceof HttpError || attempt >= maxRetries) throw err;
      await delay(backoffMs);
    }
  }

  throw new Error("Exhausted retries without response");
}
|
||||||
|
|
||||||
|
// ----------------------------- Parsing -----------------------------
|
||||||
|
|
||||||
|
/**
 * Extract marketplace search results from the JSON embedded in a Facebook
 * page's `<script>` tags.
 *
 * Each script whose text parses as JSON is probed, in order, for a
 * `marketplace_search` object at several known nesting paths under `require`,
 * and then directly at the top level. The first candidate whose
 * `feed_units.edges` is an array wins.
 *
 * @param htmlString Full HTML of the search page.
 * @returns One `{ node }` wrapper per edge, or null when no script carries
 *   marketplace data (a warning is logged in that case).
 */
function extractFacebookMarketplaceData(
  htmlString: HTMLString,
): FacebookAdNode[] | null {
  // Accept a candidate only when it really has an array at feed_units.edges,
  // so the `.length` and `.map` calls below cannot throw on an odd payload.
  const asMarketplaceSearch = (
    candidate: unknown,
  ): FacebookMarketplaceSearch | null => {
    if (!isRecord(candidate)) return null;
    const feedUnits = candidate.feed_units;
    if (!isRecord(feedUnits) || !Array.isArray(feedUnits.edges)) return null;
    return candidate as FacebookMarketplaceSearch;
  };

  const { document } = parseHTML(htmlString);
  let marketplaceData: FacebookMarketplaceSearch | null = null;

  // Find the script containing the require data with marketplace_search
  for (const script of Array.from(document.querySelectorAll("script"))) {
    const scriptText = script.textContent;
    if (!scriptText) continue;

    // Untyped on purpose: the payload shape is undocumented and probed below.
    let parsed: any;
    try {
      parsed = JSON.parse(scriptText);
    } catch {
      // Most scripts are plain JavaScript, not JSON. Skip them.
      continue;
    }

    if (Array.isArray(parsed?.require)) {
      // Try multiple navigation paths to find marketplace_search. Each reader
      // may throw when an intermediate level is missing.
      const candidatePaths: Array<() => unknown> = [
        // Original path from example
        () =>
          parsed.require[0][3][0]["__bbox"]["require"][0][3][1]["__bbox"][
            "result"
          ]["data"]["marketplace_search"],
        // Alternative path structure
        () => parsed.require[0][3][1]?.__bbox?.result?.data?.marketplace_search,
        // Another variation
        () =>
          parsed.require[0][3][0]["__bbox"]["result"]["data"][
            "marketplace_search"
          ],
        // Direct access for some responses
        () => {
          for (const item of parsed.require) {
            if (item && item.length >= 4 && item[3]) {
              const found =
                item[3]?.["__bbox"]?.result?.data?.marketplace_search;
              if (found) return found;
            }
          }
          return null;
        },
      ];

      for (const readPath of candidatePaths) {
        try {
          marketplaceData = asMarketplaceSearch(readPath());
        } catch {
          // This path does not exist in this payload. Try the next one.
          marketplaceData = null;
        }
        if (marketplaceData) break;
      }
    }

    // Also check for direct marketplace_search in the parsed data
    if (!marketplaceData) {
      marketplaceData = asMarketplaceSearch(parsed?.marketplace_search);
    }

    if (marketplaceData) break;
  }

  const edges = marketplaceData?.feed_units?.edges;
  if (!edges) {
    console.warn("No marketplace data found in HTML response");
    return null;
  }

  console.log(
    `Successfully parsed ${edges.length} Facebook marketplace listings`,
  );
  return edges.map((edge) => ({ node: edge.node }));
}
|
||||||
|
|
||||||
|
/**
 * Turn an amount in cents into a localized decimal string with exactly two
 * fraction digits and grouping separators, e.g. 123456 -> "1,234.56" for
 * en-US. No currency symbol is added.
 *
 * @param num Amount in cents. Strings are parsed as base-10 integers.
 * @param locale BCP 47 locale tag used for formatting (default "en-US").
 * @returns The formatted amount, or "" when `num` is null/undefined or does
 *   not yield a finite number.
 */
function formatCentsToCurrency(
  num: number | string | undefined,
  locale = "en-US",
): string {
  if (num == null) return "";

  const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
  // isFinite rejects NaN and also ±Infinity, which would otherwise format as "∞".
  if (!Number.isFinite(cents)) return "";

  return new Intl.NumberFormat(locale, {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
    useGrouping: true,
  }).format(cents / 100);
}
|
||||||
|
|
||||||
|
/** Price fields read from a listing, including two that the listing type may not declare. */
type FacebookPriceFields = {
  amount?: string | number;
  currency?: string;
  formatted_amount?: string;
  amount_with_offset_in_currency?: string | number;
};

/**
 * Work out a listing price in cents, or null when no usable price exists.
 *
 * Facebook stores price in different fields:
 * - amount: whole currency units (like "1.00"); preferred when present
 * - formatted_amount: human-readable price (like "CA$1")
 * - amount_with_offset_in_currency: Facebook's internal price encoding (not cents)
 */
function resolveFacebookPriceCents(price: FacebookPriceFields): number | null {
  if (price.amount != null) {
    const dollars =
      typeof price.amount === "string"
        ? Number.parseFloat(price.amount)
        : price.amount;
    return Math.round(dollars * 100);
  }

  if (price.amount_with_offset_in_currency == null) return null; // No price available

  const encodedAmount = Number(price.amount_with_offset_in_currency);
  if (Number.isNaN(encodedAmount) || encodedAmount <= 0) return null; // Invalid price

  // Prefer the digits of the formatted amount. Every thousands separator has
  // to go, otherwise "1,234,567" would parse as 1234.
  const digits = price.formatted_amount?.match(/[\d,]+\.?\d*/)?.[0];
  if (digits) {
    const dollars = Number.parseFloat(digits.replace(/,/g, ""));
    if (!Number.isNaN(dollars)) return Math.round(dollars * 100);
  }

  // Last resort: use the encoded value as-is.
  // NOTE(review): this field is documented above as "not cents", so the
  // fallback is only a rough estimate.
  return encodedAmount;
}

/**
 * Parse Facebook marketplace search results into ListingDetails[].
 *
 * An ad is skipped when it has no title, no price object, no positive finite
 * price, or when reading it throws (malformed node).
 *
 * @param ads Listing nodes as returned by extractFacebookMarketplaceData.
 * @returns One entry per usable ad, in input order.
 */
function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
  const results: ListingDetails[] = [];

  for (const adJson of ads) {
    try {
      const listing = adJson.node.listing;
      const title = listing.marketplace_listing_title;
      const priceObj: FacebookPriceFields | undefined = listing.listing_price;

      if (!title || !priceObj) continue;

      const cents = resolveFacebookPriceCents(priceObj);
      if (cents === null || !Number.isFinite(cents) || cents <= 0) continue;

      // Extract address from location data if available
      const cityName =
        listing.location?.reverse_geocode?.city_page?.display_name;

      results.push({
        url: `https://www.facebook.com/marketplace/item/${listing.id}`,
        title,
        listingPrice: {
          amountFormatted:
            priceObj.formatted_amount || formatCentsToCurrency(cents),
          cents,
          currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
        },
        address: cityName || null,
        // creation_time is in seconds; Date expects milliseconds.
        creationDate: listing.creation_time
          ? new Date(listing.creation_time * 1000).toISOString()
          : undefined,
        listingType: "item", // Default type for marketplace listings
      });
    } catch {
      // Skip malformed ads
      continue;
    }
  }

  return results;
}
|
||||||
|
|
||||||
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
|
/**
 * Search Facebook Marketplace and return the priced listings.
 *
 * @param SEARCH_QUERY Free-text search query.
 * @param REQUESTS_PER_SECOND Request rate used to derive the post-request
 *   delay; non-finite or non-positive values fall back to 1.
 * @param LOCATION Marketplace location slug placed in the URL path (URL-encoded here).
 * @param MAX_ITEMS Maximum number of listings to return.
 * @param cookiesSource Optional JSON array of cookies; when absent,
 *   `./cookies/facebook.json` is used.
 * @returns Up to `MAX_ITEMS` listings. An empty array when the page responds
 *   with an HTTP error or contains no parsable ads.
 * @throws Error when no cookies are available, when none of them is valid for
 *   www.facebook.com, or when the request fails for a non-HTTP reason.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  LOCATION = "toronto",
  MAX_ITEMS = 25,
  cookiesSource?: string,
): Promise<ListingDetails[]> {
  // Load Facebook cookies - required for Facebook Marketplace access
  const cookies = await loadFacebookCookies(cookiesSource);
  if (cookies.length === 0) {
    throw new Error(
      "Facebook cookies are required for marketplace access. " +
        "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
    );
  }

  // Format cookies for HTTP header
  const domain = "www.facebook.com";
  const cookiesHeader = formatCookiesForHeader(cookies, domain);
  if (!cookiesHeader) {
    throw new Error(
      "No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
    );
  }

  // A zero, negative or NaN rate would produce a non-finite delay, so fall back to 1/s.
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0
      ? REQUESTS_PER_SECOND
      : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  // Both user-supplied parts are encoded: the query goes into the query
  // string, and the location into the path, where an unencoded "/", "?" or
  // "#" would change which URL is requested.
  const encodedQuery = encodeURIComponent(SEARCH_QUERY);
  const encodedLocation = encodeURIComponent(LOCATION);

  // Facebook marketplace URL structure
  const searchUrl = `https://www.facebook.com/marketplace/${encodedLocation}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;

  console.log(`Fetching Facebook marketplace: ${searchUrl}`);
  console.log(`Using ${cookies.length} cookies for authentication`);

  let searchHtml: string;
  try {
    searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
      onRateInfo: (remaining, reset) => {
        if (remaining && reset) {
          console.log(
            "\n" +
              `Facebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
          );
        }
      },
      cookies: cookiesHeader,
    });
  } catch (err) {
    // HTTP-level failures are reported and yield "no results"; anything else propagates.
    if (err instanceof HttpError) {
      console.warn(
        `\nFacebook marketplace access failed (${err.status}): ${err.message}`,
      );
      if (err.status === 400 || err.status === 401 || err.status === 403) {
        console.warn(
          "This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
        );
      }
      return [];
    }
    throw err;
  }

  const ads = extractFacebookMarketplaceData(searchHtml);
  if (!ads || ads.length === 0) {
    console.warn("No ads parsed from Facebook marketplace page.");
    return [];
  }

  console.log(`\nFound ${ads.length} raw ads. Processing...`);

  const progressBar = new cliProgress.SingleBar(
    {},
    cliProgress.Presets.shades_classic,
  );
  const totalProgress = ads.length;
  progressBar.start(totalProgress, 0);

  let pricedItems: ListingDetails[];
  try {
    // Filter to only priced items (already done in parseFacebookAds)
    pricedItems = parseFacebookAds(ads).filter(
      (item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
    );
    progressBar.update(totalProgress);
  } finally {
    // Always release the terminal, even if parsing throws.
    progressBar.stop();
  }

  console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
  return pricedItems.slice(0, MAX_ITEMS); // Limit results
}
|
||||||
36
src/index.ts
36
src/index.ts
@@ -1,4 +1,5 @@
|
|||||||
import fetchKijijiItems from "@/kijiji";
|
import fetchKijijiItems from "@/kijiji";
|
||||||
|
import fetchFacebookItems from "@/facebook";
|
||||||
|
|
||||||
const PORT = process.env.PORT || 4005;
|
const PORT = process.env.PORT || 4005;
|
||||||
|
|
||||||
@@ -33,6 +34,41 @@ const server = Bun.serve({
|
|||||||
return Response.json(items, { status: 200 });
|
return Response.json(items, { status: 200 });
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// GET /api/facebook
// Inputs: search query from the "query" header or the "q" parameter
// (required), optional "location" (default "toronto") and optional "cookies"
// (JSON array of cookies) search parameters.
// Responses: 200 with the listings, 404 when the search yields nothing,
// 400 when the query is missing or the scraper throws.
"/api/facebook": async (req: Request) => {
  const reqUrl = new URL(req.url);

  const SEARCH_QUERY =
    req.headers.get("query") || reqUrl.searchParams.get("q") || null;
  if (!SEARCH_QUERY)
    return Response.json(
      {
        message:
          "Request didn't have 'query' header or 'q' search parameter!",
      },
      { status: 400 },
    );

  const LOCATION = reqUrl.searchParams.get("location") || "toronto";
  const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;

  try {
    const items = await fetchFacebookItems(
      SEARCH_QUERY,
      5,
      LOCATION,
      25,
      COOKIES_SOURCE,
    );
    if (!items || items.length === 0)
      return Response.json(
        { message: "Search didn't return any results!" },
        { status: 404 },
      );
    return Response.json(items, { status: 200 });
  } catch (error) {
    console.error("Facebook scraping error:", error);
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error occurred";
    return Response.json({ message: errorMessage }, { status: 400 });
  }
},
|
||||||
|
|
||||||
// Wildcard route for all routes that start with "/api/" and aren't otherwise matched
|
// Wildcard route for all routes that start with "/api/" and aren't otherwise matched
|
||||||
"/api/*": Response.json({ message: "Not found" }, { status: 404 }),
|
"/api/*": Response.json({ message: "Not found" }, { status: 404 }),
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user