feat: implement cookie priority hierarchy (URL param > env var > file) for Facebook and eBay scrapers
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
|
||||
/**
|
||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
|
||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
|
||||
* Search eBay for listings (default: Buy It Now only, Canada only)
|
||||
* Optional: Pass cookies parameter to bypass bot detection
|
||||
*/
|
||||
export async function ebayRoute(req: Request): Promise<Response> {
|
||||
try {
|
||||
@@ -37,6 +38,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
||||
|
||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
||||
const cookies = reqUrl.searchParams.get("cookies") || undefined;
|
||||
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||
minPrice,
|
||||
@@ -46,6 +48,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
||||
keywords,
|
||||
buyItNowOnly,
|
||||
canadaOnly,
|
||||
cookies,
|
||||
});
|
||||
|
||||
const results = maxItems ? items.slice(0, maxItems) : items;
|
||||
|
||||
@@ -101,13 +101,26 @@ function parseEbayListings(
|
||||
}
|
||||
|
||||
// Find the container - go up several levels to find the item container
|
||||
// Modern eBay uses complex nested structures
|
||||
let container = linkElement.parentElement?.parentElement?.parentElement;
|
||||
if (!container) {
|
||||
// Try a different level
|
||||
container = linkElement.parentElement?.parentElement;
|
||||
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||
let container: Element | null = linkElement;
|
||||
let depth = 0;
|
||||
const maxDepth = 15;
|
||||
|
||||
// Walk up until we find a list item or results container
|
||||
while (container && depth < maxDepth) {
|
||||
const classes = container.className || "";
|
||||
if (
|
||||
classes.includes("s-item") ||
|
||||
classes.includes("srp-results") ||
|
||||
container.tagName === "LI"
|
||||
) {
|
||||
break;
|
||||
}
|
||||
container = container.parentElement;
|
||||
depth++;
|
||||
}
|
||||
if (!container) continue;
|
||||
|
||||
if (!container || depth >= maxDepth) continue;
|
||||
|
||||
// Extract title - look for heading or title-related elements near the link
|
||||
// Modern eBay often uses h3, span, or div with text content near the link
|
||||
@@ -168,8 +181,9 @@ function parseEbayListings(
|
||||
if (title === "Shop on eBay" || title.length < 3) continue;
|
||||
|
||||
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
||||
// Updated for 2026 eBay HTML structure
|
||||
let priceElement = container.querySelector(
|
||||
'[class*="s-item__price"], .s-item__price, [class*="price"]',
|
||||
'[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
|
||||
);
|
||||
|
||||
// If no direct price class, look for spans containing $ (but not titles)
|
||||
@@ -305,6 +319,58 @@ function parseEbayListings(
|
||||
return results;
|
||||
}
|
||||
|
||||
// ----------------------------- Cookie Loading -----------------------------
|
||||
|
||||
/**
 * Resolve the eBay cookie string to attach to search requests.
 *
 * Sources are tried in priority order and the first non-blank value wins:
 *   1. `cookiesSource` (highest priority; the caller passes the `cookies` URL parameter here)
 *   2. the `EBAY_COOKIE` environment variable
 *   3. the text content of the file at `cookiePath` (lowest priority)
 *
 * Cookies are optional for eBay, so when nothing is found this only logs a
 * warning and resolves to `undefined` instead of throwing.
 *
 * @param cookiesSource - Optional cookie string from URL parameter (highest priority)
 * @param cookiePath - Path to cookie file (default: ./cookies/ebay.json) (lowest priority)
 * @returns The trimmed cookie string for the HTTP header, or undefined if no cookies found
 */
async function loadEbayCookies(
  cookiesSource?: string,
  cookiePath = "./cookies/ebay.json",
): Promise<string | undefined> {
  // Priority 1: URL parameter (if provided). Whitespace-only input counts as absent.
  const fromParam = cookiesSource?.trim();
  if (fromParam) {
    console.log("Loaded eBay cookies from URL parameter");
    return fromParam;
  }

  // Priority 2: Environment variable
  const fromEnv = process.env.EBAY_COOKIE?.trim();
  if (fromEnv) {
    console.log("Loaded eBay cookies from EBAY_COOKIE env var");
    return fromEnv;
  }

  // Priority 3: Cookie file (fallback). The file content is used verbatim as
  // the cookie string; it is not JSON-parsed despite the default file name.
  try {
    const file = Bun.file(cookiePath);
    if (await file.exists()) {
      const fromFile = (await file.text()).trim();
      if (fromFile) {
        console.log(`Loaded eBay cookies from ${cookiePath}`);
        return fromFile;
      }
    }
  } catch (e) {
    // Best effort: an unreadable file is reported and treated as "no cookies".
    console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
  }

  // No cookies found (eBay cookies are optional, just warn).
  // Report the path that was actually checked, not the default one.
  console.warn(
    "No eBay cookies found. eBay may block requests without valid session cookies.\n" +
      "Provide cookies via (in priority order):\n" +
      "  1. 'cookies' URL parameter (highest priority), or\n" +
      "  2. EBAY_COOKIE environment variable, or\n" +
      `  3. ${cookiePath} file (lowest priority)\n` +
      'Format: Cookie string like "name1=value1; name2=value2"',
  );
  return undefined;
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchEbayItems(
|
||||
@@ -318,6 +384,8 @@ export default async function fetchEbayItems(
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
cookies?: string; // Optional: Cookie string from URL parameter (highest priority)
|
||||
cookiePath?: string; // Optional: Path to cookie file (default: ./cookies/ebay.json)
|
||||
} = {},
|
||||
) {
|
||||
const {
|
||||
@@ -328,8 +396,13 @@ export default async function fetchEbayItems(
|
||||
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
||||
buyItNowOnly = true,
|
||||
canadaOnly = true,
|
||||
cookies: cookiesSource,
|
||||
cookiePath,
|
||||
} = opts;
|
||||
|
||||
// Load eBay cookies with priority: URL param > ENV var > file
|
||||
const cookies = await loadEbayCookies(cookiesSource, cookiePath);
|
||||
|
||||
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
||||
const urlParams = new URLSearchParams({
|
||||
_nkw: SEARCH_QUERY,
|
||||
@@ -358,7 +431,7 @@ export default async function fetchEbayItems(
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
||||
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||
Referer: "https://www.ebay.ca/",
|
||||
Connection: "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
@@ -369,6 +442,11 @@ export default async function fetchEbayItems(
|
||||
Priority: "u=0, i",
|
||||
};
|
||||
|
||||
// Add cookies if available (helps bypass bot detection)
|
||||
if (cookies) {
|
||||
headers.Cookie = cookies;
|
||||
}
|
||||
|
||||
const res = await fetch(searchUrl, {
|
||||
method: "GET",
|
||||
headers,
|
||||
|
||||
@@ -287,50 +287,65 @@ export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure Facebook cookies are available, parsing from env var if needed
|
||||
* Load Facebook cookies with priority: URL param > ENV var > file
|
||||
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
||||
* @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority)
|
||||
*/
|
||||
export async function ensureFacebookCookies(
|
||||
cookiesSource?: string,
|
||||
cookiePath = "./cookies/facebook.json",
|
||||
): Promise<Cookie[]> {
|
||||
// First try to load existing cookies
|
||||
// Priority 1: URL parameter (if provided)
|
||||
if (cookiesSource) {
|
||||
try {
|
||||
const cookies = await loadFacebookCookies(cookiesSource);
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${cookies.length} Facebook cookies from URL parameter`,
|
||||
);
|
||||
return cookies;
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`Failed to parse cookies from URL parameter: ${e}`);
|
||||
// Continue to next priority
|
||||
}
|
||||
}
|
||||
|
||||
// Priority 2: Environment variable
|
||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
||||
if (cookieString?.trim()) {
|
||||
const cookies = parseFacebookCookieString(cookieString);
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
|
||||
);
|
||||
return cookies;
|
||||
}
|
||||
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
|
||||
// Continue to next priority
|
||||
}
|
||||
|
||||
// Priority 3: Cookie file (fallback)
|
||||
try {
|
||||
const existing = await loadFacebookCookies(undefined, cookiePath);
|
||||
if (existing.length > 0) {
|
||||
console.log(
|
||||
`Loaded ${existing.length} Facebook cookies from ${cookiePath}`,
|
||||
);
|
||||
return existing;
|
||||
}
|
||||
} catch {
|
||||
// File doesn't exist or is invalid, continue to check env var
|
||||
} catch (e) {
|
||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
||||
}
|
||||
|
||||
// Try to parse from environment variable
|
||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
||||
if (!cookieString || !cookieString.trim()) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Either:\n" +
|
||||
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
|
||||
" 2. Create ./cookies/facebook.json manually with cookie array",
|
||||
);
|
||||
}
|
||||
|
||||
// Parse the cookie string
|
||||
const cookies = parseFacebookCookieString(cookieString);
|
||||
if (cookies.length === 0) {
|
||||
throw new Error(
|
||||
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
|
||||
'Expected format: "name1=value1; name2=value2;"',
|
||||
);
|
||||
}
|
||||
|
||||
// Save to file for future use
|
||||
try {
|
||||
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
|
||||
console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
|
||||
} catch (error) {
|
||||
console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
|
||||
// Continue anyway, we have the cookies in memory
|
||||
}
|
||||
|
||||
return cookies;
|
||||
// No cookies found from any source
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
|
||||
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
||||
" 2. FACEBOOK_COOKIE environment variable, or\n" +
|
||||
" 3. ./cookies/facebook.json file (lowest priority)\n" +
|
||||
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -964,22 +979,8 @@ export default async function fetchFacebookItems(
|
||||
cookiesSource?: string,
|
||||
cookiePath?: string,
|
||||
) {
|
||||
// Load Facebook cookies - required for Facebook Marketplace access
|
||||
let cookies: Cookie[];
|
||||
if (cookiesSource) {
|
||||
// Use provided cookie source (backward compatibility)
|
||||
cookies = await loadFacebookCookies(cookiesSource);
|
||||
} else {
|
||||
// Auto-load from file or parse from env var
|
||||
cookies = await ensureFacebookCookies(cookiePath);
|
||||
}
|
||||
|
||||
if (cookies.length === 0) {
|
||||
throw new Error(
|
||||
"Facebook cookies are required for marketplace access. " +
|
||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
|
||||
);
|
||||
}
|
||||
// Load Facebook cookies with priority: URL param > ENV var > file
|
||||
const cookies = await ensureFacebookCookies(cookiesSource, cookiePath);
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const domain = "www.facebook.com";
|
||||
|
||||
@@ -207,6 +207,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
params.append("canadaOnly", args.canadaOnly.toString());
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.cookies) params.append("cookies", args.cookies);
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||
|
||||
@@ -133,6 +133,11 @@ export const tools = [
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
cookies: {
|
||||
type: "string",
|
||||
description:
|
||||
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user