diff --git a/AGENTS.md b/AGENTS.md index 91b6d91..8bff304 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var). - `GET /api/status` - Health check - `GET /api/kijiji?q={query}` - Search Kijiji - `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook -- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay +- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay - `GET /api/*` - 404 fallback ### MCP Server (`@marketplace-scrapers/mcp-server`) @@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var). **Tools:** - `search_kijiji` - Search Kijiji (query, maxItems) - `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource) -- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems) +- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies) ## API Response Formats @@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields: ### eBay-specific fields Minimal - mainly the common fields +## Cookie Management + +Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping. + +### Cookie Priority Hierarchy (High → Low) +All scrapers follow this loading order: +1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority) +2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE` +3. 
**Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback) + +### Facebook Cookies +- **Required for**: Facebook Marketplace scraping +- **Format**: JSON array for the cookie file and the `cookies` parameter; `name=value; name2=value2` string for `FACEBOOK_COOKIE` (see `cookies/README.md`) +- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb` + +**Setup:** +```bash +# Option 1: File (fallback) +# Create cookies/facebook.json with cookie array + +# Option 2: Environment variable +export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request' + +# Option 3: URL parameter (highest priority) +curl "http://localhost:4005/api/facebook?q=laptop&cookies=[{...}]" +``` + +### eBay Cookies +- **Required for**: Bypassing bot detection +- **Format**: Cookie string `"name=value; name2=value2"` +- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession` + +**Setup:** +```bash +# Option 1: File (fallback) +# Create cookies/ebay.json with cookie string + +# Option 2: Environment variable +export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE' + +# Option 3: URL parameter (highest priority) +curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE" +``` + +**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings. 
+ ## Technical Details - **TypeScript** with path mapping (`@/*` → `src/*`) per package @@ -126,7 +172,7 @@ Minimal - mainly the common fields ## Development Notes -- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json` -- eBay uses custom headers to bypass basic bot detection +- **Cookie files** are git-ignored for security (see `cookies/README.md`) - Kijiji parses Apollo state from Next.js hydration data - All scrapers handle retries on 429/5xx errors +- Cookie priority ensures flexibility across different deployment environments diff --git a/cookies/README.md b/cookies/README.md index 05dd2c0..d6fd952 100644 --- a/cookies/README.md +++ b/cookies/README.md @@ -1,24 +1,33 @@ -# Facebook Marketplace Cookies Setup +# Marketplace Cookies Setup -To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies. +Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings. -## Option 1: Cookies File (`facebook.json`) +## Cookie Priority Hierarchy -1. Log into Facebook in your browser -2. Open Developer Tools → Network tab -3. Visit facebook.com/marketplace (ensure you're logged in) -4. Look for any marketplace-related requests in the Network tab -5. Export cookies from the browser's Application/Storage → Cookies section -6. Save the cookies as a JSON array to `facebook.json` +All scrapers follow this priority order (highest to lowest): +1. **URL Parameter** - Passed directly in API/MCP request (overrides all) +2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE` +3. 
**Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback) -The `facebook.json` file should contain Facebook session cookies, particularly: +--- + +## Facebook Marketplace (`facebook.json`) + +### Required Cookies - `c_user`: Your Facebook user ID - `xs`: Facebook session token - `fr`: Facebook request token - `datr`: Data attribution token - `sb`: Session browser token -Example structure: +### Setup Methods + +**Method 1: Cookie File (Lowest Priority)** +1. Log into Facebook in your browser +2. Open Developer Tools → Application/Storage → Cookies +3. Export cookies as JSON array to `facebook.json` + +Example `facebook.json`: ```json [ { @@ -27,26 +36,59 @@ Example structure: "domain": ".facebook.com", "path": "/", "secure": true - }, - // ... other cookies + } ] ``` -## Option 2: URL Parameter - -You can pass cookies directly via the `cookies` URL parameter: - +**Method 2: Environment Variable** +```bash +export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request' ``` -GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}] + +**Method 3: URL Parameter (Highest Priority)** ``` +GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}] +``` + +--- + +## eBay (`ebay.json`) + +eBay has aggressive bot detection that blocks requests without valid session cookies. + +### Setup Methods + +**Method 1: Cookie File (Lowest Priority)** +1. Log into eBay in your browser +2. Open Developer Tools → Network tab +3. Visit ebay.ca and inspect any request headers +4. Copy the full `Cookie` header value +5. 
Save the raw cookie string as plain text (not JSON, despite the file extension) to `ebay.json` (see `ebay.json.example`) + +Example `ebay.json`: +``` +s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE +``` + +**Method 2: Environment Variable** +```bash +export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE' +``` + +**Method 3: URL Parameter (Highest Priority)** +``` +GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE +``` + +--- ## Important Notes -- Cookies must be from an active Facebook session -- Cookies expire, so you may need to refresh them periodically -- Never share real cookies or commit them to version control -- Facebook may block automated scraping even with valid cookies +- Cookies must be from active browser sessions +- Cookies expire and need periodic refresh +- **NEVER** commit real cookies to version control +- Platforms may still block automated scraping despite valid cookies ## Security -The cookies file is intentionally left out of version control for security reasons. +All `*.json` files in this directory are git-ignored for security. 
diff --git a/cookies/ebay.json.example b/cookies/ebay.json.example new file mode 100644 index 0000000..6ca5a9e --- /dev/null +++ b/cookies/ebay.json.example @@ -0,0 +1 @@ +s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE diff --git a/packages/api-server/src/routes/ebay.ts b/packages/api-server/src/routes/ebay.ts index 4219310..42cc83d 100644 --- a/packages/api-server/src/routes/ebay.ts +++ b/packages/api-server/src/routes/ebay.ts @@ -1,8 +1,9 @@ import { fetchEbayItems } from "@marketplace-scrapers/core"; /** - * GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly} + * GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies} * Search eBay for listings (default: Buy It Now only, Canada only) + * Optional: Pass cookies parameter to bypass bot detection */ export async function ebayRoute(req: Request): Promise { try { @@ -37,6 +38,7 @@ export async function ebayRoute(req: Request): Promise { const maxItemsParam = reqUrl.searchParams.get("maxItems"); const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined; + const cookies = reqUrl.searchParams.get("cookies") || undefined; const items = await fetchEbayItems(SEARCH_QUERY, 1, { minPrice, @@ -46,6 +48,7 @@ export async function ebayRoute(req: Request): Promise { keywords, buyItNowOnly, canadaOnly, + cookies, }); const results = maxItems ? 
items.slice(0, maxItems) : items; diff --git a/packages/core/src/scrapers/ebay.ts b/packages/core/src/scrapers/ebay.ts index 7c72030..470ae84 100644 --- a/packages/core/src/scrapers/ebay.ts +++ b/packages/core/src/scrapers/ebay.ts @@ -101,13 +101,26 @@ function parseEbayListings( } // Find the container - go up several levels to find the item container - // Modern eBay uses complex nested structures - let container = linkElement.parentElement?.parentElement?.parentElement; - if (!container) { - // Try a different level - container = linkElement.parentElement?.parentElement; + // Modern eBay uses complex nested structures (often 5-10 levels deep) + let container: Element | null = linkElement; + let depth = 0; + const maxDepth = 15; + + // Walk up until we find a list item or results container + while (container && depth < maxDepth) { + const classes = container.className || ""; + if ( + classes.includes("s-item") || + classes.includes("srp-results") || + container.tagName === "LI" + ) { + break; + } + container = container.parentElement; + depth++; } - if (!container) continue; + + if (!container || depth >= maxDepth) continue; // Extract title - look for heading or title-related elements near the link // Modern eBay often uses h3, span, or div with text content near the link @@ -168,8 +181,9 @@ function parseEbayListings( if (title === "Shop on eBay" || title.length < 3) continue; // Extract price - look for eBay's price classes, preferring sale/discount prices + // Updated for 2026 eBay HTML structure let priceElement = container.querySelector( - '[class*="s-item__price"], .s-item__price, [class*="price"]', + '[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]', ); // If no direct price class, look for spans containing $ (but not titles) @@ -305,6 +319,58 @@ function parseEbayListings( return results; } +// ----------------------------- Cookie Loading ----------------------------- + +/** + * Load eBay cookies with priority: URL 
param > ENV var > file + * @param cookiesSource - Optional cookie string from URL parameter (highest priority) + * @param cookiePath - Path to cookie file (default: ./cookies/ebay.json) (lowest priority) + * @returns Cookie string for HTTP header or undefined if no cookies found + */ +async function loadEbayCookies( + cookiesSource?: string, + cookiePath = "./cookies/ebay.json", +): Promise { + // Priority 1: URL parameter (if provided) + if (cookiesSource?.trim()) { + console.log("Loaded eBay cookies from URL parameter"); + return cookiesSource.trim(); + } + + // Priority 2: Environment variable + const envCookies = process.env.EBAY_COOKIE; + if (envCookies?.trim()) { + console.log("Loaded eBay cookies from EBAY_COOKIE env var"); + return envCookies.trim(); + } + + // Priority 3: Cookie file (fallback) + try { + const file = Bun.file(cookiePath); + if (await file.exists()) { + const content = await file.text(); + const trimmed = content.trim(); + if (trimmed) { + console.log(`Loaded eBay cookies from ${cookiePath}`); + return trimmed; + } + } + } catch (e) { + console.warn(`Could not load cookies from ${cookiePath}: ${e}`); + } + + // No cookies found (eBay cookies are optional, just warn) + console.warn( + "No eBay cookies found. eBay may block requests without valid session cookies.\n" + + "Provide cookies via (in priority order):\n" + + " 1. 'cookies' URL parameter (highest priority), or\n" + + " 2. EBAY_COOKIE environment variable, or\n" + + " 3. 
./cookies/ebay.json file (lowest priority)\n" + + 'Format: Cookie string like "name1=value1; name2=value2"', + ); + return undefined; +} + // ----------------------------- Main ----------------------------- export default async function fetchEbayItems( @@ -318,6 +384,8 @@ export default async function fetchEbayItems( keywords?: string[]; buyItNowOnly?: boolean; canadaOnly?: boolean; + cookies?: string; // Optional: Cookie string from URL parameter (highest priority) + cookiePath?: string; // Optional: Path to cookie file (default: ./cookies/ebay.json) } = {}, ) { const { @@ -328,8 +396,13 @@ export default async function fetchEbayItems( keywords = [SEARCH_QUERY], // Default to search query if no keywords provided buyItNowOnly = true, canadaOnly = true, + cookies: cookiesSource, + cookiePath, } = opts; + // Load eBay cookies with priority: URL param > ENV var > file + const cookies = await loadEbayCookies(cookiesSource, cookiePath); + // Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference const urlParams = new URLSearchParams({ _nkw: SEARCH_QUERY, @@ -358,7 +431,7 @@ export default async function fetchEbayItems( "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0", Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5", - "Accept-Encoding": "gzip, deflate, br", + "Accept-Encoding": "gzip, deflate, br, zstd", Referer: "https://www.ebay.ca/", Connection: "keep-alive", "Upgrade-Insecure-Requests": "1", @@ -369,6 +442,11 @@ export default async function fetchEbayItems( Priority: "u=0, i", }; + // Add cookies if available (helps bypass bot detection) + if (cookies) { + headers.Cookie = cookies; + } + const res = await fetch(searchUrl, { method: "GET", headers, diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index 54fb443..b2979fe 100644 --- a/packages/core/src/scrapers/facebook.ts +++ 
b/packages/core/src/scrapers/facebook.ts @@ -287,50 +287,65 @@ export function parseFacebookCookieString(cookieString: string): Cookie[] { } /** - * Ensure Facebook cookies are available, parsing from env var if needed + * Load Facebook cookies with priority: URL param > ENV var > file + * @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority) + * @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority) */ export async function ensureFacebookCookies( + cookiesSource?: string, cookiePath = "./cookies/facebook.json", ): Promise { - // First try to load existing cookies + // Priority 1: URL parameter (if provided) + if (cookiesSource) { + try { + const cookies = await loadFacebookCookies(cookiesSource); + if (cookies.length > 0) { + console.log( + `Loaded ${cookies.length} Facebook cookies from URL parameter`, + ); + return cookies; + } + } catch (e) { + console.warn(`Failed to parse cookies from URL parameter: ${e}`); + // Continue to next priority + } + } + + // Priority 2: Environment variable + const cookieString = process.env.FACEBOOK_COOKIE; + if (cookieString?.trim()) { + const cookies = parseFacebookCookieString(cookieString); + if (cookies.length > 0) { + console.log( + `Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`, + ); + return cookies; + } + console.warn("FACEBOOK_COOKIE env var contains no valid cookies"); + // Continue to next priority + } + + // Priority 3: Cookie file (fallback) try { const existing = await loadFacebookCookies(undefined, cookiePath); if (existing.length > 0) { + console.log( + `Loaded ${existing.length} Facebook cookies from ${cookiePath}`, + ); return existing; } - } catch { - // File doesn't exist or is invalid, continue to check env var + } catch (e) { + console.warn(`Could not load cookies from ${cookiePath}: ${e}`); } - // Try to parse from environment variable - const cookieString = process.env.FACEBOOK_COOKIE; - if (!cookieString || 
!cookieString.trim()) { - throw new Error( - "No valid Facebook cookies found. Either:\n" + - " 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" + - " 2. Create ./cookies/facebook.json manually with cookie array", - ); - } - - // Parse the cookie string - const cookies = parseFacebookCookieString(cookieString); - if (cookies.length === 0) { - throw new Error( - "FACEBOOK_COOKIE environment variable contains no valid cookies. " + - 'Expected format: "name1=value1; name2=value2;"', - ); - } - - // Save to file for future use - try { - await Bun.write(cookiePath, JSON.stringify(cookies, null, 2)); - console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`); - } catch (error) { - console.warn(`Could not save cookies to ${cookiePath}: ${error}`); - // Continue anyway, we have the cookies in memory - } - - return cookies; + // No cookies found from any source + throw new Error( + "No valid Facebook cookies found. Provide cookies via (in priority order):\n" + + " 1. 'cookies' URL parameter (highest priority), or\n" + + " 2. FACEBOOK_COOKIE environment variable, or\n" + + " 3. ./cookies/facebook.json file (lowest priority)\n" + + 'Format: JSON array or cookie string like "name1=value1; name2=value2"', + ); } /** @@ -964,22 +979,8 @@ export default async function fetchFacebookItems( cookiesSource?: string, cookiePath?: string, ) { - // Load Facebook cookies - required for Facebook Marketplace access - let cookies: Cookie[]; - if (cookiesSource) { - // Use provided cookie source (backward compatibility) - cookies = await loadFacebookCookies(cookiesSource); - } else { - // Auto-load from file or parse from env var - cookies = await ensureFacebookCookies(cookiePath); - } - - if (cookies.length === 0) { - throw new Error( - "Facebook cookies are required for marketplace access. 
" + - "Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.", - ); - } + // Load Facebook cookies with priority: URL param > ENV var > file + const cookies = await ensureFacebookCookies(cookiesSource, cookiePath); // Format cookies for HTTP header const domain = "www.facebook.com"; diff --git a/packages/mcp-server/src/protocol/handler.ts b/packages/mcp-server/src/protocol/handler.ts index 7de29ff..2a41249 100644 --- a/packages/mcp-server/src/protocol/handler.ts +++ b/packages/mcp-server/src/protocol/handler.ts @@ -207,6 +207,7 @@ export async function handleMcpRequest(req: Request): Promise { params.append("canadaOnly", args.canadaOnly.toString()); if (args.maxItems) params.append("maxItems", args.maxItems.toString()); + if (args.cookies) params.append("cookies", args.cookies); console.log( `[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`, diff --git a/packages/mcp-server/src/protocol/tools.ts b/packages/mcp-server/src/protocol/tools.ts index 512bc72..60a52fc 100644 --- a/packages/mcp-server/src/protocol/tools.ts +++ b/packages/mcp-server/src/protocol/tools.ts @@ -133,6 +133,11 @@ export const tools = [ description: "Maximum number of items to return", default: 5, }, + cookies: { + type: "string", + description: + "Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')", + }, }, required: ["query"], },