Compare commits

..

7 Commits

Author SHA1 Message Date
e4ab145d70 feat: add cookie support to kijiji scraper
Add optional cookie parameter to bypass bot detection (403 errors).
Cookies can be provided via parameter, KIJIJI_COOKIE env var, or
cookies/kijiji.json file. Supports both JSON array and string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:13 -05:00
1dce0392e3 refactor: use shared cookie utility in ebay scraper
Replace inline cookie loading with shared utility functions.
Now supports both JSON array and cookie string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:02 -05:00
251fcbb7d9 refactor: use shared cookie utility in facebook scraper
Replace inline cookie parsing with shared utility functions.
Maintains backward compatibility with existing exports.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:52 -05:00
9bc57d6b54 refactor: add shared cookie utility to core package
Move cookie parsing logic to a dedicated utility module that can be
shared across all scrapers. Supports both JSON array and cookie string
formats for all input sources (parameter, env var, file).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:44 -05:00
4a467c9f02 fix: support both json and string cookies for facebook 2026-01-23 19:00:51 -05:00
f944d319c2 chore: update dockerignore 2026-01-23 15:43:13 -05:00
cf9784a565 feat: implement cookie priority hierarchy (URL param > env var > file) for Facebook and eBay scrapers 2026-01-23 15:32:17 -05:00
13 changed files with 547 additions and 355 deletions

View File

@@ -1,145 +1,84 @@
# Dependencies
# =============================================================================
# Dependencies & Build Output
# =============================================================================
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
bun.sum
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage/
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
jspm_packages/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
dist/
out/
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
# =============================================================================
# Sensitive Files
# =============================================================================
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
.env.*
.envrc
cookies/
*.pem
*.key
*.cert
*secret*
*credential*
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# =============================================================================
# Development Tools & Config
# =============================================================================
# Nix/Devenv
.devenv/
.devenv.flake.nix
devenv.*
.direnv/
# Next.js build output
.next
# Linting/Formatting
biome.json
.eslintcache
.pre-commit-config.yaml
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
public
# Vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# IDE and editor files
# IDE/Editor
.vscode/
.idea/
*.swp
*.swo
*~
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# AI Assistant Config
.claude/
CLAUDE.md
AGENTS.md
opencode.jsonc
# Git
.git
# =============================================================================
# Documentation (not needed at runtime)
# =============================================================================
README.md
*.md
docs/
# =============================================================================
# Git & Docker (avoid recursive inclusion)
# =============================================================================
.git/
.gitignore
# Docker
Dockerfile*
.dockerignore
# Documentation
README.md
docs/
# Test files
# =============================================================================
# Testing & Coverage
# =============================================================================
test/
tests/
*.test.js
*.test.ts
*.spec.js
*.spec.ts
coverage/
*.lcov
.nyc_output/
# Development files
CLAUDE.md
devenv.*
# =============================================================================
# OS & Misc
# =============================================================================
.DS_Store
Thumbs.db
*.log
# Runtime cookies/config
cookies/
*.pid
.cache/
examples/
scripts/

View File

@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
- `GET /api/status` - Health check
- `GET /api/kijiji?q={query}` - Search Kijiji
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
- `GET /api/*` - 404 fallback
### MCP Server (`@marketplace-scrapers/mcp-server`)
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
**Tools:**
- `search_kijiji` - Search Kijiji (query, maxItems)
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems)
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
## API Response Formats
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
### eBay-specific fields
Minimal - mainly the common fields
## Cookie Management
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
### Cookie Priority Hierarchy (High → Low)
All scrapers follow this loading order:
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
2. **Environment Variable** - `FACEBOOK_COOKIE`, `EBAY_COOKIE`, or `KIJIJI_COOKIE`
3. **Cookie File** - `cookies/facebook.json`, `cookies/ebay.json`, or `cookies/kijiji.json` (fallback)
### Facebook Cookies
- **Required for**: Facebook Marketplace scraping
- **Format**: JSON array (see `cookies/README.md`) or cookie string `"name=value; name2=value2"`
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
**Setup:**
```bash
# Option 1: File (fallback)
# Create cookies/facebook.json with cookie array
# Option 2: Environment variable
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
# Option 3: URL parameter (highest priority)
curl "http://localhost:4005/api/facebook?q=laptop&cookies=[{...}]"
```
### eBay Cookies
- **Required for**: Bypassing bot detection
- **Format**: Cookie string `"name=value; name2=value2"` or JSON array
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
**Setup:**
```bash
# Option 1: File (fallback)
# Create cookies/ebay.json with cookie string
# Option 2: Environment variable
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
# Option 3: URL parameter (highest priority)
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
```
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
## Technical Details
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
## Development Notes
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json`
- eBay uses custom headers to bypass basic bot detection
- **Cookie files** are git-ignored for security (see `cookies/README.md`)
- Kijiji parses Apollo state from Next.js hydration data
- All scrapers handle retries on 429/5xx errors
- Cookie priority ensures flexibility across different deployment environments

View File

@@ -1,24 +1,33 @@
# Facebook Marketplace Cookies Setup
# Marketplace Cookies Setup
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies.
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
## Option 1: Cookies File (`facebook.json`)
## Cookie Priority Hierarchy
1. Log into Facebook in your browser
2. Open Developer Tools → Network tab
3. Visit facebook.com/marketplace (ensure you're logged in)
4. Look for any marketplace-related requests in the Network tab
5. Export cookies from the browser's Application/Storage → Cookies section
6. Save the cookies as a JSON array to `facebook.json`
All scrapers follow this priority order (highest to lowest):
1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
The `facebook.json` file should contain Facebook session cookies, particularly:
---
## Facebook Marketplace (`facebook.json`)
### Required Cookies
- `c_user`: Your Facebook user ID
- `xs`: Facebook session token
- `fr`: Facebook request token
- `datr`: Data attribution token
- `sb`: Session browser token
Example structure:
### Setup Methods
**Method 1: Cookie File (Lowest Priority)**
1. Log into Facebook in your browser
2. Open Developer Tools → Application/Storage → Cookies
3. Export cookies as JSON array to `facebook.json`
Example `facebook.json`:
```json
[
{
@@ -27,26 +36,59 @@ Example structure:
"domain": ".facebook.com",
"path": "/",
"secure": true
},
// ... other cookies
}
]
```
## Option 2: URL Parameter
You can pass cookies directly via the `cookies` URL parameter:
**Method 2: Environment Variable**
```bash
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
```
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
**Method 3: URL Parameter (Highest Priority)**
```
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
```
---
## eBay (`ebay.json`)
eBay has aggressive bot detection that blocks requests without valid session cookies.
### Setup Methods
**Method 1: Cookie File (Lowest Priority)**
1. Log into eBay in your browser
2. Open Developer Tools → Network tab
3. Visit ebay.ca and inspect any request headers
4. Copy the full `Cookie` header value
5. Save as plain text to `ebay.json` (see `ebay.json.example`)
Example `ebay.json`:
```
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
```
**Method 2: Environment Variable**
```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
```
**Method 3: URL Parameter (Highest Priority)**
```
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
```
---
## Important Notes
- Cookies must be from an active Facebook session
- Cookies expire, so you may need to refresh them periodically
- Never share real cookies or commit them to version control
- Facebook may block automated scraping even with valid cookies
- Cookies must be from active browser sessions
- Cookies expire and need periodic refresh
- **NEVER** commit real cookies to version control
- Platforms may still block automated scraping despite valid cookies
## Security
The cookies file is intentionally left out of version control for security reasons.</content>
All `*.json` files in this directory are git-ignored for security.

View File

@@ -0,0 +1 @@
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE

View File

@@ -1,8 +1,9 @@
import { fetchEbayItems } from "@marketplace-scrapers/core";
/**
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
* Search eBay for listings (default: Buy It Now only, Canada only)
* Optional: Pass cookies parameter to bypass bot detection
*/
export async function ebayRoute(req: Request): Promise<Response> {
try {
@@ -37,6 +38,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
const maxItemsParam = reqUrl.searchParams.get("maxItems");
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
const cookies = reqUrl.searchParams.get("cookies") || undefined;
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
minPrice,
@@ -46,6 +48,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
keywords,
buyItNowOnly,
canadaOnly,
cookies,
});
const results = maxItems ? items.slice(0, maxItems) : items;

View File

@@ -41,6 +41,7 @@ export async function kijijiRoute(req: Request): Promise<Response> {
maxPages,
priceMin,
priceMax,
cookies: reqUrl.searchParams.get("cookies") || undefined,
};
try {

View File

@@ -36,7 +36,8 @@ export {
} from "./scrapers/kijiji";
// Export shared types
export * from "./types/common";
// Export shared utilities
export * from "./utils/cookies";
export * from "./utils/delay";
export * from "./utils/format";
// Export shared utilities
export * from "./utils/http";

View File

@@ -1,6 +1,19 @@
import { parseHTML } from "linkedom";
import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { delay } from "../utils/delay";
// eBay cookie configuration
const EBAY_COOKIE_CONFIG: CookieConfig = {
name: "eBay",
domain: ".ebay.ca",
envVar: "EBAY_COOKIE",
filePath: "./cookies/ebay.json",
};
// ----------------------------- Types -----------------------------
export interface EbayListingDetails {
@@ -101,13 +114,26 @@ function parseEbayListings(
}
// Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures
let container = linkElement.parentElement?.parentElement?.parentElement;
if (!container) {
// Try a different level
container = linkElement.parentElement?.parentElement;
// Modern eBay uses complex nested structures (often 5-10 levels deep)
let container: Element | null = linkElement;
let depth = 0;
const maxDepth = 15;
// Walk up until we find a list item or results container
while (container && depth < maxDepth) {
const classes = container.className || "";
if (
classes.includes("s-item") ||
classes.includes("srp-results") ||
container.tagName === "LI"
) {
break;
}
container = container.parentElement;
depth++;
}
if (!container) continue;
if (!container || depth >= maxDepth) continue;
// Extract title - look for heading or title-related elements near the link
// Modern eBay often uses h3, span, or div with text content near the link
@@ -168,8 +194,9 @@ function parseEbayListings(
if (title === "Shop on eBay" || title.length < 3) continue;
// Extract price - look for eBay's price classes, preferring sale/discount prices
// Updated for 2026 eBay HTML structure
let priceElement = container.querySelector(
'[class*="s-item__price"], .s-item__price, [class*="price"]',
'[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
);
// If no direct price class, look for spans containing $ (but not titles)
@@ -305,6 +332,32 @@ function parseEbayListings(
return results;
}
// ----------------------------- Cookie Loading -----------------------------
/**
 * Resolve the Cookie header value to send with eBay requests.
 * Lookup order is URL param > ENV var > file, delegated to the shared
 * cookie utility so every scraper behaves the same way.
 *
 * @param cookiesSource - Optional raw cookies supplied by the caller
 * @returns Cookie header string for www.ebay.ca, or `undefined` (after
 *          logging a warning) when no cookies could be loaded
 */
async function loadEbayCookies(
  cookiesSource?: string,
): Promise<string | undefined> {
  const loaded = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);

  // Happy path first: something was loaded, turn it into a header value
  if (loaded.length > 0) {
    return formatCookiesForHeader(loaded, "www.ebay.ca");
  }

  // Nothing available: explain how to provide cookies, then carry on without
  const guidance = [
    "No eBay cookies found. eBay may block requests without valid session cookies.",
    "Provide cookies via (in priority order):",
    " 1. 'cookies' URL parameter (highest priority), or",
    " 2. EBAY_COOKIE environment variable, or",
    " 3. ./cookies/ebay.json file (lowest priority)",
    'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  ].join("\n");
  console.warn(guidance);
  return undefined;
}
// ----------------------------- Main -----------------------------
export default async function fetchEbayItems(
@@ -318,6 +371,7 @@ export default async function fetchEbayItems(
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
} = {},
) {
const {
@@ -328,8 +382,12 @@ export default async function fetchEbayItems(
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
buyItNowOnly = true,
canadaOnly = true,
cookies: cookiesSource,
} = opts;
// Load eBay cookies with priority: URL param > ENV var > file
const cookies = await loadEbayCookies(cookiesSource);
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
const urlParams = new URLSearchParams({
_nkw: SEARCH_QUERY,
@@ -358,7 +416,7 @@ export default async function fetchEbayItems(
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Encoding": "gzip, deflate, br, zstd",
Referer: "https://www.ebay.ca/",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
@@ -369,6 +427,11 @@ export default async function fetchEbayItems(
Priority: "u=0, i",
};
// Add cookies if available (helps bypass bot detection)
if (cookies) {
headers.Cookie = cookies;
}
const res = await fetch(searchUrl, {
method: "GET",
headers,

View File

@@ -1,6 +1,13 @@
import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common";
import {
type Cookie,
type CookieConfig,
ensureCookies,
formatCookiesForHeader,
parseCookieString,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http";
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
* This is by design to respect Facebook's authentication requirements.
*/
// ----------------------------- Types -----------------------------
interface Cookie {
name: string;
value: string;
domain: string;
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
// Facebook cookie configuration
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
name: "Facebook",
domain: ".facebook.com",
envVar: "FACEBOOK_COOKIE",
filePath: "./cookies/facebook.json",
};
interface FacebookAdNode {
node: {
@@ -203,164 +202,24 @@ export interface FacebookListingDetails {
// ----------------------------- Utilities -----------------------------
/**
* Load Facebook cookies from file or string
*/
async function loadFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load from provided string parameter
if (cookiesSource) {
try {
const cookies = JSON.parse(cookiesSource);
if (Array.isArray(cookies)) {
return cookies.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string",
);
}
} catch (e) {
throw new Error(`Invalid cookies JSON provided: ${e}`);
}
}
// Try to load from specified path
try {
const cookiesPath = cookiePath;
const file = Bun.file(cookiesPath);
if (await file.exists()) {
const content = await file.text();
const cookies = JSON.parse(content);
if (Array.isArray(cookies)) {
return cookies.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string",
);
}
}
} catch (e) {
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
return [];
}
/**
* Parse Facebook cookie string into Cookie array format
* @deprecated Use parseCookieString from utils/cookies instead
*/
export function parseFacebookCookieString(cookieString: string): Cookie[] {
if (!cookieString || !cookieString.trim()) {
return [];
}
return cookieString
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
const [name, value] = pair.split("=", 2);
const trimmedName = name.trim();
const trimmedValue = value.trim();
// Skip empty names or values
if (!trimmedName || !trimmedValue) {
return null;
}
return {
name: trimmedName,
value: decodeURIComponent(trimmedValue),
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax" as const,
expirationDate: undefined, // Session cookies
};
})
.filter((cookie): cookie is Cookie => cookie !== null);
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
}
/**
* Ensure Facebook cookies are available, parsing from env var if needed
* Load Facebook cookies with priority: URL param > ENV var > file
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
* @param _cookiePath - Deprecated, uses default path from config
*/
export async function ensureFacebookCookies(
cookiePath = "./cookies/facebook.json",
cookiesSource?: string,
_cookiePath?: string,
): Promise<Cookie[]> {
// First try to load existing cookies
try {
const existing = await loadFacebookCookies(undefined, cookiePath);
if (existing.length > 0) {
return existing;
}
} catch {
// File doesn't exist or is invalid, continue to check env var
}
// Try to parse from environment variable
const cookieString = process.env.FACEBOOK_COOKIE;
if (!cookieString || !cookieString.trim()) {
throw new Error(
"No valid Facebook cookies found. Either:\n" +
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
" 2. Create ./cookies/facebook.json manually with cookie array",
);
}
// Parse the cookie string
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length === 0) {
throw new Error(
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
'Expected format: "name1=value1; name2=value2;"',
);
}
// Save to file for future use
try {
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
} catch (error) {
console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
// Continue anyway, we have the cookies in memory
}
return cookies;
}
/**
* Format cookies array into Cookie header string
*/
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
const validCookies = cookies
.filter((cookie) => {
// Check if cookie applies to this domain
if (cookie.domain.startsWith(".")) {
// Domain cookie (applies to subdomains)
return (
domain.endsWith(cookie.domain.slice(1)) ||
domain === cookie.domain.slice(1)
);
}
// Host-only cookie
return cookie.domain === domain;
})
.filter((cookie) => {
// Check expiration
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
return false; // Expired
}
return true;
});
return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`)
.join("; ");
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
}
class HttpError extends Error {
@@ -964,22 +823,8 @@ export default async function fetchFacebookItems(
cookiesSource?: string,
cookiePath?: string,
) {
// Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[];
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
// Load Facebook cookies with priority: URL param > ENV var > file
const cookies = await ensureFacebookCookies(cookiesSource, cookiePath);
// Format cookies for HTTP header
const domain = "www.facebook.com";
@@ -1065,28 +910,13 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem(
itemId: string,
cookiesSource?: string,
cookiePath?: string,
_cookiePath?: string,
): Promise<FacebookListingDetails | null> {
// Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[];
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
const cookies = await ensureFacebookCookies(cookiesSource);
// Format cookies for HTTP header
const domain = "www.facebook.com";
const cookiesHeader = formatCookiesForHeader(cookies, domain);
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",

View File

@@ -2,6 +2,11 @@ import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import unidecode from "unidecode";
import type { HTMLString } from "../types/common";
import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { formatCentsToCurrency } from "../utils/format";
import {
fetchHtml,
@@ -13,6 +18,14 @@ import {
ValidationError,
} from "../utils/http";
// Kijiji cookie configuration
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
name: "Kijiji",
domain: ".kijiji.ca",
envVar: "KIJIJI_COOKIE",
filePath: "./cookies/kijiji.json",
};
// ----------------------------- Types -----------------------------
type SearchListing = {
@@ -110,6 +123,7 @@ export interface SearchOptions {
maxPages?: number; // Default: 5
priceMin?: number;
priceMax?: number;
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
}
export interface ListingFetchOptions {
@@ -691,6 +705,16 @@ export default async function fetchKijijiItems(
) {
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
// Load Kijiji cookies (optional - helps bypass bot detection)
const cookies = await loadCookiesOptional(
KIJIJI_COOKIE_CONFIG,
searchOptions.cookies,
);
const cookieHeader =
cookies.length > 0
? formatCookiesForHeader(cookies, "www.kijiji.ca")
: undefined;
// Set defaults for configuration
const finalSearchOptions: Required<SearchOptions> = {
location: searchOptions.location ?? 1700272, // Default to GTA
@@ -701,6 +725,7 @@ export default async function fetchKijijiItems(
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
priceMin: searchOptions.priceMin as number,
priceMax: searchOptions.priceMax as number,
cookies: searchOptions.cookies ?? "",
};
const finalListingOptions: Required<ListingFetchOptions> = {
@@ -733,6 +758,7 @@ export default async function fetchKijijiItems(
);
}
},
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
});
const searchResults = parseSearch(searchHtml, BASE_URL);
@@ -782,6 +808,7 @@ export default async function fetchKijijiItems(
);
}
},
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
});
const parsed = await parseDetailedListing(
html,

View File

@@ -0,0 +1,227 @@
/**
* Shared cookie handling utilities for marketplace scrapers
*/
/**
* One HTTP cookie as consumed by the cookie helpers in this module.
*
* Only `name`, `value`, `domain` and `expirationDate` are read by the helpers
* visible here; the remaining optional fields are carried along untouched.
* NOTE(review): the optional fields look like the shape emitted by browser
* cookie-export tools - confirm against the export format actually used.
*/
export interface Cookie {
/** Cookie name; written as the left side of `name=value` in the Cookie header. */
name: string;
/** Cookie value; written as the right side of `name=value` in the Cookie header. */
value: string;
/**
* Domain the cookie applies to. A leading "." marks a domain cookie that is
* also matched against subdomains; any other value is compared to the target
* host for equality.
*/
domain: string;
/** Cookie path. Cookies parsed from a plain cookie string are given "/". */
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
/**
* Expiry timestamp. It is compared against `Date.now() / 1000`, i.e. it is
* interpreted as seconds since the Unix epoch; cookies whose expiry lies in
* the past are left out of the generated header.
*/
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
/**
* Per-marketplace settings that tell the cookie loaders where to look for
* cookies (environment variable, file) and how to label their log output.
*/
export interface CookieConfig {
/** Name used in log and error messages (e.g., "Facebook", "Kijiji") */
name: string;
/**
* Domain assigned to cookies parsed from a plain cookie string
* (e.g., ".facebook.com", ".kijiji.ca")
*/
domain: string;
/** Name of the environment variable read via `process.env` (e.g., "FACEBOOK_COOKIE") */
envVar: string;
/** Path to the fallback cookie file (e.g., "./cookies/facebook.json") */
filePath: string;
}
/**
 * Parse a Cookie-header style string into a Cookie array.
 * Supports format: "name1=value1; name2=value2"
 *
 * Pairs are separated by ";". A pair is skipped when it has no "=", or when
 * its name or value is empty after trimming. Only the first "=" separates
 * name from value, so values that contain "=" themselves are kept intact.
 *
 * Values are percent-decoded when they are valid percent-encoding. A value
 * that is not (for example a literal "%" not followed by two hex digits) is
 * kept verbatim, so one odd cookie cannot abort the whole parse with a
 * URIError.
 *
 * @param cookieString - Raw cookie string; blank or missing input yields `[]`
 * @param domain - Domain assigned to every parsed cookie
 * @returns Parsed cookies, each with path "/", `secure: true`,
 *          `httpOnly: false`, `sameSite: "lax"` and no expiration
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    // decodeURIComponent throws on malformed escapes; fall back to the raw text
    let value: string;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined,
    });
  }

  return cookies;
}
/**
 * Decode a JSON document that holds an array of cookie objects.
 * Expected shape: [{"name": "foo", "value": "bar", ...}]
 *
 * An entry is kept only when both its `name` and `value` are strings; no
 * other field is inspected. JSON that is not an array produces an empty
 * list. Malformed JSON propagates the error thrown by `JSON.parse`.
 *
 * @param jsonString - JSON text to decode
 * @returns The entries that carry a string `name` and a string `value`
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const decoded = JSON.parse(jsonString);
  const accepted: Cookie[] = [];

  if (Array.isArray(decoded)) {
    for (const entry of decoded) {
      const usable =
        entry &&
        typeof entry.name === "string" &&
        typeof entry.value === "string";
      if (usable) {
        accepted.push(entry);
      }
    }
  }

  return accepted;
}
/**
 * Parse cookies from a string of unknown format: the JSON array format is
 * tried first, then the "name1=value1; name2=value2" cookie string format.
 *
 * JSON entries are only validated for `name` and `value`, so an entry may
 * arrive without a `domain` or `path`. Those gaps are filled here (with
 * `defaultDomain` and "/") because the header formatter reads `cookie.domain`
 * and would otherwise fail on such an entry.
 *
 * @param input - Raw cookie text (JSON array or cookie string)
 * @param defaultDomain - Domain for cookie-string cookies and for JSON
 *                        cookies that do not carry one
 * @returns Parsed cookies; empty when neither format yields any
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  let jsonCookies: Cookie[] = [];
  try {
    jsonCookies = parseJsonCookies(input);
  } catch {
    // Not JSON - fall through to the cookie string format below
  }

  if (jsonCookies.length > 0) {
    return jsonCookies.map((cookie) => {
      const hasDomain =
        typeof cookie.domain === "string" && cookie.domain !== "";
      const hasPath = typeof cookie.path === "string" && cookie.path !== "";
      if (hasDomain && hasPath) {
        return cookie;
      }
      // Copy rather than mutate the parsed entry
      return {
        ...cookie,
        domain: hasDomain ? cookie.domain : defaultDomain,
        path: hasPath ? cookie.path : "/",
      };
    });
  }

  // JSON yielded nothing usable: try the cookie string format
  return parseCookieString(input, defaultDomain);
}
/**
 * Read cookies from a file on disk.
 * The file may hold either a JSON array or a cookie string; the format is
 * detected automatically.
 *
 * @param filePath - Location of the cookie file
 * @param defaultDomain - Domain handed to the parser for cookie-string input
 * @returns Parsed cookies, or an empty array when the file does not exist
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);

  const isPresent = await cookieFile.exists();
  if (!isPresent) {
    return [];
  }

  // Surrounding whitespace (e.g. a trailing newline) is dropped before parsing
  const rawText = (await cookieFile.text()).trim();
  return parseCookiesAuto(rawText, defaultDomain);
}
/**
 * Format cookies array into Cookie header string for HTTP requests.
 *
 * A cookie is included when it applies to `targetDomain` and is not expired:
 * - Domain cookies (leading ".") match the bare domain itself and any
 *   subdomain. The match requires the dot boundary, so ".facebook.com"
 *   matches "www.facebook.com" but not "evilfacebook.com".
 * - Host-only cookies (no leading ".") must equal the target host.
 * - Domain comparison is case-insensitive.
 * - A cookie without a usable `domain` (possible for JSON input, which is only
 *   validated for `name`/`value`) is treated as applying to the target host
 *   instead of raising a TypeError.
 * - A cookie whose `expirationDate` (seconds since the Unix epoch) lies in the
 *   past is dropped; cookies without an expiration are kept.
 *
 * @param cookies - Candidate cookies
 * @param targetDomain - Host the request is sent to (e.g. "www.facebook.com")
 * @returns "name=value" pairs joined by "; ", or "" when no cookie qualifies
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    const rawDomain: unknown = cookie.domain;
    if (typeof rawDomain !== "string" || rawDomain === "") {
      // No domain information: the cookie was supplied explicitly for this
      // scraper, so keep it rather than crash or silently drop it
      return true;
    }

    const cookieDomain = rawDomain.toLowerCase();
    if (cookieDomain.startsWith(".")) {
      // Domain cookie: the bare domain or any subdomain (dot boundary kept)
      return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
    }

    // Host-only cookie
    return cookieDomain === host;
  };

  const isLive = (cookie: Cookie): boolean =>
    !cookie.expirationDate || cookie.expirationDate >= nowSeconds;

  return cookies
    .filter((cookie) => appliesToHost(cookie) && isLive(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
/**
 * Resolve cookies for a marketplace, consulting the sources in priority
 * order: URL param > ENV var > file. Every source accepts both the JSON array
 * and the cookie string format.
 *
 * A source that is present but yields no valid cookies only produces a
 * warning; the lookup then continues with the next source.
 *
 * @param config - Marketplace settings (label, default domain, env var, file)
 * @param cookiesSource - Optional raw cookies passed by the caller
 * @returns The cookies from the first source that yields at least one
 * @throws Error when no source yields any cookie
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // 1) Explicit parameter wins whenever it yields at least one cookie
  if (cookiesSource) {
    const fromParam = parseCookiesAuto(cookiesSource, config.domain);
    if (fromParam.length === 0) {
      console.warn(
        `${config.name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(
        `Loaded ${fromParam.length} ${config.name} cookies from parameter`,
      );
      return fromParam;
    }
  }

  // 2) Environment variable named by the config
  const rawEnv = process.env[config.envVar];
  if (rawEnv?.trim()) {
    const fromEnv = parseCookiesAuto(rawEnv, config.domain);
    if (fromEnv.length === 0) {
      console.warn(`${config.envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${fromEnv.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return fromEnv;
    }
  }

  // 3) Cookie file; read problems are reported but do not abort the lookup
  try {
    const fromFile = await loadCookiesFromFile(config.filePath, config.domain);
    if (fromFile.length > 0) {
      console.log(
        `Loaded ${fromFile.length} ${config.name} cookies from ${config.filePath}`,
      );
      return fromFile;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // Every source came up empty
  const help = [
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):`,
    ` 1. 'cookies' parameter (highest priority), or`,
    ` 2. ${config.envVar} environment variable, or`,
    ` 3. ${config.filePath} file (lowest priority)`,
    'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  ].join("\n");
  throw new Error(help);
}
/**
 * Non-throwing variant of `ensureCookies`: performs the same lookup but
 * resolves to an empty array when that lookup rejects.
 *
 * @param config - Marketplace settings forwarded to `ensureCookies`
 * @param cookiesSource - Optional raw cookies passed by the caller
 * @returns The loaded cookies, or `[]` when none were found
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  const nothingFound = (): Cookie[] => [];
  return ensureCookies(config, cookiesSource).catch(nothingFound);
}

View File

@@ -115,6 +115,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
params.append("priceMin", args.priceMin.toString());
if (args.priceMax)
params.append("priceMax", args.priceMax.toString());
if (args.cookies) params.append("cookies", args.cookies);
console.log(
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
@@ -207,6 +208,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
params.append("canadaOnly", args.canadaOnly.toString());
if (args.maxItems)
params.append("maxItems", args.maxItems.toString());
if (args.cookies) params.append("cookies", args.cookies);
console.log(
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,

View File

@@ -52,6 +52,11 @@ export const tools = [
type: "number",
description: "Maximum price in cents",
},
cookies: {
type: "string",
description:
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
},
},
required: ["query"],
},
@@ -133,6 +138,11 @@ export const tools = [
description: "Maximum number of items to return",
default: 5,
},
cookies: {
type: "string",
description:
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
},
},
required: ["query"],
},