Compare commits
7 Commits
df0c528535
...
update
| Author | SHA1 | Date | |
|---|---|---|---|
| e4ab145d70 | |||
| 1dce0392e3 | |||
| 251fcbb7d9 | |||
| 9bc57d6b54 | |||
| 4a467c9f02 | |||
| f944d319c2 | |||
| cf9784a565 |
181
.dockerignore
181
.dockerignore
@@ -1,145 +1,84 @@
|
|||||||
# Dependencies
|
# =============================================================================
|
||||||
|
# Dependencies & Build Output
|
||||||
|
# =============================================================================
|
||||||
node_modules/
|
node_modules/
|
||||||
npm-debug.log*
|
dist/
|
||||||
yarn-debug.log*
|
out/
|
||||||
yarn-error.log*
|
|
||||||
bun.sum
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
pids
|
|
||||||
*.pid
|
|
||||||
*.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
coverage/
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
*.tgz
|
*.tgz
|
||||||
|
|
||||||
# Yarn Integrity file
|
# =============================================================================
|
||||||
.yarn-integrity
|
# Sensitive Files
|
||||||
|
# =============================================================================
|
||||||
# dotenv environment variables file
|
|
||||||
.env
|
.env
|
||||||
.env.local
|
.env.*
|
||||||
.env.development.local
|
.envrc
|
||||||
.env.test.local
|
cookies/
|
||||||
.env.production.local
|
*.pem
|
||||||
|
*.key
|
||||||
|
*.cert
|
||||||
|
*secret*
|
||||||
|
*credential*
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
# =============================================================================
|
||||||
.cache
|
# Development Tools & Config
|
||||||
.parcel-cache
|
# =============================================================================
|
||||||
|
# Nix/Devenv
|
||||||
|
.devenv/
|
||||||
|
.devenv.flake.nix
|
||||||
|
devenv.*
|
||||||
|
.direnv/
|
||||||
|
|
||||||
# Next.js build output
|
# Linting/Formatting
|
||||||
.next
|
biome.json
|
||||||
|
.eslintcache
|
||||||
|
.pre-commit-config.yaml
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
# IDE/Editor
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
.cache/
|
|
||||||
public
|
|
||||||
|
|
||||||
# Vuepress build output
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# IDE and editor files
|
|
||||||
.vscode/
|
.vscode/
|
||||||
.idea/
|
.idea/
|
||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
*~
|
*~
|
||||||
|
|
||||||
# OS generated files
|
# AI Assistant Config
|
||||||
.DS_Store
|
.claude/
|
||||||
.DS_Store?
|
CLAUDE.md
|
||||||
._*
|
AGENTS.md
|
||||||
.Spotlight-V100
|
opencode.jsonc
|
||||||
.Trashes
|
|
||||||
ehthumbs.db
|
|
||||||
Thumbs.db
|
|
||||||
|
|
||||||
# Git
|
# =============================================================================
|
||||||
.git
|
# Documentation (not needed at runtime)
|
||||||
|
# =============================================================================
|
||||||
|
README.md
|
||||||
|
*.md
|
||||||
|
docs/
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Git & Docker (avoid recursive inclusion)
|
||||||
|
# =============================================================================
|
||||||
|
.git/
|
||||||
.gitignore
|
.gitignore
|
||||||
|
|
||||||
# Docker
|
|
||||||
Dockerfile*
|
Dockerfile*
|
||||||
.dockerignore
|
.dockerignore
|
||||||
|
|
||||||
# Documentation
|
# =============================================================================
|
||||||
README.md
|
# Testing & Coverage
|
||||||
docs/
|
# =============================================================================
|
||||||
|
|
||||||
# Test files
|
|
||||||
test/
|
test/
|
||||||
tests/
|
tests/
|
||||||
*.test.js
|
|
||||||
*.test.ts
|
*.test.ts
|
||||||
*.spec.js
|
|
||||||
*.spec.ts
|
*.spec.ts
|
||||||
|
coverage/
|
||||||
|
*.lcov
|
||||||
|
.nyc_output/
|
||||||
|
|
||||||
# Development files
|
# =============================================================================
|
||||||
CLAUDE.md
|
# OS & Misc
|
||||||
devenv.*
|
# =============================================================================
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
*.log
|
*.log
|
||||||
|
*.pid
|
||||||
# Runtime cookies/config
|
.cache/
|
||||||
cookies/
|
examples/
|
||||||
|
scripts/
|
||||||
|
|||||||
54
AGENTS.md
54
AGENTS.md
@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
|
|||||||
- `GET /api/status` - Health check
|
- `GET /api/status` - Health check
|
||||||
- `GET /api/kijiji?q={query}` - Search Kijiji
|
- `GET /api/kijiji?q={query}` - Search Kijiji
|
||||||
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
||||||
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay
|
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
|
||||||
- `GET /api/*` - 404 fallback
|
- `GET /api/*` - 404 fallback
|
||||||
|
|
||||||
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
||||||
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
|
|||||||
**Tools:**
|
**Tools:**
|
||||||
- `search_kijiji` - Search Kijiji (query, maxItems)
|
- `search_kijiji` - Search Kijiji (query, maxItems)
|
||||||
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
||||||
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems)
|
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
|
||||||
|
|
||||||
## API Response Formats
|
## API Response Formats
|
||||||
|
|
||||||
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
|
|||||||
### eBay-specific fields
|
### eBay-specific fields
|
||||||
Minimal - mainly the common fields
|
Minimal - mainly the common fields
|
||||||
|
|
||||||
|
## Cookie Management
|
||||||
|
|
||||||
|
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
|
||||||
|
|
||||||
|
### Cookie Priority Hierarchy (High → Low)
|
||||||
|
All scrapers follow this loading order:
|
||||||
|
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
|
||||||
|
2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||||
|
3. **Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback)
|
||||||
|
|
||||||
|
### Facebook Cookies
|
||||||
|
- **Required for**: Facebook Marketplace scraping
|
||||||
|
- **Format**: JSON array (see `cookies/README.md`)
|
||||||
|
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
|
||||||
|
|
||||||
|
**Setup:**
|
||||||
|
```bash
|
||||||
|
# Option 1: File (fallback)
|
||||||
|
# Create cookies/facebook.json with cookie array
|
||||||
|
|
||||||
|
# Option 2: Environment variable
|
||||||
|
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||||
|
|
||||||
|
# Option 3: URL parameter (highest priority)
|
||||||
|
curl -G "http://localhost:4005/api/facebook" --data-urlencode "q=laptop" --data-urlencode 'cookies=[{...}]'
|
||||||
|
```
|
||||||
|
|
||||||
|
### eBay Cookies
|
||||||
|
- **Required for**: Bypassing bot detection
|
||||||
|
- **Format**: Cookie string `"name=value; name2=value2"`
|
||||||
|
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
|
||||||
|
|
||||||
|
**Setup:**
|
||||||
|
```bash
|
||||||
|
# Option 1: File (fallback)
|
||||||
|
# Create cookies/ebay.json with cookie string
|
||||||
|
|
||||||
|
# Option 2: Environment variable
|
||||||
|
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||||
|
|
||||||
|
# Option 3: URL parameter (highest priority)
|
||||||
|
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
|
||||||
|
|
||||||
## Technical Details
|
## Technical Details
|
||||||
|
|
||||||
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
||||||
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
|
|||||||
|
|
||||||
## Development Notes
|
## Development Notes
|
||||||
|
|
||||||
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json`
|
- **Cookie files** are git-ignored for security (see `cookies/README.md`)
|
||||||
- eBay uses custom headers to bypass basic bot detection
|
|
||||||
- Kijiji parses Apollo state from Next.js hydration data
|
- Kijiji parses Apollo state from Next.js hydration data
|
||||||
- All scrapers handle retries on 429/5xx errors
|
- All scrapers handle retries on 429/5xx errors
|
||||||
|
- Cookie priority ensures flexibility across different deployment environments
|
||||||
|
|||||||
@@ -1,24 +1,33 @@
|
|||||||
# Facebook Marketplace Cookies Setup
|
# Marketplace Cookies Setup
|
||||||
|
|
||||||
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies.
|
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
|
||||||
|
|
||||||
## Option 1: Cookies File (`facebook.json`)
|
## Cookie Priority Hierarchy
|
||||||
|
|
||||||
1. Log into Facebook in your browser
|
All scrapers follow this priority order (highest to lowest):
|
||||||
2. Open Developer Tools → Network tab
|
1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
|
||||||
3. Visit facebook.com/marketplace (ensure you're logged in)
|
2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||||
4. Look for any marketplace-related requests in the Network tab
|
3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
|
||||||
5. Export cookies from the browser's Application/Storage → Cookies section
|
|
||||||
6. Save the cookies as a JSON array to `facebook.json`
|
|
||||||
|
|
||||||
The `facebook.json` file should contain Facebook session cookies, particularly:
|
---
|
||||||
|
|
||||||
|
## Facebook Marketplace (`facebook.json`)
|
||||||
|
|
||||||
|
### Required Cookies
|
||||||
- `c_user`: Your Facebook user ID
|
- `c_user`: Your Facebook user ID
|
||||||
- `xs`: Facebook session token
|
- `xs`: Facebook session token
|
||||||
- `fr`: Facebook request token
|
- `fr`: Facebook request token
|
||||||
- `datr`: Data attribution token
|
- `datr`: Data attribution token
|
||||||
- `sb`: Session browser token
|
- `sb`: Session browser token
|
||||||
|
|
||||||
Example structure:
|
### Setup Methods
|
||||||
|
|
||||||
|
**Method 1: Cookie File (Lowest Priority)**
|
||||||
|
1. Log into Facebook in your browser
|
||||||
|
2. Open Developer Tools → Application/Storage → Cookies
|
||||||
|
3. Export cookies as JSON array to `facebook.json`
|
||||||
|
|
||||||
|
Example `facebook.json`:
|
||||||
```json
|
```json
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
@@ -27,26 +36,59 @@ Example structure:
|
|||||||
"domain": ".facebook.com",
|
"domain": ".facebook.com",
|
||||||
"path": "/",
|
"path": "/",
|
||||||
"secure": true
|
"secure": true
|
||||||
},
|
}
|
||||||
// ... other cookies
|
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
## Option 2: URL Parameter
|
**Method 2: Environment Variable**
|
||||||
|
```bash
|
||||||
You can pass cookies directly via the `cookies` URL parameter:
|
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||||
|
|
||||||
```
|
```
|
||||||
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
|
|
||||||
|
**Method 3: URL Parameter (Highest Priority)**
|
||||||
```
|
```
|
||||||
|
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## eBay (`ebay.json`)
|
||||||
|
|
||||||
|
eBay has aggressive bot detection that blocks requests without valid session cookies.
|
||||||
|
|
||||||
|
### Setup Methods
|
||||||
|
|
||||||
|
**Method 1: Cookie File (Lowest Priority)**
|
||||||
|
1. Log into eBay in your browser
|
||||||
|
2. Open Developer Tools → Network tab
|
||||||
|
3. Visit ebay.ca and inspect any request headers
|
||||||
|
4. Copy the full `Cookie` header value
|
||||||
|
5. Save it as plain text in `ebay.json` — despite the `.json` extension, the file may hold the raw cookie string rather than JSON (see `ebay.json.example`)
|
||||||
|
|
||||||
|
Example `ebay.json`:
|
||||||
|
```
|
||||||
|
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
|
||||||
|
```
|
||||||
|
|
||||||
|
**Method 2: Environment Variable**
|
||||||
|
```bash
|
||||||
|
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Method 3: URL Parameter (Highest Priority)**
|
||||||
|
```
|
||||||
|
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Important Notes
|
## Important Notes
|
||||||
|
|
||||||
- Cookies must be from an active Facebook session
|
- Cookies must be from active browser sessions
|
||||||
- Cookies expire, so you may need to refresh them periodically
|
- Cookies expire and need periodic refresh
|
||||||
- Never share real cookies or commit them to version control
|
- **NEVER** commit real cookies to version control
|
||||||
- Facebook may block automated scraping even with valid cookies
|
- Platforms may still block automated scraping despite valid cookies
|
||||||
|
|
||||||
## Security
|
## Security
|
||||||
|
|
||||||
The cookies file is intentionally left out of version control for security reasons.</content>
|
All `*.json` files in this directory are git-ignored for security.
|
||||||
|
|||||||
1
cookies/ebay.json.example
Normal file
1
cookies/ebay.json.example
Normal file
@@ -0,0 +1 @@
|
|||||||
|
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE
|
||||||
@@ -1,8 +1,9 @@
|
|||||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
|
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
|
||||||
* Search eBay for listings (default: Buy It Now only, Canada only)
|
* Search eBay for listings (default: Buy It Now only, Canada only)
|
||||||
|
* Optional: Pass cookies parameter to bypass bot detection
|
||||||
*/
|
*/
|
||||||
export async function ebayRoute(req: Request): Promise<Response> {
|
export async function ebayRoute(req: Request): Promise<Response> {
|
||||||
try {
|
try {
|
||||||
@@ -37,6 +38,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
|||||||
|
|
||||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
||||||
|
const cookies = reqUrl.searchParams.get("cookies") || undefined;
|
||||||
|
|
||||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||||
minPrice,
|
minPrice,
|
||||||
@@ -46,6 +48,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
|||||||
keywords,
|
keywords,
|
||||||
buyItNowOnly,
|
buyItNowOnly,
|
||||||
canadaOnly,
|
canadaOnly,
|
||||||
|
cookies,
|
||||||
});
|
});
|
||||||
|
|
||||||
const results = maxItems ? items.slice(0, maxItems) : items;
|
const results = maxItems ? items.slice(0, maxItems) : items;
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
|||||||
maxPages,
|
maxPages,
|
||||||
priceMin,
|
priceMin,
|
||||||
priceMax,
|
priceMax,
|
||||||
|
cookies: reqUrl.searchParams.get("cookies") || undefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ export {
|
|||||||
} from "./scrapers/kijiji";
|
} from "./scrapers/kijiji";
|
||||||
// Export shared types
|
// Export shared types
|
||||||
export * from "./types/common";
|
export * from "./types/common";
|
||||||
|
// Export shared utilities
|
||||||
|
export * from "./utils/cookies";
|
||||||
export * from "./utils/delay";
|
export * from "./utils/delay";
|
||||||
export * from "./utils/format";
|
export * from "./utils/format";
|
||||||
// Export shared utilities
|
|
||||||
export * from "./utils/http";
|
export * from "./utils/http";
|
||||||
|
|||||||
@@ -1,6 +1,19 @@
|
|||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
|
import {
|
||||||
|
type CookieConfig,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
loadCookiesOptional,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
|
|
||||||
|
// eBay cookie configuration
|
||||||
|
const EBAY_COOKIE_CONFIG: CookieConfig = {
|
||||||
|
name: "eBay",
|
||||||
|
domain: ".ebay.ca",
|
||||||
|
envVar: "EBAY_COOKIE",
|
||||||
|
filePath: "./cookies/ebay.json",
|
||||||
|
};
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
export interface EbayListingDetails {
|
export interface EbayListingDetails {
|
||||||
@@ -101,13 +114,26 @@ function parseEbayListings(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Find the container - go up several levels to find the item container
|
// Find the container - go up several levels to find the item container
|
||||||
// Modern eBay uses complex nested structures
|
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||||
let container = linkElement.parentElement?.parentElement?.parentElement;
|
let container: Element | null = linkElement;
|
||||||
if (!container) {
|
let depth = 0;
|
||||||
// Try a different level
|
const maxDepth = 15;
|
||||||
container = linkElement.parentElement?.parentElement;
|
|
||||||
|
// Walk up until we find a list item or results container
|
||||||
|
while (container && depth < maxDepth) {
|
||||||
|
const classes = container.className || "";
|
||||||
|
if (
|
||||||
|
classes.includes("s-item") ||
|
||||||
|
classes.includes("srp-results") ||
|
||||||
|
container.tagName === "LI"
|
||||||
|
) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (!container) continue;
|
container = container.parentElement;
|
||||||
|
depth++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!container || depth >= maxDepth) continue;
|
||||||
|
|
||||||
// Extract title - look for heading or title-related elements near the link
|
// Extract title - look for heading or title-related elements near the link
|
||||||
// Modern eBay often uses h3, span, or div with text content near the link
|
// Modern eBay often uses h3, span, or div with text content near the link
|
||||||
@@ -168,8 +194,9 @@ function parseEbayListings(
|
|||||||
if (title === "Shop on eBay" || title.length < 3) continue;
|
if (title === "Shop on eBay" || title.length < 3) continue;
|
||||||
|
|
||||||
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
||||||
|
// Updated for 2026 eBay HTML structure
|
||||||
let priceElement = container.querySelector(
|
let priceElement = container.querySelector(
|
||||||
'[class*="s-item__price"], .s-item__price, [class*="price"]',
|
'[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
|
||||||
);
|
);
|
||||||
|
|
||||||
// If no direct price class, look for spans containing $ (but not titles)
|
// If no direct price class, look for spans containing $ (but not titles)
|
||||||
@@ -305,6 +332,32 @@ function parseEbayListings(
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------- Cookie Loading -----------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load eBay cookies with priority: URL param > ENV var > file
|
||||||
|
* Uses shared cookie utility for consistent handling across all scrapers
|
||||||
|
*/
|
||||||
|
async function loadEbayCookies(
|
||||||
|
cookiesSource?: string,
|
||||||
|
): Promise<string | undefined> {
|
||||||
|
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
|
||||||
|
|
||||||
|
if (cookies.length === 0) {
|
||||||
|
console.warn(
|
||||||
|
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
|
||||||
|
"Provide cookies via (in priority order):\n" +
|
||||||
|
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
||||||
|
" 2. EBAY_COOKIE environment variable, or\n" +
|
||||||
|
" 3. ./cookies/ebay.json file (lowest priority)\n" +
|
||||||
|
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
||||||
|
);
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
return formatCookiesForHeader(cookies, "www.ebay.ca");
|
||||||
|
}
|
||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
export default async function fetchEbayItems(
|
export default async function fetchEbayItems(
|
||||||
@@ -318,6 +371,7 @@ export default async function fetchEbayItems(
|
|||||||
keywords?: string[];
|
keywords?: string[];
|
||||||
buyItNowOnly?: boolean;
|
buyItNowOnly?: boolean;
|
||||||
canadaOnly?: boolean;
|
canadaOnly?: boolean;
|
||||||
|
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||||
} = {},
|
} = {},
|
||||||
) {
|
) {
|
||||||
const {
|
const {
|
||||||
@@ -328,8 +382,12 @@ export default async function fetchEbayItems(
|
|||||||
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
||||||
buyItNowOnly = true,
|
buyItNowOnly = true,
|
||||||
canadaOnly = true,
|
canadaOnly = true,
|
||||||
|
cookies: cookiesSource,
|
||||||
} = opts;
|
} = opts;
|
||||||
|
|
||||||
|
// Load eBay cookies with priority: URL param > ENV var > file
|
||||||
|
const cookies = await loadEbayCookies(cookiesSource);
|
||||||
|
|
||||||
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
||||||
const urlParams = new URLSearchParams({
|
const urlParams = new URLSearchParams({
|
||||||
_nkw: SEARCH_QUERY,
|
_nkw: SEARCH_QUERY,
|
||||||
@@ -358,7 +416,7 @@ export default async function fetchEbayItems(
|
|||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
||||||
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": "en-US,en;q=0.5",
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||||
Referer: "https://www.ebay.ca/",
|
Referer: "https://www.ebay.ca/",
|
||||||
Connection: "keep-alive",
|
Connection: "keep-alive",
|
||||||
"Upgrade-Insecure-Requests": "1",
|
"Upgrade-Insecure-Requests": "1",
|
||||||
@@ -369,6 +427,11 @@ export default async function fetchEbayItems(
|
|||||||
Priority: "u=0, i",
|
Priority: "u=0, i",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Add cookies if available (helps bypass bot detection)
|
||||||
|
if (cookies) {
|
||||||
|
headers.Cookie = cookies;
|
||||||
|
}
|
||||||
|
|
||||||
const res = await fetch(searchUrl, {
|
const res = await fetch(searchUrl, {
|
||||||
method: "GET",
|
method: "GET",
|
||||||
headers,
|
headers,
|
||||||
|
|||||||
@@ -1,6 +1,13 @@
|
|||||||
import cliProgress from "cli-progress";
|
import cliProgress from "cli-progress";
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import type { HTMLString } from "../types/common";
|
import type { HTMLString } from "../types/common";
|
||||||
|
import {
|
||||||
|
type Cookie,
|
||||||
|
type CookieConfig,
|
||||||
|
ensureCookies,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
parseCookieString,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
import { formatCentsToCurrency } from "../utils/format";
|
||||||
import { isRecord } from "../utils/http";
|
import { isRecord } from "../utils/http";
|
||||||
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
|
|||||||
* This is by design to respect Facebook's authentication requirements.
|
* This is by design to respect Facebook's authentication requirements.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// Facebook cookie configuration
|
||||||
|
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
|
||||||
interface Cookie {
|
name: "Facebook",
|
||||||
name: string;
|
domain: ".facebook.com",
|
||||||
value: string;
|
envVar: "FACEBOOK_COOKIE",
|
||||||
domain: string;
|
filePath: "./cookies/facebook.json",
|
||||||
path: string;
|
};
|
||||||
secure?: boolean;
|
|
||||||
httpOnly?: boolean;
|
|
||||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
|
||||||
session?: boolean;
|
|
||||||
expirationDate?: number;
|
|
||||||
partitionKey?: Record<string, unknown>;
|
|
||||||
storeId?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface FacebookAdNode {
|
interface FacebookAdNode {
|
||||||
node: {
|
node: {
|
||||||
@@ -203,164 +202,24 @@ export interface FacebookListingDetails {
|
|||||||
|
|
||||||
// ----------------------------- Utilities -----------------------------
|
// ----------------------------- Utilities -----------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Load Facebook cookies from file or string
|
|
||||||
*/
|
|
||||||
async function loadFacebookCookies(
|
|
||||||
cookiesSource?: string,
|
|
||||||
cookiePath = "./cookies/facebook.json",
|
|
||||||
): Promise<Cookie[]> {
|
|
||||||
// First try to load from provided string parameter
|
|
||||||
if (cookiesSource) {
|
|
||||||
try {
|
|
||||||
const cookies = JSON.parse(cookiesSource);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
throw new Error(`Invalid cookies JSON provided: ${e}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to load from specified path
|
|
||||||
try {
|
|
||||||
const cookiesPath = cookiePath;
|
|
||||||
const file = Bun.file(cookiesPath);
|
|
||||||
if (await file.exists()) {
|
|
||||||
const content = await file.text();
|
|
||||||
const cookies = JSON.parse(content);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse Facebook cookie string into Cookie array format
|
* Parse Facebook cookie string into Cookie array format
|
||||||
|
* @deprecated Use parseCookieString from utils/cookies instead
|
||||||
*/
|
*/
|
||||||
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
||||||
if (!cookieString || !cookieString.trim()) {
|
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
return cookieString
|
|
||||||
.split(";")
|
|
||||||
.map((pair) => pair.trim())
|
|
||||||
.filter((pair) => pair.includes("="))
|
|
||||||
.map((pair) => {
|
|
||||||
const [name, value] = pair.split("=", 2);
|
|
||||||
const trimmedName = name.trim();
|
|
||||||
const trimmedValue = value.trim();
|
|
||||||
|
|
||||||
// Skip empty names or values
|
|
||||||
if (!trimmedName || !trimmedValue) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
name: trimmedName,
|
|
||||||
value: decodeURIComponent(trimmedValue),
|
|
||||||
domain: ".facebook.com",
|
|
||||||
path: "/",
|
|
||||||
secure: true,
|
|
||||||
httpOnly: false,
|
|
||||||
sameSite: "lax" as const,
|
|
||||||
expirationDate: undefined, // Session cookies
|
|
||||||
};
|
|
||||||
})
|
|
||||||
.filter((cookie): cookie is Cookie => cookie !== null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure Facebook cookies are available, parsing from env var if needed
|
* Load Facebook cookies with priority: URL param > ENV var > file
|
||||||
|
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
||||||
|
* @param _cookiePath - Deprecated, uses default path from config
|
||||||
*/
|
*/
|
||||||
export async function ensureFacebookCookies(
|
export async function ensureFacebookCookies(
|
||||||
cookiePath = "./cookies/facebook.json",
|
cookiesSource?: string,
|
||||||
|
_cookiePath?: string,
|
||||||
): Promise<Cookie[]> {
|
): Promise<Cookie[]> {
|
||||||
// First try to load existing cookies
|
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
|
||||||
try {
|
|
||||||
const existing = await loadFacebookCookies(undefined, cookiePath);
|
|
||||||
if (existing.length > 0) {
|
|
||||||
return existing;
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// File doesn't exist or is invalid, continue to check env var
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to parse from environment variable
|
|
||||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
|
||||||
if (!cookieString || !cookieString.trim()) {
|
|
||||||
throw new Error(
|
|
||||||
"No valid Facebook cookies found. Either:\n" +
|
|
||||||
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
|
|
||||||
" 2. Create ./cookies/facebook.json manually with cookie array",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse the cookie string
|
|
||||||
const cookies = parseFacebookCookieString(cookieString);
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
|
|
||||||
'Expected format: "name1=value1; name2=value2;"',
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save to file for future use
|
|
||||||
try {
|
|
||||||
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
|
|
||||||
console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
|
|
||||||
} catch (error) {
|
|
||||||
console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
|
|
||||||
// Continue anyway, we have the cookies in memory
|
|
||||||
}
|
|
||||||
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Format cookies array into Cookie header string
|
|
||||||
*/
|
|
||||||
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
|
|
||||||
const validCookies = cookies
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check if cookie applies to this domain
|
|
||||||
if (cookie.domain.startsWith(".")) {
|
|
||||||
// Domain cookie (applies to subdomains)
|
|
||||||
return (
|
|
||||||
domain.endsWith(cookie.domain.slice(1)) ||
|
|
||||||
domain === cookie.domain.slice(1)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Host-only cookie
|
|
||||||
return cookie.domain === domain;
|
|
||||||
})
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check expiration
|
|
||||||
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
|
|
||||||
return false; // Expired
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
return validCookies
|
|
||||||
.map((cookie) => `${cookie.name}=${cookie.value}`)
|
|
||||||
.join("; ");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class HttpError extends Error {
|
class HttpError extends Error {
|
||||||
@@ -964,22 +823,8 @@ export default async function fetchFacebookItems(
|
|||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath?: string,
|
cookiePath?: string,
|
||||||
) {
|
) {
|
||||||
// Load Facebook cookies - required for Facebook Marketplace access
|
// Load Facebook cookies with priority: URL param > ENV var > file
|
||||||
let cookies: Cookie[];
|
const cookies = await ensureFacebookCookies(cookiesSource, cookiePath);
|
||||||
if (cookiesSource) {
|
|
||||||
// Use provided cookie source (backward compatibility)
|
|
||||||
cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
} else {
|
|
||||||
// Auto-load from file or parse from env var
|
|
||||||
cookies = await ensureFacebookCookies(cookiePath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"Facebook cookies are required for marketplace access. " +
|
|
||||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format cookies for HTTP header
|
// Format cookies for HTTP header
|
||||||
const domain = "www.facebook.com";
|
const domain = "www.facebook.com";
|
||||||
@@ -1065,28 +910,13 @@ export default async function fetchFacebookItems(
|
|||||||
export async function fetchFacebookItem(
|
export async function fetchFacebookItem(
|
||||||
itemId: string,
|
itemId: string,
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath?: string,
|
_cookiePath?: string,
|
||||||
): Promise<FacebookListingDetails | null> {
|
): Promise<FacebookListingDetails | null> {
|
||||||
// Load Facebook cookies - required for Facebook Marketplace access
|
// Load Facebook cookies - required for Facebook Marketplace access
|
||||||
let cookies: Cookie[];
|
const cookies = await ensureFacebookCookies(cookiesSource);
|
||||||
if (cookiesSource) {
|
|
||||||
// Use provided cookie source (backward compatibility)
|
|
||||||
cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
} else {
|
|
||||||
// Auto-load from file or parse from env var
|
|
||||||
cookies = await ensureFacebookCookies(cookiePath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"Facebook cookies are required for marketplace access. " +
|
|
||||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format cookies for HTTP header
|
// Format cookies for HTTP header
|
||||||
const domain = "www.facebook.com";
|
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
|
||||||
if (!cookiesHeader) {
|
if (!cookiesHeader) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||||
|
|||||||
@@ -2,6 +2,11 @@ import cliProgress from "cli-progress";
|
|||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import unidecode from "unidecode";
|
import unidecode from "unidecode";
|
||||||
import type { HTMLString } from "../types/common";
|
import type { HTMLString } from "../types/common";
|
||||||
|
import {
|
||||||
|
type CookieConfig,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
loadCookiesOptional,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
import { formatCentsToCurrency } from "../utils/format";
|
||||||
import {
|
import {
|
||||||
fetchHtml,
|
fetchHtml,
|
||||||
@@ -13,6 +18,14 @@ import {
|
|||||||
ValidationError,
|
ValidationError,
|
||||||
} from "../utils/http";
|
} from "../utils/http";
|
||||||
|
|
||||||
|
// Cookie sources for the Kijiji scraper; handed to loadCookiesOptional below.
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
  // Label for this marketplace
  name: "Kijiji",
  // Leading dot: the cookies apply to kijiji.ca and its subdomains
  domain: ".kijiji.ca",
  // Name of the environment variable that may carry the cookies
  envVar: "KIJIJI_COOKIE",
  // Location of the on-disk cookie file
  filePath: "./cookies/kijiji.json",
};
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
type SearchListing = {
|
type SearchListing = {
|
||||||
@@ -110,6 +123,7 @@ export interface SearchOptions {
|
|||||||
maxPages?: number; // Default: 5
|
maxPages?: number; // Default: 5
|
||||||
priceMin?: number;
|
priceMin?: number;
|
||||||
priceMax?: number;
|
priceMax?: number;
|
||||||
|
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ListingFetchOptions {
|
export interface ListingFetchOptions {
|
||||||
@@ -691,6 +705,16 @@ export default async function fetchKijijiItems(
|
|||||||
) {
|
) {
|
||||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||||
|
|
||||||
|
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||||
|
const cookies = await loadCookiesOptional(
|
||||||
|
KIJIJI_COOKIE_CONFIG,
|
||||||
|
searchOptions.cookies,
|
||||||
|
);
|
||||||
|
const cookieHeader =
|
||||||
|
cookies.length > 0
|
||||||
|
? formatCookiesForHeader(cookies, "www.kijiji.ca")
|
||||||
|
: undefined;
|
||||||
|
|
||||||
// Set defaults for configuration
|
// Set defaults for configuration
|
||||||
const finalSearchOptions: Required<SearchOptions> = {
|
const finalSearchOptions: Required<SearchOptions> = {
|
||||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||||
@@ -701,6 +725,7 @@ export default async function fetchKijijiItems(
|
|||||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||||
priceMin: searchOptions.priceMin as number,
|
priceMin: searchOptions.priceMin as number,
|
||||||
priceMax: searchOptions.priceMax as number,
|
priceMax: searchOptions.priceMax as number,
|
||||||
|
cookies: searchOptions.cookies ?? "",
|
||||||
};
|
};
|
||||||
|
|
||||||
const finalListingOptions: Required<ListingFetchOptions> = {
|
const finalListingOptions: Required<ListingFetchOptions> = {
|
||||||
@@ -733,6 +758,7 @@ export default async function fetchKijijiItems(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
const searchResults = parseSearch(searchHtml, BASE_URL);
|
const searchResults = parseSearch(searchHtml, BASE_URL);
|
||||||
@@ -782,6 +808,7 @@ export default async function fetchKijijiItems(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||||
});
|
});
|
||||||
const parsed = await parseDetailedListing(
|
const parsed = await parseDetailedListing(
|
||||||
html,
|
html,
|
||||||
|
|||||||
227
packages/core/src/utils/cookies.ts
Normal file
227
packages/core/src/utils/cookies.ts
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
/**
|
||||||
|
* Shared cookie handling utilities for marketplace scrapers
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * A single HTTP cookie as handled by the marketplace scrapers.
 */
export interface Cookie {
  /** Cookie name (left-hand side of `name=value`). */
  name: string;
  /** Cookie value (right-hand side of `name=value`). */
  value: string;
  /**
   * Domain the cookie belongs to. A leading dot (".example.com") marks a
   * domain cookie that also applies to subdomains; otherwise it is host-only.
   */
  domain: string;
  /** URL path scope of the cookie. */
  path: string;
  /** Secure attribute. */
  secure?: boolean;
  /** HttpOnly attribute. */
  httpOnly?: boolean;
  /** SameSite attribute. */
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  /** NOTE(review): not read in this module; presumably a session-cookie flag from browser exports. */
  session?: boolean;
  /** Expiry as Unix time in seconds; left undefined for cookies parsed from strings. */
  expirationDate?: number;
  /** NOTE(review): not read in this module; presumably carried through from browser exports. */
  partitionKey?: Record<string, unknown>;
  /** NOTE(review): not read in this module; presumably carried through from browser exports. */
  storeId?: string;
}
|
||||||
|
|
||||||
|
/**
 * Per-marketplace settings that tell the cookie loader where to look.
 */
export interface CookieConfig {
  /** Marketplace label shown in log and error messages, e.g. "Facebook" or "Kijiji". */
  name: string;
  /** Domain assigned to cookies parsed from plain strings, e.g. ".facebook.com" or ".kijiji.ca". */
  domain: string;
  /** Name of the environment variable to read cookies from, e.g. "FACEBOOK_COOKIE". */
  envVar: string;
  /** Path of the cookie file used as a fallback, e.g. "./cookies/facebook.json". */
  filePath: string;
}
|
||||||
|
|
||||||
|
/**
 * Parse a `Cookie`-header style string into a Cookie array.
 * Supports format: "name1=value1; name2=value2"
 *
 * Each pair is split on its FIRST "=", so values that themselves contain "="
 * (e.g. base64 padding) are kept intact. Pairs without "=", or with an empty
 * name or value, are skipped.
 *
 * Values are percent-decoded. A value that is not valid percent-encoding
 * (for example a bare "%") is kept verbatim instead of aborting the whole
 * parse with a URIError.
 *
 * @param cookieString - Raw cookie string; empty or whitespace-only input yields []
 * @param domain - Domain assigned to every parsed cookie
 * @returns Parsed cookies with path "/", secure, sameSite "lax" and no expiry
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const separatorIndex = rawPair.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    const name = rawPair.slice(0, separatorIndex).trim();
    const rawValue = rawPair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent sequence: fall back to the raw text rather than
      // losing every other cookie in the string.
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined,
    });
  }

  return cookies;
}
|
||||||
|
|
||||||
|
/**
 * Parse JSON array format into a Cookie array.
 * Supports format: [{"name": "foo", "value": "bar", ...}]
 *
 * Only entries that can be turned into a usable Cookie are returned:
 * - `name` and `value` must be strings;
 * - `domain` must be a string, or be absent while `defaultDomain` is supplied;
 * - `path` must be a string, or be absent (it then defaults to "/").
 * Anything else is dropped, so callers never receive an object whose
 * required string fields are missing.
 *
 * @param jsonString - JSON text expected to contain an array of cookie objects
 * @param defaultDomain - Optional domain for entries that carry none
 * @returns Validated cookies; [] when the JSON is not an array
 * @throws SyntaxError when `jsonString` is not valid JSON
 */
export function parseJsonCookies(
  jsonString: string,
  defaultDomain?: string,
): Cookie[] {
  const parsed: unknown = JSON.parse(jsonString);
  if (!Array.isArray(parsed)) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const entry of parsed) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }

    // Untrusted shape: every required field is checked before use.
    const candidate = entry as Partial<Cookie>;
    const { name, value } = candidate;
    if (typeof name !== "string" || typeof value !== "string") {
      continue;
    }

    const domain: unknown = candidate.domain ?? defaultDomain;
    const path: unknown = candidate.path ?? "/";
    if (typeof domain !== "string" || typeof path !== "string") {
      continue;
    }

    cookies.push({ ...candidate, name, value, domain, path });
  }

  return cookies;
}
|
||||||
|
|
||||||
|
/**
 * Parse cookies from text whose format is not known in advance.
 *
 * The input is first treated as a JSON array of cookie objects. If that
 * throws or yields nothing, it is re-read as a "name=value; name=value"
 * cookie string.
 *
 * @param input - Raw cookie text (JSON array or cookie string)
 * @param defaultDomain - Domain given to cookies parsed from the string format
 * @returns Cookies from whichever format produced a result; may be empty
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  let fromJson: Cookie[] = [];

  try {
    fromJson = parseJsonCookies(input);
  } catch {
    // Not JSON; the cookie-string parser below gets its turn.
  }

  return fromJson.length > 0
    ? fromJson
    : parseCookieString(input, defaultDomain);
}
|
||||||
|
|
||||||
|
/**
 * Read cookies from a file on disk.
 *
 * The file may hold either a JSON array of cookie objects or a plain
 * "name=value; name=value" string; surrounding whitespace is ignored.
 *
 * @param filePath - Location of the cookie file
 * @param defaultDomain - Domain given to cookies parsed from the string format
 * @returns Parsed cookies, or [] when the file does not exist
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);

  const isPresent = await cookieFile.exists();
  if (!isPresent) {
    return [];
  }

  const rawText = await cookieFile.text();
  const trimmedText = rawText.trim();
  return parseCookiesAuto(trimmedText, defaultDomain);
}
|
||||||
|
|
||||||
|
/**
 * Format a cookies array into a `Cookie` header string for HTTP requests.
 *
 * A cookie is included when it applies to `targetDomain` and has not expired:
 * - a domain cookie (leading ".") matches the bare domain itself and any
 *   subdomain of it. The match is made on a label boundary, so ".kijiji.ca"
 *   matches "kijiji.ca" and "www.kijiji.ca" but NOT "notkijiji.ca";
 * - a host-only cookie must equal `targetDomain`;
 * - host names are compared case-insensitively;
 * - a cookie with a truthy `expirationDate` (Unix seconds) in the past is dropped.
 *
 * @param cookies - Candidate cookies
 * @param targetDomain - Host the request is sent to, e.g. "www.facebook.com"
 * @returns "name1=value1; name2=value2", or "" when no cookie qualifies
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    // Cookies loaded from external JSON may lack a usable domain; skip them
    // instead of throwing on `.startsWith`.
    if (typeof cookie.domain !== "string") {
      return false;
    }

    const cookieDomain = cookie.domain.toLowerCase();
    if (cookieDomain.startsWith(".")) {
      // Keeping the leading dot in the suffix check enforces the label
      // boundary that a plain endsWith on the bare domain would miss.
      return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
    }

    // Host-only cookie
    return cookieDomain === host;
  };

  const isExpired = (cookie: Cookie): boolean =>
    cookie.expirationDate ? cookie.expirationDate < nowSeconds : false;

  return cookies
    .filter((cookie) => appliesToHost(cookie) && !isExpired(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||||
|
|
||||||
|
/**
 * Resolve cookies for a marketplace, trying each source in priority order:
 * explicit parameter, then environment variable, then cookie file.
 * Every source accepts either a JSON array or a "name=value; ..." string.
 *
 * @param config - Marketplace settings (label, default domain, env var name, file path)
 * @param cookiesSource - Optional raw cookie text supplied by the caller (highest priority)
 * @returns The first non-empty cookie list found
 * @throws Error when no source yields at least one cookie
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Source 1: caller-supplied text wins whenever it parses to something.
  if (cookiesSource) {
    const cookies = parseCookiesAuto(cookiesSource, config.domain);
    if (cookies.length === 0) {
      console.warn(
        `${config.name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from parameter`,
      );
      return cookies;
    }
  }

  // Source 2: environment variable, ignored when unset or blank.
  const envValue = process.env[config.envVar];
  if (envValue !== undefined && envValue.trim() !== "") {
    const cookies = parseCookiesAuto(envValue, config.domain);
    if (cookies.length === 0) {
      console.warn(`${config.envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return cookies;
    }
  }

  // Source 3: cookie file; a read/parse failure is reported but not fatal here.
  try {
    const cookies = await loadCookiesFromFile(config.filePath, config.domain);
    if (cookies.length !== 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from ${config.filePath}`,
      );
      return cookies;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // Every source came up empty.
  const message =
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):\n` +
    `  1. 'cookies' parameter (highest priority), or\n` +
    `  2. ${config.envVar} environment variable, or\n` +
    `  3. ${config.filePath} file (lowest priority)\n` +
    'Format: JSON array or cookie string like "name1=value1; name2=value2"';
  throw new Error(message);
}
|
||||||
|
|
||||||
|
/**
 * Non-throwing variant of ensureCookies, for scrapers where cookies are optional.
 *
 * @param config - Marketplace settings passed through to ensureCookies
 * @param cookiesSource - Optional raw cookie text supplied by the caller
 * @returns The resolved cookies, or [] when ensureCookies fails for any reason
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  let resolved: Cookie[] = [];

  try {
    resolved = await ensureCookies(config, cookiesSource);
  } catch {
    // No cookies available from any source; callers proceed without them.
  }

  return resolved;
}
|
||||||
@@ -115,6 +115,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
params.append("priceMin", args.priceMin.toString());
|
params.append("priceMin", args.priceMin.toString());
|
||||||
if (args.priceMax)
|
if (args.priceMax)
|
||||||
params.append("priceMax", args.priceMax.toString());
|
params.append("priceMax", args.priceMax.toString());
|
||||||
|
if (args.cookies) params.append("cookies", args.cookies);
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||||
@@ -207,6 +208,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
params.append("canadaOnly", args.canadaOnly.toString());
|
params.append("canadaOnly", args.canadaOnly.toString());
|
||||||
if (args.maxItems)
|
if (args.maxItems)
|
||||||
params.append("maxItems", args.maxItems.toString());
|
params.append("maxItems", args.maxItems.toString());
|
||||||
|
if (args.cookies) params.append("cookies", args.cookies);
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||||
|
|||||||
@@ -52,6 +52,11 @@ export const tools = [
|
|||||||
type: "number",
|
type: "number",
|
||||||
description: "Maximum price in cents",
|
description: "Maximum price in cents",
|
||||||
},
|
},
|
||||||
|
cookies: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
required: ["query"],
|
required: ["query"],
|
||||||
},
|
},
|
||||||
@@ -133,6 +138,11 @@ export const tools = [
|
|||||||
description: "Maximum number of items to return",
|
description: "Maximum number of items to return",
|
||||||
default: 5,
|
default: 5,
|
||||||
},
|
},
|
||||||
|
cookies: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
required: ["query"],
|
required: ["query"],
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user