Compare commits
24 Commits
441ff436c4
...
update
| Author | SHA1 | Date | |
|---|---|---|---|
| e4ab145d70 | |||
| 1dce0392e3 | |||
| 251fcbb7d9 | |||
| 9bc57d6b54 | |||
| 4a467c9f02 | |||
| f944d319c2 | |||
| cf9784a565 | |||
| df0c528535 | |||
| 2f97d3eafd | |||
| 65eb8d1724 | |||
| f3839aba54 | |||
| 90b98bfb09 | |||
| eb6705df0f | |||
| 72525609ed | |||
| 8b0a65860c | |||
| f9b1c7e096 | |||
| 9edc74cbeb | |||
| ee0fca826d | |||
| f7372612fb | |||
| bce126664e | |||
| 8cbf11538e | |||
| 79f47fdaef | |||
| de5069bf2b | |||
| 637f1a4e75 |
181
.dockerignore
181
.dockerignore
@@ -1,145 +1,84 @@
|
|||||||
# Dependencies
|
# =============================================================================
|
||||||
|
# Dependencies & Build Output
|
||||||
|
# =============================================================================
|
||||||
node_modules/
|
node_modules/
|
||||||
npm-debug.log*
|
dist/
|
||||||
yarn-debug.log*
|
out/
|
||||||
yarn-error.log*
|
|
||||||
bun.sum
|
|
||||||
|
|
||||||
# Runtime data
|
|
||||||
pids
|
|
||||||
*.pid
|
|
||||||
*.seed
|
|
||||||
*.pid.lock
|
|
||||||
|
|
||||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
|
||||||
lib-cov
|
|
||||||
|
|
||||||
# Coverage directory used by tools like istanbul
|
|
||||||
coverage/
|
|
||||||
*.lcov
|
|
||||||
|
|
||||||
# nyc test coverage
|
|
||||||
.nyc_output
|
|
||||||
|
|
||||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
|
||||||
.grunt
|
|
||||||
|
|
||||||
# Bower dependency directory (https://bower.io/)
|
|
||||||
bower_components
|
|
||||||
|
|
||||||
# node-waf configuration
|
|
||||||
.lock-wscript
|
|
||||||
|
|
||||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
|
||||||
build/Release
|
|
||||||
|
|
||||||
# Dependency directories
|
|
||||||
jspm_packages/
|
|
||||||
|
|
||||||
# TypeScript cache
|
|
||||||
*.tsbuildinfo
|
|
||||||
|
|
||||||
# Optional npm cache directory
|
|
||||||
.npm
|
|
||||||
|
|
||||||
# Optional eslint cache
|
|
||||||
.eslintcache
|
|
||||||
|
|
||||||
# Microbundle cache
|
|
||||||
.rpt2_cache/
|
|
||||||
.rts2_cache_cjs/
|
|
||||||
.rts2_cache_es/
|
|
||||||
.rts2_cache_umd/
|
|
||||||
|
|
||||||
# Optional REPL history
|
|
||||||
.node_repl_history
|
|
||||||
|
|
||||||
# Output of 'npm pack'
|
|
||||||
*.tgz
|
*.tgz
|
||||||
|
|
||||||
# Yarn Integrity file
|
# =============================================================================
|
||||||
.yarn-integrity
|
# Sensitive Files
|
||||||
|
# =============================================================================
|
||||||
# dotenv environment variables file
|
|
||||||
.env
|
.env
|
||||||
.env.local
|
.env.*
|
||||||
.env.development.local
|
.envrc
|
||||||
.env.test.local
|
cookies/
|
||||||
.env.production.local
|
*.pem
|
||||||
|
*.key
|
||||||
|
*.cert
|
||||||
|
*secret*
|
||||||
|
*credential*
|
||||||
|
|
||||||
# parcel-bundler cache (https://parceljs.org/)
|
# =============================================================================
|
||||||
.cache
|
# Development Tools & Config
|
||||||
.parcel-cache
|
# =============================================================================
|
||||||
|
# Nix/Devenv
|
||||||
|
.devenv/
|
||||||
|
.devenv.flake.nix
|
||||||
|
devenv.*
|
||||||
|
.direnv/
|
||||||
|
|
||||||
# Next.js build output
|
# Linting/Formatting
|
||||||
.next
|
biome.json
|
||||||
|
.eslintcache
|
||||||
|
.pre-commit-config.yaml
|
||||||
|
|
||||||
# Nuxt.js build / generate output
|
# IDE/Editor
|
||||||
.nuxt
|
|
||||||
dist
|
|
||||||
|
|
||||||
# Gatsby files
|
|
||||||
.cache/
|
|
||||||
public
|
|
||||||
|
|
||||||
# Vuepress build output
|
|
||||||
.vuepress/dist
|
|
||||||
|
|
||||||
# Serverless directories
|
|
||||||
.serverless/
|
|
||||||
|
|
||||||
# FuseBox cache
|
|
||||||
.fusebox/
|
|
||||||
|
|
||||||
# DynamoDB Local files
|
|
||||||
.dynamodb/
|
|
||||||
|
|
||||||
# TernJS port file
|
|
||||||
.tern-port
|
|
||||||
|
|
||||||
# Stores VSCode versions used for testing VSCode extensions
|
|
||||||
.vscode-test
|
|
||||||
|
|
||||||
# IDE and editor files
|
|
||||||
.vscode/
|
.vscode/
|
||||||
.idea/
|
.idea/
|
||||||
*.swp
|
*.swp
|
||||||
*.swo
|
*.swo
|
||||||
*~
|
*~
|
||||||
|
|
||||||
# OS generated files
|
# AI Assistant Config
|
||||||
.DS_Store
|
.claude/
|
||||||
.DS_Store?
|
CLAUDE.md
|
||||||
._*
|
AGENTS.md
|
||||||
.Spotlight-V100
|
opencode.jsonc
|
||||||
.Trashes
|
|
||||||
ehthumbs.db
|
|
||||||
Thumbs.db
|
|
||||||
|
|
||||||
# Git
|
# =============================================================================
|
||||||
.git
|
# Documentation (not needed at runtime)
|
||||||
|
# =============================================================================
|
||||||
|
README.md
|
||||||
|
*.md
|
||||||
|
docs/
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Git & Docker (avoid recursive inclusion)
|
||||||
|
# =============================================================================
|
||||||
|
.git/
|
||||||
.gitignore
|
.gitignore
|
||||||
|
|
||||||
# Docker
|
|
||||||
Dockerfile*
|
Dockerfile*
|
||||||
.dockerignore
|
.dockerignore
|
||||||
|
|
||||||
# Documentation
|
# =============================================================================
|
||||||
README.md
|
# Testing & Coverage
|
||||||
docs/
|
# =============================================================================
|
||||||
|
|
||||||
# Test files
|
|
||||||
test/
|
test/
|
||||||
tests/
|
tests/
|
||||||
*.test.js
|
|
||||||
*.test.ts
|
*.test.ts
|
||||||
*.spec.js
|
|
||||||
*.spec.ts
|
*.spec.ts
|
||||||
|
coverage/
|
||||||
|
*.lcov
|
||||||
|
.nyc_output/
|
||||||
|
|
||||||
# Development files
|
# =============================================================================
|
||||||
CLAUDE.md
|
# OS & Misc
|
||||||
devenv.*
|
# =============================================================================
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
*.log
|
*.log
|
||||||
|
*.pid
|
||||||
# Runtime cookies/config
|
.cache/
|
||||||
cookies/
|
examples/
|
||||||
|
scripts/
|
||||||
|
|||||||
54
AGENTS.md
54
AGENTS.md
@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
|
|||||||
- `GET /api/status` - Health check
|
- `GET /api/status` - Health check
|
||||||
- `GET /api/kijiji?q={query}` - Search Kijiji
|
- `GET /api/kijiji?q={query}` - Search Kijiji
|
||||||
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
||||||
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay
|
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
|
||||||
- `GET /api/*` - 404 fallback
|
- `GET /api/*` - 404 fallback
|
||||||
|
|
||||||
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
||||||
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
|
|||||||
**Tools:**
|
**Tools:**
|
||||||
- `search_kijiji` - Search Kijiji (query, maxItems)
|
- `search_kijiji` - Search Kijiji (query, maxItems)
|
||||||
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
||||||
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems)
|
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
|
||||||
|
|
||||||
## API Response Formats
|
## API Response Formats
|
||||||
|
|
||||||
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
|
|||||||
### eBay-specific fields
|
### eBay-specific fields
|
||||||
Minimal - mainly the common fields
|
Minimal - mainly the common fields
|
||||||
|
|
||||||
|
## Cookie Management
|
||||||
|
|
||||||
|
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
|
||||||
|
|
||||||
|
### Cookie Priority Hierarchy (High → Low)
|
||||||
|
All scrapers follow this loading order:
|
||||||
|
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
|
||||||
|
2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||||
|
3. **Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback)
|
||||||
|
|
||||||
|
### Facebook Cookies
|
||||||
|
- **Required for**: Facebook Marketplace scraping
|
||||||
|
- **Format**: JSON array (see `cookies/README.md`)
|
||||||
|
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
|
||||||
|
|
||||||
|
**Setup:**
|
||||||
|
```bash
|
||||||
|
# Option 1: File (fallback)
|
||||||
|
# Create cookies/facebook.json with cookie array
|
||||||
|
|
||||||
|
# Option 2: Environment variable
|
||||||
|
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||||
|
|
||||||
|
# Option 3: URL parameter (highest priority)
|
||||||
|
curl "http://localhost:4005/api/facebook?q=laptop&cookies=[{...}]"
|
||||||
|
```
|
||||||
|
|
||||||
|
### eBay Cookies
|
||||||
|
- **Required for**: Bypassing bot detection
|
||||||
|
- **Format**: Cookie string `"name=value; name2=value2"`
|
||||||
|
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
|
||||||
|
|
||||||
|
**Setup:**
|
||||||
|
```bash
|
||||||
|
# Option 1: File (fallback)
|
||||||
|
# Create cookies/ebay.json with cookie string
|
||||||
|
|
||||||
|
# Option 2: Environment variable
|
||||||
|
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||||
|
|
||||||
|
# Option 3: URL parameter (highest priority)
|
||||||
|
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
|
||||||
|
|
||||||
## Technical Details
|
## Technical Details
|
||||||
|
|
||||||
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
||||||
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
|
|||||||
|
|
||||||
## Development Notes
|
## Development Notes
|
||||||
|
|
||||||
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json`
|
- **Cookie files** are git-ignored for security (see `cookies/README.md`)
|
||||||
- eBay uses custom headers to bypass basic bot detection
|
|
||||||
- Kijiji parses Apollo state from Next.js hydration data
|
- Kijiji parses Apollo state from Next.js hydration data
|
||||||
- All scrapers handle retries on 429/5xx errors
|
- All scrapers handle retries on 429/5xx errors
|
||||||
|
- Cookie priority ensures flexibility across different deployment environments
|
||||||
|
|||||||
64
biome.json
64
biome.json
@@ -1,34 +1,34 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
|
"$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
|
||||||
"vcs": {
|
"vcs": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"clientKind": "git",
|
"clientKind": "git",
|
||||||
"useIgnoreFile": true
|
"useIgnoreFile": true
|
||||||
},
|
},
|
||||||
"files": {
|
"files": {
|
||||||
"includes": ["**", "!!**/dist"]
|
"includes": ["**", "!!**/dist"]
|
||||||
},
|
},
|
||||||
"formatter": {
|
"formatter": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"indentStyle": "space"
|
"indentStyle": "space"
|
||||||
},
|
},
|
||||||
"linter": {
|
"linter": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"rules": {
|
"rules": {
|
||||||
"recommended": true
|
"recommended": true
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"javascript": {
|
"javascript": {
|
||||||
"formatter": {
|
"formatter": {
|
||||||
"quoteStyle": "double"
|
"quoteStyle": "double"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"assist": {
|
"assist": {
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"actions": {
|
"actions": {
|
||||||
"source": {
|
"source": {
|
||||||
"organizeImports": "on"
|
"organizeImports": "on"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,24 +1,33 @@
|
|||||||
# Facebook Marketplace Cookies Setup
|
# Marketplace Cookies Setup
|
||||||
|
|
||||||
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies.
|
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
|
||||||
|
|
||||||
## Option 1: Cookies File (`facebook.json`)
|
## Cookie Priority Hierarchy
|
||||||
|
|
||||||
1. Log into Facebook in your browser
|
All scrapers follow this priority order (highest to lowest):
|
||||||
2. Open Developer Tools → Network tab
|
1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
|
||||||
3. Visit facebook.com/marketplace (ensure you're logged in)
|
2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||||
4. Look for any marketplace-related requests in the Network tab
|
3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
|
||||||
5. Export cookies from the browser's Application/Storage → Cookies section
|
|
||||||
6. Save the cookies as a JSON array to `facebook.json`
|
|
||||||
|
|
||||||
The `facebook.json` file should contain Facebook session cookies, particularly:
|
---
|
||||||
|
|
||||||
|
## Facebook Marketplace (`facebook.json`)
|
||||||
|
|
||||||
|
### Required Cookies
|
||||||
- `c_user`: Your Facebook user ID
|
- `c_user`: Your Facebook user ID
|
||||||
- `xs`: Facebook session token
|
- `xs`: Facebook session token
|
||||||
- `fr`: Facebook request token
|
- `fr`: Facebook request token
|
||||||
- `datr`: Data attribution token
|
- `datr`: Data attribution token
|
||||||
- `sb`: Session browser token
|
- `sb`: Session browser token
|
||||||
|
|
||||||
Example structure:
|
### Setup Methods
|
||||||
|
|
||||||
|
**Method 1: Cookie File (Lowest Priority)**
|
||||||
|
1. Log into Facebook in your browser
|
||||||
|
2. Open Developer Tools → Application/Storage → Cookies
|
||||||
|
3. Export cookies as JSON array to `facebook.json`
|
||||||
|
|
||||||
|
Example `facebook.json`:
|
||||||
```json
|
```json
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
@@ -27,26 +36,59 @@ Example structure:
|
|||||||
"domain": ".facebook.com",
|
"domain": ".facebook.com",
|
||||||
"path": "/",
|
"path": "/",
|
||||||
"secure": true
|
"secure": true
|
||||||
},
|
}
|
||||||
// ... other cookies
|
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
## Option 2: URL Parameter
|
**Method 2: Environment Variable**
|
||||||
|
```bash
|
||||||
You can pass cookies directly via the `cookies` URL parameter:
|
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||||
|
|
||||||
```
|
```
|
||||||
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
|
|
||||||
|
**Method 3: URL Parameter (Highest Priority)**
|
||||||
```
|
```
|
||||||
|
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## eBay (`ebay.json`)
|
||||||
|
|
||||||
|
eBay has aggressive bot detection that blocks requests without valid session cookies.
|
||||||
|
|
||||||
|
### Setup Methods
|
||||||
|
|
||||||
|
**Method 1: Cookie File (Lowest Priority)**
|
||||||
|
1. Log into eBay in your browser
|
||||||
|
2. Open Developer Tools → Network tab
|
||||||
|
3. Visit ebay.ca and inspect any request headers
|
||||||
|
4. Copy the full `Cookie` header value
|
||||||
|
5. Save as plain text to `ebay.json` (see `ebay.json.example`)
|
||||||
|
|
||||||
|
Example `ebay.json`:
|
||||||
|
```
|
||||||
|
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
|
||||||
|
```
|
||||||
|
|
||||||
|
**Method 2: Environment Variable**
|
||||||
|
```bash
|
||||||
|
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Method 3: URL Parameter (Highest Priority)**
|
||||||
|
```
|
||||||
|
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## Important Notes
|
## Important Notes
|
||||||
|
|
||||||
- Cookies must be from an active Facebook session
|
- Cookies must be from active browser sessions
|
||||||
- Cookies expire, so you may need to refresh them periodically
|
- Cookies expire and need periodic refresh
|
||||||
- Never share real cookies or commit them to version control
|
- **NEVER** commit real cookies to version control
|
||||||
- Facebook may block automated scraping even with valid cookies
|
- Platforms may still block automated scraping despite valid cookies
|
||||||
|
|
||||||
## Security
|
## Security
|
||||||
|
|
||||||
The cookies file is intentionally left out of version control for security reasons.
|
All `*.json` files in this directory are git-ignored for security.
|
||||||
|
|||||||
1
cookies/ebay.json.example
Normal file
1
cookies/ebay.json.example
Normal file
@@ -0,0 +1 @@
|
|||||||
|
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE
|
||||||
9
opencode.jsonc
Normal file
9
opencode.jsonc
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"$schema": "https://opencode.ai/config.json",
|
||||||
|
"mcp": {
|
||||||
|
"marketplace-scrape": {
|
||||||
|
"type": "remote",
|
||||||
|
"url": "http://localhost:4006/mcp"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
12
package.json
12
package.json
@@ -2,11 +2,19 @@
|
|||||||
"name": "marketplace-scrapers-monorepo",
|
"name": "marketplace-scrapers-monorepo",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"ci": "biome ci"
|
"ci": "biome ci",
|
||||||
|
"clean": "rm -rf dist",
|
||||||
|
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
|
||||||
|
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
|
||||||
|
"build:all": "bun run build:api && bun run build:mcp",
|
||||||
|
"build": "bun run clean && bun run build:all",
|
||||||
|
"start": "./scripts/start.sh"
|
||||||
},
|
},
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"workspaces": ["packages/*"],
|
"workspaces": [
|
||||||
|
"packages/*"
|
||||||
|
],
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@biomejs/biome": "2.3.11"
|
"@biomejs/biome": "2.3.11"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
|
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
|
||||||
* Search eBay for listings (default: Buy It Now only, Canada only)
|
* Search eBay for listings (default: Buy It Now only, Canada only)
|
||||||
|
* Optional: Pass cookies parameter to bypass bot detection
|
||||||
*/
|
*/
|
||||||
export async function ebayRoute(req: Request): Promise<Response> {
|
export async function ebayRoute(req: Request): Promise<Response> {
|
||||||
try {
|
try {
|
||||||
@@ -37,6 +38,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
|||||||
|
|
||||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
||||||
|
const cookies = reqUrl.searchParams.get("cookies") || undefined;
|
||||||
|
|
||||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||||
minPrice,
|
minPrice,
|
||||||
@@ -46,6 +48,7 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
|||||||
keywords,
|
keywords,
|
||||||
buyItNowOnly,
|
buyItNowOnly,
|
||||||
canadaOnly,
|
canadaOnly,
|
||||||
|
cookies,
|
||||||
});
|
});
|
||||||
|
|
||||||
const results = maxItems ? items.slice(0, maxItems) : items;
|
const results = maxItems ? items.slice(0, maxItems) : items;
|
||||||
|
|||||||
@@ -41,12 +41,13 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
|||||||
maxPages,
|
maxPages,
|
||||||
priceMin,
|
priceMin,
|
||||||
priceMax,
|
priceMax,
|
||||||
|
cookies: reqUrl.searchParams.get("cookies") || undefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const items = await fetchKijijiItems(
|
const items = await fetchKijijiItems(
|
||||||
SEARCH_QUERY,
|
SEARCH_QUERY,
|
||||||
1,
|
4, // 4 requests per second for faster scraping
|
||||||
"https://www.kijiji.ca",
|
"https://www.kijiji.ca",
|
||||||
searchOptions,
|
searchOptions,
|
||||||
{},
|
{},
|
||||||
|
|||||||
@@ -2,5 +2,5 @@
|
|||||||
* Health check endpoint
|
* Health check endpoint
|
||||||
*/
|
*/
|
||||||
export function statusRoute(): Response {
|
export function statusRoute(): Response {
|
||||||
return new Response("OK", { status: 200 });
|
return new Response("OK", { status: 200 });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,45 +1,43 @@
|
|||||||
// Export all scrapers
|
// Export all scrapers
|
||||||
export {
|
|
||||||
default as fetchKijijiItems,
|
|
||||||
slugify,
|
|
||||||
resolveLocationId,
|
|
||||||
resolveCategoryId,
|
|
||||||
buildSearchUrl,
|
|
||||||
extractApolloState,
|
|
||||||
parseSearch,
|
|
||||||
parseDetailedListing,
|
|
||||||
HttpError,
|
|
||||||
NetworkError,
|
|
||||||
ParseError,
|
|
||||||
RateLimitError,
|
|
||||||
ValidationError,
|
|
||||||
} from "./scrapers/kijiji";
|
|
||||||
export type {
|
|
||||||
KijijiListingDetails,
|
|
||||||
DetailedListing,
|
|
||||||
SearchOptions,
|
|
||||||
ListingFetchOptions,
|
|
||||||
} from "./scrapers/kijiji";
|
|
||||||
|
|
||||||
export {
|
|
||||||
default as fetchFacebookItems,
|
|
||||||
fetchFacebookItem,
|
|
||||||
parseFacebookCookieString,
|
|
||||||
ensureFacebookCookies,
|
|
||||||
extractFacebookMarketplaceData,
|
|
||||||
extractFacebookItemData,
|
|
||||||
parseFacebookAds,
|
|
||||||
parseFacebookItem,
|
|
||||||
} from "./scrapers/facebook";
|
|
||||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
|
||||||
|
|
||||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
|
||||||
export type { EbayListingDetails } from "./scrapers/ebay";
|
export type { EbayListingDetails } from "./scrapers/ebay";
|
||||||
|
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||||
// Export shared utilities
|
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||||
export * from "./utils/http";
|
export {
|
||||||
export * from "./utils/delay";
|
default as fetchFacebookItems,
|
||||||
export * from "./utils/format";
|
ensureFacebookCookies,
|
||||||
|
extractFacebookItemData,
|
||||||
|
extractFacebookMarketplaceData,
|
||||||
|
fetchFacebookItem,
|
||||||
|
parseFacebookAds,
|
||||||
|
parseFacebookCookieString,
|
||||||
|
parseFacebookItem,
|
||||||
|
} from "./scrapers/facebook";
|
||||||
|
export type {
|
||||||
|
DetailedListing,
|
||||||
|
KijijiListingDetails,
|
||||||
|
ListingFetchOptions,
|
||||||
|
SearchOptions,
|
||||||
|
} from "./scrapers/kijiji";
|
||||||
|
export {
|
||||||
|
buildSearchUrl,
|
||||||
|
default as fetchKijijiItems,
|
||||||
|
extractApolloState,
|
||||||
|
HttpError,
|
||||||
|
NetworkError,
|
||||||
|
ParseError,
|
||||||
|
parseDetailedListing,
|
||||||
|
parseSearch,
|
||||||
|
RateLimitError,
|
||||||
|
resolveCategoryId,
|
||||||
|
resolveLocationId,
|
||||||
|
slugify,
|
||||||
|
ValidationError,
|
||||||
|
} from "./scrapers/kijiji";
|
||||||
// Export shared types
|
// Export shared types
|
||||||
export * from "./types/common";
|
export * from "./types/common";
|
||||||
|
// Export shared utilities
|
||||||
|
export * from "./utils/cookies";
|
||||||
|
export * from "./utils/delay";
|
||||||
|
export * from "./utils/format";
|
||||||
|
export * from "./utils/http";
|
||||||
|
|||||||
@@ -1,9 +1,18 @@
|
|||||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import type { HTMLString } from "../types/common";
|
import {
|
||||||
|
type CookieConfig,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
loadCookiesOptional,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
|
||||||
import { isRecord } from "../utils/http";
|
// eBay cookie configuration
|
||||||
|
const EBAY_COOKIE_CONFIG: CookieConfig = {
|
||||||
|
name: "eBay",
|
||||||
|
domain: ".ebay.ca",
|
||||||
|
envVar: "EBAY_COOKIE",
|
||||||
|
filePath: "./cookies/ebay.json",
|
||||||
|
};
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
@@ -43,7 +52,7 @@ function parseEbayPrice(
|
|||||||
|
|
||||||
const amountStr = numberMatches[0].replace(/,/g, "");
|
const amountStr = numberMatches[0].replace(/,/g, "");
|
||||||
const dollars = parseFloat(amountStr);
|
const dollars = parseFloat(amountStr);
|
||||||
if (isNaN(dollars)) return null;
|
if (Number.isNaN(dollars)) return null;
|
||||||
|
|
||||||
const cents = Math.round(dollars * 100);
|
const cents = Math.round(dollars * 100);
|
||||||
|
|
||||||
@@ -105,13 +114,26 @@ function parseEbayListings(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Find the container - go up several levels to find the item container
|
// Find the container - go up several levels to find the item container
|
||||||
// Modern eBay uses complex nested structures
|
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||||
let container = linkElement.parentElement?.parentElement?.parentElement;
|
let container: Element | null = linkElement;
|
||||||
if (!container) {
|
let depth = 0;
|
||||||
// Try a different level
|
const maxDepth = 15;
|
||||||
container = linkElement.parentElement?.parentElement;
|
|
||||||
|
// Walk up until we find a list item or results container
|
||||||
|
while (container && depth < maxDepth) {
|
||||||
|
const classes = container.className || "";
|
||||||
|
if (
|
||||||
|
classes.includes("s-item") ||
|
||||||
|
classes.includes("srp-results") ||
|
||||||
|
container.tagName === "LI"
|
||||||
|
) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
container = container.parentElement;
|
||||||
|
depth++;
|
||||||
}
|
}
|
||||||
if (!container) continue;
|
|
||||||
|
if (!container || depth >= maxDepth) continue;
|
||||||
|
|
||||||
// Extract title - look for heading or title-related elements near the link
|
// Extract title - look for heading or title-related elements near the link
|
||||||
// Modern eBay often uses h3, span, or div with text content near the link
|
// Modern eBay often uses h3, span, or div with text content near the link
|
||||||
@@ -172,8 +194,9 @@ function parseEbayListings(
|
|||||||
if (title === "Shop on eBay" || title.length < 3) continue;
|
if (title === "Shop on eBay" || title.length < 3) continue;
|
||||||
|
|
||||||
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
||||||
|
// Updated for 2026 eBay HTML structure
|
||||||
let priceElement = container.querySelector(
|
let priceElement = container.querySelector(
|
||||||
'[class*="s-item__price"], .s-item__price, [class*="price"]',
|
'[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
|
||||||
);
|
);
|
||||||
|
|
||||||
// If no direct price class, look for spans containing $ (but not titles)
|
// If no direct price class, look for spans containing $ (but not titles)
|
||||||
@@ -185,8 +208,7 @@ function parseEbayListings(
|
|||||||
const text = el.textContent?.trim();
|
const text = el.textContent?.trim();
|
||||||
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
|
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
|
||||||
if (
|
if (
|
||||||
text &&
|
text?.includes("$") &&
|
||||||
text.includes("$") &&
|
|
||||||
text.length < 100 &&
|
text.length < 100 &&
|
||||||
!text.includes("laptop") &&
|
!text.includes("laptop") &&
|
||||||
!text.includes("computer") &&
|
!text.includes("computer") &&
|
||||||
@@ -310,6 +332,32 @@ function parseEbayListings(
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------- Cookie Loading -----------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load eBay cookies with priority: URL param > ENV var > file
|
||||||
|
* Uses shared cookie utility for consistent handling across all scrapers
|
||||||
|
*/
|
||||||
|
async function loadEbayCookies(
|
||||||
|
cookiesSource?: string,
|
||||||
|
): Promise<string | undefined> {
|
||||||
|
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
|
||||||
|
|
||||||
|
if (cookies.length === 0) {
|
||||||
|
console.warn(
|
||||||
|
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
|
||||||
|
"Provide cookies via (in priority order):\n" +
|
||||||
|
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
||||||
|
" 2. EBAY_COOKIE environment variable, or\n" +
|
||||||
|
" 3. ./cookies/ebay.json file (lowest priority)\n" +
|
||||||
|
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
||||||
|
);
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
return formatCookiesForHeader(cookies, "www.ebay.ca");
|
||||||
|
}
|
||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
export default async function fetchEbayItems(
|
export default async function fetchEbayItems(
|
||||||
@@ -323,6 +371,7 @@ export default async function fetchEbayItems(
|
|||||||
keywords?: string[];
|
keywords?: string[];
|
||||||
buyItNowOnly?: boolean;
|
buyItNowOnly?: boolean;
|
||||||
canadaOnly?: boolean;
|
canadaOnly?: boolean;
|
||||||
|
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||||
} = {},
|
} = {},
|
||||||
) {
|
) {
|
||||||
const {
|
const {
|
||||||
@@ -333,8 +382,12 @@ export default async function fetchEbayItems(
|
|||||||
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
||||||
buyItNowOnly = true,
|
buyItNowOnly = true,
|
||||||
canadaOnly = true,
|
canadaOnly = true,
|
||||||
|
cookies: cookiesSource,
|
||||||
} = opts;
|
} = opts;
|
||||||
|
|
||||||
|
// Load eBay cookies with priority: URL param > ENV var > file
|
||||||
|
const cookies = await loadEbayCookies(cookiesSource);
|
||||||
|
|
||||||
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
||||||
const urlParams = new URLSearchParams({
|
const urlParams = new URLSearchParams({
|
||||||
_nkw: SEARCH_QUERY,
|
_nkw: SEARCH_QUERY,
|
||||||
@@ -363,7 +416,7 @@ export default async function fetchEbayItems(
|
|||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
||||||
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": "en-US,en;q=0.5",
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
"Accept-Encoding": "gzip, deflate, br",
|
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||||
Referer: "https://www.ebay.ca/",
|
Referer: "https://www.ebay.ca/",
|
||||||
Connection: "keep-alive",
|
Connection: "keep-alive",
|
||||||
"Upgrade-Insecure-Requests": "1",
|
"Upgrade-Insecure-Requests": "1",
|
||||||
@@ -374,6 +427,11 @@ export default async function fetchEbayItems(
|
|||||||
Priority: "u=0, i",
|
Priority: "u=0, i",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Add cookies if available (helps bypass bot detection)
|
||||||
|
if (cookies) {
|
||||||
|
headers.Cookie = cookies;
|
||||||
|
}
|
||||||
|
|
||||||
const res = await fetch(searchUrl, {
|
const res = await fetch(searchUrl, {
|
||||||
method: "GET",
|
method: "GET",
|
||||||
headers,
|
headers,
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -3,18 +3,18 @@ export type HTMLString = string;
|
|||||||
|
|
||||||
/** Currency price object with formatting options */
|
/** Currency price object with formatting options */
|
||||||
export interface Price {
|
export interface Price {
|
||||||
amountFormatted: string;
|
amountFormatted: string;
|
||||||
cents: number;
|
cents: number;
|
||||||
currency: string;
|
currency: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Base listing details common across all marketplaces */
|
/** Base listing details common across all marketplaces */
|
||||||
export interface ListingDetails {
|
export interface ListingDetails {
|
||||||
url: string;
|
url: string;
|
||||||
title: string;
|
title: string;
|
||||||
listingPrice: Price;
|
listingPrice: Price;
|
||||||
listingType: string;
|
listingType: string;
|
||||||
listingStatus: string;
|
listingStatus: string;
|
||||||
address?: string | null;
|
address?: string | null;
|
||||||
creationDate?: string;
|
creationDate?: string;
|
||||||
}
|
}
|
||||||
|
|||||||
227
packages/core/src/utils/cookies.ts
Normal file
227
packages/core/src/utils/cookies.ts
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
/**
|
||||||
|
* Shared cookie handling utilities for marketplace scrapers
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * A single cookie record as handled by the helpers in this module.
 *
 * `name`, `value`, `domain` and `path` are required; everything else is
 * optional metadata that may or may not be present on a given record.
 */
export interface Cookie {
  /** Cookie name (the left-hand side of `name=value`). */
  name: string;
  /** Cookie value (the right-hand side of `name=value`). */
  value: string;
  /**
   * Domain the cookie belongs to. A leading "." marks a cookie that also
   * applies to subdomains; without it the cookie is matched against the
   * exact host only (see formatCookiesForHeader).
   */
  domain: string;
  /** URL path the cookie is scoped to. */
  path: string;
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  // NOTE(review): `session`, `partitionKey` and `storeId` look like fields
  // from a browser cookie export; nothing in this module reads them — confirm.
  session?: boolean;
  /** Expiry in seconds since the Unix epoch (it is compared to `Date.now() / 1000`). */
  expirationDate?: number;
  partitionKey?: Record<string, unknown>;
  storeId?: string;
}
|
||||||
|
|
||||||
|
/**
 * Per-marketplace settings that tell the cookie loaders where to look for
 * cookies and how to label them in log output.
 */
export interface CookieConfig {
  /** Human-readable marketplace label used in log messages, e.g. "Facebook" or "Kijiji". */
  name: string;
  /** Domain assigned to cookies parsed from a plain cookie string, e.g. ".facebook.com" or ".kijiji.ca". */
  domain: string;
  /** Name of the environment variable that may hold the cookies, e.g. "FACEBOOK_COOKIE". */
  envVar: string;
  /** Location of the fallback cookie file, e.g. "./cookies/facebook.json". */
  filePath: string;
}
|
||||||
|
|
||||||
|
/**
 * Parse a cookie-header style string into an array of Cookie objects.
 *
 * Accepts the format `"name1=value1; name2=value2"`. Pairs without an "=",
 * or with an empty name or empty value, are skipped. Everything after the
 * first "=" belongs to the value, so values may themselves contain "=".
 *
 * Values are percent-decoded when possible. A value that is not valid
 * percent-encoding (for example `100%`) is kept verbatim instead of letting
 * `decodeURIComponent` throw and discard the whole cookie string.
 *
 * @param cookieString - Raw cookie string; empty or whitespace-only yields `[]`
 * @param domain - Domain assigned to every parsed cookie
 * @returns Parsed cookies with `path: "/"`, `secure: true`, `httpOnly: false`
 *          and `sameSite: "lax"`
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    // Split on the first "=" only; the remainder is the value.
    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent-encoding (URIError): keep the raw value rather
      // than failing the whole parse because of one cookie.
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax" as const,
      expirationDate: undefined,
    });
  }

  return cookies;
}
|
||||||
|
|
||||||
|
/**
 * Parse a JSON array of cookie objects, e.g.
 * `[{"name": "foo", "value": "bar", ...}]`.
 *
 * Entries are kept only when they are truthy and carry string `name` and
 * `value` properties; no other field is checked. JSON that parses to
 * something other than an array yields an empty result.
 *
 * @param jsonString - JSON text to parse
 * @returns The accepted cookie entries, in input order
 * @throws SyntaxError when `jsonString` is not valid JSON
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const decoded: unknown = JSON.parse(jsonString);
  const accepted: Cookie[] = [];

  if (Array.isArray(decoded)) {
    for (const entry of decoded) {
      const hasNameAndValue =
        entry &&
        typeof entry.name === "string" &&
        typeof entry.value === "string";
      if (hasNameAndValue) {
        accepted.push(entry);
      }
    }
  }

  return accepted;
}
|
||||||
|
|
||||||
|
/**
 * Parse cookies from a string whose format is not known in advance.
 *
 * The input is first treated as a JSON array of cookies. If that fails to
 * parse, or produces no cookies, the same input is parsed as a
 * `"name1=value1; name2=value2"` cookie string instead.
 *
 * @param input - JSON array text or cookie string
 * @param defaultDomain - Domain applied to cookies parsed from the string format
 * @returns Parsed cookies (possibly empty)
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  let fromJson: Cookie[] = [];

  try {
    fromJson = parseJsonCookies(input);
  } catch {
    // Not valid JSON; the cookie-string parser below handles it.
  }

  return fromJson.length > 0
    ? fromJson
    : parseCookieString(input, defaultDomain);
}
|
||||||
|
|
||||||
|
/**
 * Read cookies from a file on disk.
 *
 * The file may contain either a JSON array of cookies or a plain cookie
 * string; the trimmed content is passed to parseCookiesAuto.
 *
 * @param filePath - Path of the cookie file
 * @param defaultDomain - Domain applied to cookies parsed from the string format
 * @returns Parsed cookies, or an empty array when the file does not exist
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const handle = Bun.file(filePath);
  const present = await handle.exists();

  if (present) {
    const raw = await handle.text();
    return parseCookiesAuto(raw.trim(), defaultDomain);
  }

  return [];
}
|
||||||
|
|
||||||
|
/**
 * Build the value of a `Cookie` request header from a list of cookies.
 *
 * A cookie is included when it applies to `targetDomain` and has not expired:
 * - a domain with a leading "." matches that domain itself and any subdomain
 *   of it, on a label boundary (".ebay.ca" matches "ebay.ca" and
 *   "www.ebay.ca", but not "notebay.ca");
 * - any other domain must equal `targetDomain` exactly (host-only cookie);
 * - domains are compared case-insensitively;
 * - a cookie without a string `domain` cannot be matched and is skipped
 *   rather than causing a crash;
 * - a cookie whose `expirationDate` (seconds since the Unix epoch) lies in
 *   the past is dropped; a missing or zero `expirationDate` never expires.
 *
 * @param cookies - Candidate cookies
 * @param targetDomain - Host the request is sent to, e.g. "www.ebay.ca"
 * @returns `"name1=value1; name2=value2"`, or `""` when no cookie qualifies
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    // Cookies parsed from JSON are only checked for name/value, so the
    // domain may be absent at runtime despite the declared type.
    if (typeof cookie.domain !== "string" || cookie.domain === "") {
      return false;
    }

    const cookieDomain = cookie.domain.toLowerCase();
    if (cookieDomain.startsWith(".")) {
      // Require a "." boundary so that ".ebay.ca" does not match a
      // different registrable domain that merely ends in "ebay.ca".
      const bareDomain = cookieDomain.slice(1);
      return host === bareDomain || host.endsWith(`.${bareDomain}`);
    }

    return cookieDomain === host;
  };

  const isUnexpired = (cookie: Cookie): boolean =>
    !(cookie.expirationDate && cookie.expirationDate < nowSeconds);

  return cookies
    .filter((cookie) => appliesToHost(cookie) && isUnexpired(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||||
|
|
||||||
|
/**
 * Resolve cookies for a marketplace, trying each source in priority order:
 *
 *   1. the `cookiesSource` argument (URL/API parameter),
 *   2. the environment variable named by `config.envVar`,
 *   3. the file at `config.filePath`.
 *
 * Every source may hold either a JSON array or a cookie string. The first
 * source that yields at least one cookie wins and is logged. A parameter or
 * environment variable that is present but yields no cookies produces a
 * warning before the next source is tried; a failure while reading the file
 * is also downgraded to a warning.
 *
 * @param config - Marketplace-specific names, domain and locations
 * @param cookiesSource - Optional cookies supplied directly by the caller
 * @returns A non-empty array of cookies
 * @throws Error when none of the sources yields a cookie
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Source 1: explicitly supplied cookies.
  if (cookiesSource) {
    const fromParam = parseCookiesAuto(cookiesSource, config.domain);
    if (fromParam.length === 0) {
      console.warn(
        `${config.name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(
        `Loaded ${fromParam.length} ${config.name} cookies from parameter`,
      );
      return fromParam;
    }
  }

  // Source 2: environment variable (ignored when unset or blank).
  const envValue = process.env[config.envVar];
  if (envValue?.trim()) {
    const fromEnv = parseCookiesAuto(envValue, config.domain);
    if (fromEnv.length === 0) {
      console.warn(`${config.envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${fromEnv.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return fromEnv;
    }
  }

  // Source 3: cookie file. Read errors are reported but not fatal here.
  try {
    const fromFile = await loadCookiesFromFile(config.filePath, config.domain);
    if (fromFile.length > 0) {
      console.log(
        `Loaded ${fromFile.length} ${config.name} cookies from ${config.filePath}`,
      );
      return fromFile;
    }
  } catch (err) {
    console.warn(`Could not load cookies from ${config.filePath}: ${err}`);
  }

  // Nothing usable anywhere: explain every option to the caller.
  const help = [
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):`,
    ` 1. 'cookies' parameter (highest priority), or`,
    ` 2. ${config.envVar} environment variable, or`,
    ` 3. ${config.filePath} file (lowest priority)`,
    'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  ];
  throw new Error(help.join("\n"));
}
|
||||||
|
|
||||||
|
/**
 * Non-throwing variant of ensureCookies.
 *
 * Looks up cookies through the same sources, but any failure (including
 * "no cookies found") results in an empty array instead of an error.
 *
 * @param config - Marketplace-specific names, domain and locations
 * @param cookiesSource - Optional cookies supplied directly by the caller
 * @returns The resolved cookies, or `[]` when ensureCookies rejects
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  const noCookies: Cookie[] = [];
  return ensureCookies(config, cookiesSource).catch(() => noCookies);
}
|
||||||
@@ -4,5 +4,5 @@
|
|||||||
* @returns A promise that resolves after the specified delay
|
* @returns A promise that resolves after the specified delay
|
||||||
*/
|
*/
|
||||||
export function delay(ms: number): Promise<void> {
|
export function delay(ms: number): Promise<void> {
|
||||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,18 +4,21 @@
|
|||||||
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
|
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
|
||||||
* @returns Formatted currency string
|
* @returns Formatted currency string
|
||||||
*/
|
*/
|
||||||
export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string {
|
export function formatCentsToCurrency(
|
||||||
try {
|
cents: number,
|
||||||
const formatter = new Intl.NumberFormat(locale, {
|
locale: string = "en-CA",
|
||||||
style: "currency",
|
): string {
|
||||||
currency: "CAD",
|
try {
|
||||||
minimumFractionDigits: 2,
|
const formatter = new Intl.NumberFormat(locale, {
|
||||||
maximumFractionDigits: 2,
|
style: "currency",
|
||||||
});
|
currency: "CAD",
|
||||||
return formatter.format(cents / 100);
|
minimumFractionDigits: 2,
|
||||||
} catch (error) {
|
maximumFractionDigits: 2,
|
||||||
// Fallback if locale is not supported
|
});
|
||||||
const dollars = (cents / 100).toFixed(2);
|
return formatter.format(cents / 100);
|
||||||
return `$${dollars}`;
|
} catch {
|
||||||
}
|
// Fallback if locale is not supported
|
||||||
|
const dollars = (cents / 100).toFixed(2);
|
||||||
|
return `$${dollars}`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,79 +1,79 @@
|
|||||||
/** Custom error class for HTTP-related failures */
|
/** Custom error class for HTTP-related failures */
|
||||||
export class HttpError extends Error {
|
export class HttpError extends Error {
|
||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly statusCode: number,
|
public readonly statusCode: number,
|
||||||
public readonly url?: string
|
public readonly url?: string,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "HttpError";
|
this.name = "HttpError";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Error class for network failures (timeouts, connection issues) */
|
/** Error class for network failures (timeouts, connection issues) */
|
||||||
export class NetworkError extends Error {
|
export class NetworkError extends Error {
|
||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly url: string,
|
public readonly url: string,
|
||||||
public readonly cause?: Error
|
public readonly cause?: Error,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "NetworkError";
|
this.name = "NetworkError";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Error class for parsing failures */
|
/** Error class for parsing failures */
|
||||||
export class ParseError extends Error {
|
export class ParseError extends Error {
|
||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly data?: unknown
|
public readonly data?: unknown,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "ParseError";
|
this.name = "ParseError";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Error class for rate limiting */
|
/** Error class for rate limiting */
|
||||||
export class RateLimitError extends Error {
|
export class RateLimitError extends Error {
|
||||||
constructor(
|
constructor(
|
||||||
message: string,
|
message: string,
|
||||||
public readonly url: string,
|
public readonly url: string,
|
||||||
public readonly resetTime?: number
|
public readonly resetTime?: number,
|
||||||
) {
|
) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "RateLimitError";
|
this.name = "RateLimitError";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Error class for validation failures */
|
/** Error class for validation failures */
|
||||||
export class ValidationError extends Error {
|
export class ValidationError extends Error {
|
||||||
constructor(message: string) {
|
constructor(message: string) {
|
||||||
super(message);
|
super(message);
|
||||||
this.name = "ValidationError";
|
this.name = "ValidationError";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Type guard to check if a value is a record (object) */
|
/** Type guard to check if a value is a record (object) */
|
||||||
export function isRecord(value: unknown): value is Record<string, unknown> {
|
export function isRecord(value: unknown): value is Record<string, unknown> {
|
||||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate exponential backoff delay with jitter
|
* Calculate exponential backoff delay with jitter
|
||||||
*/
|
*/
|
||||||
function calculateBackoffDelay(attempt: number, baseMs: number): number {
|
function calculateBackoffDelay(attempt: number, baseMs: number): number {
|
||||||
const exponentialDelay = baseMs * 2 ** attempt;
|
const exponentialDelay = baseMs * 2 ** attempt;
|
||||||
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
|
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
|
||||||
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
|
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Options for fetchHtml */
|
/** Options for fetchHtml */
|
||||||
export interface FetchHtmlOptions {
|
export interface FetchHtmlOptions {
|
||||||
maxRetries?: number;
|
maxRetries?: number;
|
||||||
retryBaseMs?: number;
|
retryBaseMs?: number;
|
||||||
timeoutMs?: number;
|
timeoutMs?: number;
|
||||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -85,116 +85,116 @@ export interface FetchHtmlOptions {
|
|||||||
* @throws HttpError, NetworkError, or RateLimitError on failure
|
* @throws HttpError, NetworkError, or RateLimitError on failure
|
||||||
*/
|
*/
|
||||||
export async function fetchHtml(
|
export async function fetchHtml(
|
||||||
url: string,
|
url: string,
|
||||||
delayMs: number,
|
delayMs: number,
|
||||||
opts?: FetchHtmlOptions
|
opts?: FetchHtmlOptions,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const maxRetries = opts?.maxRetries ?? 3;
|
const maxRetries = opts?.maxRetries ?? 3;
|
||||||
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
||||||
const timeoutMs = opts?.timeoutMs ?? 30000;
|
const timeoutMs = opts?.timeoutMs ?? 30000;
|
||||||
|
|
||||||
const defaultHeaders: Record<string, string> = {
|
const defaultHeaders: Record<string, string> = {
|
||||||
accept:
|
accept:
|
||||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||||
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
|
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
|
||||||
"cache-control": "no-cache",
|
"cache-control": "no-cache",
|
||||||
"upgrade-insecure-requests": "1",
|
"upgrade-insecure-requests": "1",
|
||||||
"user-agent":
|
"user-agent":
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
|
||||||
};
|
};
|
||||||
|
|
||||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||||
try {
|
try {
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
||||||
|
|
||||||
const res = await fetch(url, {
|
const res = await fetch(url, {
|
||||||
method: "GET",
|
method: "GET",
|
||||||
headers: { ...defaultHeaders, ...opts?.headers },
|
headers: { ...defaultHeaders, ...opts?.headers },
|
||||||
signal: controller.signal,
|
signal: controller.signal,
|
||||||
});
|
});
|
||||||
|
|
||||||
clearTimeout(timeoutId);
|
clearTimeout(timeoutId);
|
||||||
|
|
||||||
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
||||||
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
||||||
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
|
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
|
||||||
|
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
// Handle rate limiting
|
// Handle rate limiting
|
||||||
if (res.status === 429) {
|
if (res.status === 429) {
|
||||||
const resetSeconds = rateLimitReset
|
const resetSeconds = rateLimitReset
|
||||||
? Number(rateLimitReset)
|
? Number(rateLimitReset)
|
||||||
: Number.NaN;
|
: Number.NaN;
|
||||||
const waitMs = Number.isFinite(resetSeconds)
|
const waitMs = Number.isFinite(resetSeconds)
|
||||||
? Math.max(0, resetSeconds * 1000)
|
? Math.max(0, resetSeconds * 1000)
|
||||||
: calculateBackoffDelay(attempt, retryBaseMs);
|
: calculateBackoffDelay(attempt, retryBaseMs);
|
||||||
|
|
||||||
if (attempt < maxRetries) {
|
if (attempt < maxRetries) {
|
||||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
throw new RateLimitError(
|
throw new RateLimitError(
|
||||||
`Rate limit exceeded for ${url}`,
|
`Rate limit exceeded for ${url}`,
|
||||||
url,
|
url,
|
||||||
resetSeconds
|
resetSeconds,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retry on server errors
|
// Retry on server errors
|
||||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new HttpError(
|
throw new HttpError(
|
||||||
`Request failed with status ${res.status}`,
|
`Request failed with status ${res.status}`,
|
||||||
res.status,
|
res.status,
|
||||||
url
|
url,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const html = await res.text();
|
const html = await res.text();
|
||||||
|
|
||||||
// Respect per-request delay to maintain rate limiting
|
// Respect per-request delay to maintain rate limiting
|
||||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||||
return html;
|
return html;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// Re-throw known errors
|
// Re-throw known errors
|
||||||
if (
|
if (
|
||||||
err instanceof RateLimitError ||
|
err instanceof RateLimitError ||
|
||||||
err instanceof HttpError ||
|
err instanceof HttpError ||
|
||||||
err instanceof NetworkError
|
err instanceof NetworkError
|
||||||
) {
|
) {
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (err instanceof Error && err.name === "AbortError") {
|
if (err instanceof Error && err.name === "AbortError") {
|
||||||
if (attempt < maxRetries) {
|
if (attempt < maxRetries) {
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
throw new NetworkError(`Request timeout for ${url}`, url, err);
|
throw new NetworkError(`Request timeout for ${url}`, url, err);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Network or other errors
|
// Network or other errors
|
||||||
if (attempt < maxRetries) {
|
if (attempt < maxRetries) {
|
||||||
await new Promise((resolve) =>
|
await new Promise((resolve) =>
|
||||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||||
);
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
throw new NetworkError(
|
throw new NetworkError(
|
||||||
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
||||||
url,
|
url,
|
||||||
err instanceof Error ? err : undefined
|
err instanceof Error ? err : undefined,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new NetworkError(`Exhausted retries without response for ${url}`, url);
|
throw new NetworkError(`Exhausted retries without response for ${url}`, url);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ import {
|
|||||||
fetchFacebookItem,
|
fetchFacebookItem,
|
||||||
formatCentsToCurrency,
|
formatCentsToCurrency,
|
||||||
formatCookiesForHeader,
|
formatCookiesForHeader,
|
||||||
loadFacebookCookies,
|
|
||||||
parseFacebookAds,
|
parseFacebookAds,
|
||||||
parseFacebookCookieString,
|
parseFacebookCookieString,
|
||||||
parseFacebookItem,
|
parseFacebookItem,
|
||||||
@@ -183,7 +182,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
const result = await fetchFacebookItem("123", mockCookies);
|
const _result = await fetchFacebookItem("123", mockCookies);
|
||||||
expect(attempts).toBe(2);
|
expect(attempts).toBe(2);
|
||||||
// Should eventually succeed after retry
|
// Should eventually succeed after retry
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||||
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
|
import { fetchFacebookItems } from "../src/scrapers/facebook";
|
||||||
|
|
||||||
// Mock fetch globally
|
// Mock fetch globally
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
|
|||||||
@@ -1,166 +1,157 @@
|
|||||||
import { describe, expect, test } from "bun:test";
|
import { describe, expect, test } from "bun:test";
|
||||||
import {
|
import {
|
||||||
HttpError,
|
buildSearchUrl,
|
||||||
NetworkError,
|
NetworkError,
|
||||||
ParseError,
|
ParseError,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
ValidationError,
|
resolveCategoryId,
|
||||||
buildSearchUrl,
|
resolveLocationId,
|
||||||
resolveCategoryId,
|
ValidationError,
|
||||||
resolveLocationId,
|
|
||||||
} from "../src/scrapers/kijiji";
|
} from "../src/scrapers/kijiji";
|
||||||
|
|
||||||
describe("Location and Category Resolution", () => {
|
describe("Location and Category Resolution", () => {
|
||||||
describe("resolveLocationId", () => {
|
describe("resolveLocationId", () => {
|
||||||
test("should return numeric IDs as-is", () => {
|
test("should return numeric IDs as-is", () => {
|
||||||
expect(resolveLocationId(1700272)).toBe(1700272);
|
expect(resolveLocationId(1700272)).toBe(1700272);
|
||||||
expect(resolveLocationId(0)).toBe(0);
|
expect(resolveLocationId(0)).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should resolve string location names", () => {
|
test("should resolve string location names", () => {
|
||||||
expect(resolveLocationId("canada")).toBe(0);
|
expect(resolveLocationId("canada")).toBe(0);
|
||||||
expect(resolveLocationId("ontario")).toBe(9004);
|
expect(resolveLocationId("ontario")).toBe(9004);
|
||||||
expect(resolveLocationId("toronto")).toBe(1700273);
|
expect(resolveLocationId("toronto")).toBe(1700273);
|
||||||
expect(resolveLocationId("gta")).toBe(1700272);
|
expect(resolveLocationId("gta")).toBe(1700272);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should handle case insensitive matching", () => {
|
test("should handle case insensitive matching", () => {
|
||||||
expect(resolveLocationId("Canada")).toBe(0);
|
expect(resolveLocationId("Canada")).toBe(0);
|
||||||
expect(resolveLocationId("ONTARIO")).toBe(9004);
|
expect(resolveLocationId("ONTARIO")).toBe(9004);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should default to Canada for unknown locations", () => {
|
test("should default to Canada for unknown locations", () => {
|
||||||
expect(resolveLocationId("unknown")).toBe(0);
|
expect(resolveLocationId("unknown")).toBe(0);
|
||||||
expect(resolveLocationId("")).toBe(0);
|
expect(resolveLocationId("")).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should handle undefined input", () => {
|
test("should handle undefined input", () => {
|
||||||
expect(resolveLocationId(undefined)).toBe(0);
|
expect(resolveLocationId(undefined)).toBe(0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("resolveCategoryId", () => {
|
describe("resolveCategoryId", () => {
|
||||||
test("should return numeric IDs as-is", () => {
|
test("should return numeric IDs as-is", () => {
|
||||||
expect(resolveCategoryId(132)).toBe(132);
|
expect(resolveCategoryId(132)).toBe(132);
|
||||||
expect(resolveCategoryId(0)).toBe(0);
|
expect(resolveCategoryId(0)).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should resolve string category names", () => {
|
test("should resolve string category names", () => {
|
||||||
expect(resolveCategoryId("all")).toBe(0);
|
expect(resolveCategoryId("all")).toBe(0);
|
||||||
expect(resolveCategoryId("phones")).toBe(132);
|
expect(resolveCategoryId("phones")).toBe(132);
|
||||||
expect(resolveCategoryId("electronics")).toBe(29659001);
|
expect(resolveCategoryId("electronics")).toBe(29659001);
|
||||||
expect(resolveCategoryId("buy-sell")).toBe(10);
|
expect(resolveCategoryId("buy-sell")).toBe(10);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should handle case insensitive matching", () => {
|
test("should handle case insensitive matching", () => {
|
||||||
expect(resolveCategoryId("All")).toBe(0);
|
expect(resolveCategoryId("All")).toBe(0);
|
||||||
expect(resolveCategoryId("PHONES")).toBe(132);
|
expect(resolveCategoryId("PHONES")).toBe(132);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should default to all categories for unknown categories", () => {
|
test("should default to all categories for unknown categories", () => {
|
||||||
expect(resolveCategoryId("unknown")).toBe(0);
|
expect(resolveCategoryId("unknown")).toBe(0);
|
||||||
expect(resolveCategoryId("")).toBe(0);
|
expect(resolveCategoryId("")).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should handle undefined input", () => {
|
test("should handle undefined input", () => {
|
||||||
expect(resolveCategoryId(undefined)).toBe(0);
|
expect(resolveCategoryId(undefined)).toBe(0);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("URL Construction", () => {
|
describe("URL Construction", () => {
|
||||||
describe("buildSearchUrl", () => {
|
describe("buildSearchUrl", () => {
|
||||||
test("should build basic search URL", () => {
|
test("should build basic search URL", () => {
|
||||||
const url = buildSearchUrl("iphone", {
|
const url = buildSearchUrl("iphone", {
|
||||||
location: 1700272,
|
location: 1700272,
|
||||||
category: 132,
|
category: 132,
|
||||||
sortBy: "relevancy",
|
sortBy: "relevancy",
|
||||||
sortOrder: "desc",
|
sortOrder: "desc",
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
|
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
|
||||||
expect(url).toContain("sort=relevancyDesc");
|
expect(url).toContain("sort=relevancyDesc");
|
||||||
expect(url).toContain("order=DESC");
|
expect(url).toContain("order=DESC");
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should handle pagination", () => {
|
test("should handle pagination", () => {
|
||||||
const url = buildSearchUrl("iphone", {
|
const url = buildSearchUrl("iphone", {
|
||||||
location: 1700272,
|
location: 1700272,
|
||||||
category: 132,
|
category: 132,
|
||||||
page: 2,
|
page: 2,
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(url).toContain("&page=2");
|
expect(url).toContain("&page=2");
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should handle different sort options", () => {
|
test("should handle different sort options", () => {
|
||||||
const dateUrl = buildSearchUrl("iphone", {
|
const dateUrl = buildSearchUrl("iphone", {
|
||||||
sortBy: "date",
|
sortBy: "date",
|
||||||
sortOrder: "asc",
|
sortOrder: "asc",
|
||||||
});
|
});
|
||||||
expect(dateUrl).toContain("sort=DATE");
|
expect(dateUrl).toContain("sort=DATE");
|
||||||
expect(dateUrl).toContain("order=ASC");
|
expect(dateUrl).toContain("order=ASC");
|
||||||
|
|
||||||
const priceUrl = buildSearchUrl("iphone", {
|
const priceUrl = buildSearchUrl("iphone", {
|
||||||
sortBy: "price",
|
sortBy: "price",
|
||||||
sortOrder: "desc",
|
sortOrder: "desc",
|
||||||
});
|
});
|
||||||
expect(priceUrl).toContain("sort=PRICE");
|
expect(priceUrl).toContain("sort=PRICE");
|
||||||
expect(priceUrl).toContain("order=DESC");
|
expect(priceUrl).toContain("order=DESC");
|
||||||
});
|
});
|
||||||
|
|
||||||
test("should handle string location/category inputs", () => {
|
test("should handle string location/category inputs", () => {
|
||||||
const url = buildSearchUrl("iphone", {
|
const url = buildSearchUrl("iphone", {
|
||||||
location: "toronto",
|
location: "toronto",
|
||||||
category: "phones",
|
category: "phones",
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(url).toContain("k0c132l1700273"); // phones + toronto
|
expect(url).toContain("k0c132l1700273"); // phones + toronto
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("Error Classes", () => {
|
describe("Error Classes", () => {
|
||||||
test("HttpError should store status and URL", () => {
|
test("NetworkError should store URL and cause", () => {
|
||||||
const error = new HttpError("Not found", 404, "https://example.com");
|
const cause = new Error("Connection failed");
|
||||||
expect(error.message).toBe("Not found");
|
const error = new NetworkError(
|
||||||
expect(error.statusCode).toBe(404);
|
"Network error",
|
||||||
expect(error.url).toBe("https://example.com");
|
"https://example.com",
|
||||||
expect(error.name).toBe("HttpError");
|
cause,
|
||||||
});
|
);
|
||||||
|
expect(error.message).toBe("Network error");
|
||||||
|
expect(error.url).toBe("https://example.com");
|
||||||
|
expect(error.cause).toBe(cause);
|
||||||
|
expect(error.name).toBe("NetworkError");
|
||||||
|
});
|
||||||
|
|
||||||
test("NetworkError should store URL and cause", () => {
|
test("ParseError should store data", () => {
|
||||||
const cause = new Error("Connection failed");
|
const data = { invalid: "json" };
|
||||||
const error = new NetworkError(
|
const error = new ParseError("Invalid JSON", data);
|
||||||
"Network error",
|
expect(error.message).toBe("Invalid JSON");
|
||||||
"https://example.com",
|
expect(error.data).toBe(data);
|
||||||
cause
|
expect(error.name).toBe("ParseError");
|
||||||
);
|
});
|
||||||
expect(error.message).toBe("Network error");
|
|
||||||
expect(error.url).toBe("https://example.com");
|
|
||||||
expect(error.cause).toBe(cause);
|
|
||||||
expect(error.name).toBe("NetworkError");
|
|
||||||
});
|
|
||||||
|
|
||||||
test("ParseError should store data", () => {
|
test("RateLimitError should store URL and reset time", () => {
|
||||||
const data = { invalid: "json" };
|
const error = new RateLimitError("Rate limited", "https://example.com", 60);
|
||||||
const error = new ParseError("Invalid JSON", data);
|
expect(error.message).toBe("Rate limited");
|
||||||
expect(error.message).toBe("Invalid JSON");
|
expect(error.url).toBe("https://example.com");
|
||||||
expect(error.data).toBe(data);
|
expect(error.resetTime).toBe(60);
|
||||||
expect(error.name).toBe("ParseError");
|
expect(error.name).toBe("RateLimitError");
|
||||||
});
|
});
|
||||||
|
|
||||||
test("RateLimitError should store URL and reset time", () => {
|
test("ValidationError should work without field", () => {
|
||||||
const error = new RateLimitError("Rate limited", "https://example.com", 60);
|
const error = new ValidationError("Invalid value");
|
||||||
expect(error.message).toBe("Rate limited");
|
expect(error.message).toBe("Invalid value");
|
||||||
expect(error.url).toBe("https://example.com");
|
expect(error.name).toBe("ValidationError");
|
||||||
expect(error.resetTime).toBe(60);
|
});
|
||||||
expect(error.name).toBe("RateLimitError");
|
|
||||||
});
|
|
||||||
|
|
||||||
test("ValidationError should work without field", () => {
|
|
||||||
const error = new ValidationError("Invalid value");
|
|
||||||
expect(error.message).toBe("Invalid value");
|
|
||||||
expect(error.name).toBe("ValidationError");
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
import { describe, expect, test } from "bun:test";
|
||||||
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
|
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
|
||||||
|
|
||||||
describe("Utility Functions", () => {
|
describe("Utility Functions", () => {
|
||||||
|
|||||||
@@ -3,9 +3,9 @@
|
|||||||
|
|
||||||
// Mock fetch globally for tests
|
// Mock fetch globally for tests
|
||||||
global.fetch =
|
global.fetch =
|
||||||
global.fetch ||
|
global.fetch ||
|
||||||
(() => {
|
(() => {
|
||||||
throw new Error("fetch is not available in test environment");
|
throw new Error("fetch is not available in test environment");
|
||||||
});
|
});
|
||||||
|
|
||||||
// Add any global test utilities here
|
// Add any global test utilities here
|
||||||
|
|||||||
@@ -4,30 +4,33 @@ import { serverCard } from "./protocol/metadata";
|
|||||||
const PORT = process.env.MCP_PORT || 4006;
|
const PORT = process.env.MCP_PORT || 4006;
|
||||||
|
|
||||||
const server = Bun.serve({
|
const server = Bun.serve({
|
||||||
port: PORT as number | string,
|
port: PORT as number | string,
|
||||||
idleTimeout: 0,
|
idleTimeout: 255, // 255 seconds (max allowed)
|
||||||
routes: {
|
routes: {
|
||||||
// MCP metadata discovery endpoint
|
// MCP metadata discovery endpoint
|
||||||
"/.well-known/mcp/server-card.json": new Response(JSON.stringify(serverCard), {
|
"/.well-known/mcp/server-card.json": new Response(
|
||||||
headers: { "Content-Type": "application/json" },
|
JSON.stringify(serverCard),
|
||||||
}),
|
{
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
},
|
||||||
|
),
|
||||||
|
|
||||||
// MCP JSON-RPC 2.0 protocol endpoint
|
// MCP JSON-RPC 2.0 protocol endpoint
|
||||||
"/mcp": async (req: Request) => {
|
"/mcp": async (req: Request) => {
|
||||||
if (req.method === "POST") {
|
if (req.method === "POST") {
|
||||||
return await handleMcpRequest(req);
|
return await handleMcpRequest(req);
|
||||||
}
|
}
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{ message: "MCP endpoint requires POST request" },
|
{ message: "MCP endpoint requires POST request" },
|
||||||
{ status: 405 }
|
{ status: 405 },
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
// Fallback for all other routes
|
// Fallback for all other routes
|
||||||
fetch(req: Request) {
|
fetch(_req: Request) {
|
||||||
return new Response("Not Found", { status: 404 });
|
return new Response("Not Found", { status: 404 });
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log(`MCP Server running on ${server.hostname}:${server.port}`);
|
console.log(`MCP Server running on ${server.hostname}:${server.port}`);
|
||||||
|
|||||||
@@ -1,206 +1,292 @@
|
|||||||
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
|
|
||||||
import { tools } from "./tools";
|
import { tools } from "./tools";
|
||||||
|
|
||||||
|
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
|
||||||
|
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handle MCP JSON-RPC 2.0 protocol requests
|
* Handle MCP JSON-RPC 2.0 protocol requests
|
||||||
*/
|
*/
|
||||||
export async function handleMcpRequest(req: Request): Promise<Response> {
|
export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||||
try {
|
try {
|
||||||
const body = await req.json();
|
const body = await req.json();
|
||||||
|
|
||||||
// Validate JSON-RPC 2.0 format
|
// Validate JSON-RPC 2.0 format
|
||||||
if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) {
|
if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) {
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{
|
{
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
error: { code: -32600, message: "Invalid Request" },
|
error: { code: -32600, message: "Invalid Request" },
|
||||||
id: body.id,
|
id: body.id,
|
||||||
},
|
},
|
||||||
{ status: 400 }
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const { method, params, id } = body;
|
const { method, params, id } = body;
|
||||||
|
|
||||||
// Handle initialize method
|
// Handle initialize method
|
||||||
if (method === "initialize") {
|
if (method === "initialize") {
|
||||||
return Response.json({
|
return Response.json({
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
id,
|
id,
|
||||||
result: {
|
result: {
|
||||||
protocolVersion: "2025-06-18",
|
protocolVersion: "2025-06-18",
|
||||||
capabilities: {
|
capabilities: {
|
||||||
tools: {
|
tools: {
|
||||||
listChanged: true,
|
listChanged: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
serverInfo: {
|
serverInfo: {
|
||||||
name: "marketplace-scrapers",
|
name: "marketplace-scrapers",
|
||||||
version: "1.0.0",
|
version: "1.0.0",
|
||||||
},
|
},
|
||||||
instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
instructions:
|
||||||
},
|
"Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
||||||
});
|
},
|
||||||
}
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Handle tools/list method
|
// Handle tools/list method
|
||||||
if (method === "tools/list") {
|
if (method === "tools/list") {
|
||||||
return Response.json({
|
return Response.json({
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
id,
|
id,
|
||||||
result: {
|
result: {
|
||||||
tools,
|
tools,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle notifications (messages without id field should not get a response)
|
// Handle notifications (messages without id field should not get a response)
|
||||||
if (!id) {
|
if (!id) {
|
||||||
// Notifications don't require a response
|
// Notifications don't require a response
|
||||||
if (method === "notifications/initialized") {
|
if (method === "notifications/initialized") {
|
||||||
// Client initialized successfully, no response needed
|
// Client initialized successfully, no response needed
|
||||||
return new Response(null, { status: 204 });
|
return new Response(null, { status: 204 });
|
||||||
}
|
}
|
||||||
if (method === "notifications/progress") {
|
if (method === "notifications/progress") {
|
||||||
// Progress notifications, no response needed
|
// Progress notifications, no response needed
|
||||||
return new Response(null, { status: 204 });
|
return new Response(null, { status: 204 });
|
||||||
}
|
}
|
||||||
// Unknown notification - still no response for notifications
|
// Unknown notification - still no response for notifications
|
||||||
return new Response(null, { status: 204 });
|
return new Response(null, { status: 204 });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle tools/call method
|
// Handle tools/call method
|
||||||
if (method === "tools/call") {
|
if (method === "tools/call") {
|
||||||
const { name, arguments: args } = params || {};
|
const { name, arguments: args } = params || {};
|
||||||
|
|
||||||
if (!name || !args) {
|
if (!name || !args) {
|
||||||
return Response.json(
|
return Response.json(
|
||||||
{
|
{
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
id,
|
id,
|
||||||
error: { code: -32602, message: "Invalid params: name and arguments required" },
|
error: {
|
||||||
},
|
code: -32602,
|
||||||
{ status: 400 }
|
message: "Invalid params: name and arguments required",
|
||||||
);
|
},
|
||||||
}
|
},
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Route tool calls to appropriate handlers
|
// Route tool calls to appropriate handlers
|
||||||
try {
|
try {
|
||||||
let result;
|
let result: unknown;
|
||||||
|
|
||||||
if (name === "search_kijiji") {
|
if (name === "search_kijiji") {
|
||||||
const query = args.query;
|
const query = args.query;
|
||||||
if (!query) {
|
if (!query) {
|
||||||
return Response.json({
|
return Response.json({
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
id,
|
id,
|
||||||
error: { code: -32602, message: "query parameter is required" },
|
error: { code: -32602, message: "query parameter is required" },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const searchOptions = {
|
const params = new URLSearchParams({ q: query });
|
||||||
location: args.location,
|
if (args.location) params.append("location", args.location);
|
||||||
category: args.category,
|
if (args.category) params.append("category", args.category);
|
||||||
keywords: args.keywords,
|
if (args.keywords) params.append("keywords", args.keywords);
|
||||||
sortBy: args.sortBy,
|
if (args.sortBy) params.append("sortBy", args.sortBy);
|
||||||
sortOrder: args.sortOrder,
|
if (args.sortOrder) params.append("sortOrder", args.sortOrder);
|
||||||
maxPages: args.maxPages || 5,
|
if (args.maxPages)
|
||||||
priceMin: args.priceMin,
|
params.append("maxPages", args.maxPages.toString());
|
||||||
priceMax: args.priceMax,
|
if (args.priceMin)
|
||||||
};
|
params.append("priceMin", args.priceMin.toString());
|
||||||
const items = await fetchKijijiItems(
|
if (args.priceMax)
|
||||||
query,
|
params.append("priceMax", args.priceMax.toString());
|
||||||
1,
|
if (args.cookies) params.append("cookies", args.cookies);
|
||||||
"https://www.kijiji.ca",
|
|
||||||
searchOptions,
|
|
||||||
{}
|
|
||||||
);
|
|
||||||
result = items || [];
|
|
||||||
} else if (name === "search_facebook") {
|
|
||||||
const query = args.query;
|
|
||||||
if (!query) {
|
|
||||||
return Response.json({
|
|
||||||
jsonrpc: "2.0",
|
|
||||||
id,
|
|
||||||
error: { code: -32602, message: "query parameter is required" },
|
|
||||||
});
|
|
||||||
}
|
|
||||||
const items = await fetchFacebookItems(
|
|
||||||
query,
|
|
||||||
1,
|
|
||||||
args.location || "toronto",
|
|
||||||
args.maxItems || 25,
|
|
||||||
args.cookiesSource,
|
|
||||||
undefined
|
|
||||||
);
|
|
||||||
result = items || [];
|
|
||||||
} else if (name === "search_ebay") {
|
|
||||||
const query = args.query;
|
|
||||||
if (!query) {
|
|
||||||
return Response.json({
|
|
||||||
jsonrpc: "2.0",
|
|
||||||
id,
|
|
||||||
error: { code: -32602, message: "query parameter is required" },
|
|
||||||
});
|
|
||||||
}
|
|
||||||
const items = await fetchEbayItems(query, 1, {
|
|
||||||
minPrice: args.minPrice,
|
|
||||||
maxPrice: args.maxPrice,
|
|
||||||
strictMode: args.strictMode || false,
|
|
||||||
exclusions: args.exclusions || [],
|
|
||||||
keywords: args.keywords || [query],
|
|
||||||
buyItNowOnly: args.buyItNowOnly !== false,
|
|
||||||
canadaOnly: args.canadaOnly !== false,
|
|
||||||
});
|
|
||||||
|
|
||||||
const results = args.maxItems ? items.slice(0, args.maxItems) : items;
|
console.log(
|
||||||
result = results || [];
|
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||||
} else {
|
);
|
||||||
return Response.json({
|
const response = await Promise.race([
|
||||||
jsonrpc: "2.0",
|
fetch(`${API_BASE_URL}/kijiji?${params.toString()}`),
|
||||||
id,
|
new Promise<Response>((_, reject) =>
|
||||||
error: { code: -32601, message: `Unknown tool: ${name}` },
|
setTimeout(
|
||||||
});
|
() =>
|
||||||
}
|
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||||
|
API_TIMEOUT,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
return Response.json({
|
if (!response.ok) {
|
||||||
jsonrpc: "2.0",
|
const errorText = await response.text();
|
||||||
id,
|
console.error(
|
||||||
result: {
|
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
|
||||||
content: [
|
);
|
||||||
{
|
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||||
type: "text",
|
}
|
||||||
text: JSON.stringify(result, null, 2),
|
result = await response.json();
|
||||||
},
|
console.log(
|
||||||
],
|
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||||
},
|
);
|
||||||
});
|
} else if (name === "search_facebook") {
|
||||||
} catch (error) {
|
const query = args.query;
|
||||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
if (!query) {
|
||||||
return Response.json({
|
return Response.json({
|
||||||
jsonrpc: "2.0",
|
jsonrpc: "2.0",
|
||||||
id,
|
id,
|
||||||
error: { code: -32603, message: `Tool execution failed: ${errorMessage}` },
|
error: { code: -32602, message: "query parameter is required" },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
const params = new URLSearchParams({ q: query });
|
||||||
|
if (args.location) params.append("location", args.location);
|
||||||
|
if (args.maxItems)
|
||||||
|
params.append("maxItems", args.maxItems.toString());
|
||||||
|
if (args.cookiesSource) params.append("cookies", args.cookiesSource);
|
||||||
|
|
||||||
// Method not found
|
console.log(
|
||||||
return Response.json(
|
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
|
||||||
{
|
);
|
||||||
jsonrpc: "2.0",
|
const response = await Promise.race([
|
||||||
id,
|
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
|
||||||
error: { code: -32601, message: `Method not found: ${method}` },
|
new Promise<Response>((_, reject) =>
|
||||||
},
|
setTimeout(
|
||||||
{ status: 404 }
|
() =>
|
||||||
);
|
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||||
} catch (error) {
|
API_TIMEOUT,
|
||||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
),
|
||||||
return Response.json(
|
),
|
||||||
{
|
]);
|
||||||
jsonrpc: "2.0",
|
|
||||||
error: { code: -32700, message: `Parse error: ${errorMessage}` },
|
if (!response.ok) {
|
||||||
},
|
const errorText = await response.text();
|
||||||
{ status: 400 }
|
console.error(
|
||||||
);
|
`[MCP] Facebook API error ${response.status}: ${errorText}`,
|
||||||
}
|
);
|
||||||
|
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||||
|
}
|
||||||
|
result = await response.json();
|
||||||
|
console.log(
|
||||||
|
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||||
|
);
|
||||||
|
} else if (name === "search_ebay") {
|
||||||
|
const query = args.query;
|
||||||
|
if (!query) {
|
||||||
|
return Response.json({
|
||||||
|
jsonrpc: "2.0",
|
||||||
|
id,
|
||||||
|
error: { code: -32602, message: "query parameter is required" },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
const params = new URLSearchParams({ q: query });
|
||||||
|
if (args.minPrice)
|
||||||
|
params.append("minPrice", args.minPrice.toString());
|
||||||
|
if (args.maxPrice)
|
||||||
|
params.append("maxPrice", args.maxPrice.toString());
|
||||||
|
if (args.strictMode !== undefined)
|
||||||
|
params.append("strictMode", args.strictMode.toString());
|
||||||
|
if (args.exclusions?.length)
|
||||||
|
params.append("exclusions", args.exclusions.join(","));
|
||||||
|
if (args.keywords?.length)
|
||||||
|
params.append("keywords", args.keywords.join(","));
|
||||||
|
if (args.buyItNowOnly !== undefined)
|
||||||
|
params.append("buyItNowOnly", args.buyItNowOnly.toString());
|
||||||
|
if (args.canadaOnly !== undefined)
|
||||||
|
params.append("canadaOnly", args.canadaOnly.toString());
|
||||||
|
if (args.maxItems)
|
||||||
|
params.append("maxItems", args.maxItems.toString());
|
||||||
|
if (args.cookies) params.append("cookies", args.cookies);
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||||
|
);
|
||||||
|
const response = await Promise.race([
|
||||||
|
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
|
||||||
|
new Promise<Response>((_, reject) =>
|
||||||
|
setTimeout(
|
||||||
|
() =>
|
||||||
|
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||||
|
API_TIMEOUT,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text();
|
||||||
|
console.error(
|
||||||
|
`[MCP] eBay API error ${response.status}: ${errorText}`,
|
||||||
|
);
|
||||||
|
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||||
|
}
|
||||||
|
result = await response.json();
|
||||||
|
console.log(
|
||||||
|
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
return Response.json({
|
||||||
|
jsonrpc: "2.0",
|
||||||
|
id,
|
||||||
|
error: { code: -32601, message: `Unknown tool: ${name}` },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return Response.json({
|
||||||
|
jsonrpc: "2.0",
|
||||||
|
id,
|
||||||
|
result: {
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: "text",
|
||||||
|
text: JSON.stringify(result, null, 2),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
const errorMessage =
|
||||||
|
error instanceof Error ? error.message : "Unknown error";
|
||||||
|
return Response.json({
|
||||||
|
jsonrpc: "2.0",
|
||||||
|
id,
|
||||||
|
error: {
|
||||||
|
code: -32603,
|
||||||
|
message: `Tool execution failed: ${errorMessage}`,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Method not found
|
||||||
|
return Response.json(
|
||||||
|
{
|
||||||
|
jsonrpc: "2.0",
|
||||||
|
id,
|
||||||
|
error: { code: -32601, message: `Method not found: ${method}` },
|
||||||
|
},
|
||||||
|
{ status: 404 },
|
||||||
|
);
|
||||||
|
} catch (error) {
|
||||||
|
const errorMessage =
|
||||||
|
error instanceof Error ? error.message : "Unknown error";
|
||||||
|
return Response.json(
|
||||||
|
{
|
||||||
|
jsonrpc: "2.0",
|
||||||
|
error: { code: -32700, message: `Parse error: ${errorMessage}` },
|
||||||
|
},
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,23 +3,25 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
export const serverCard = {
|
export const serverCard = {
|
||||||
$schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
$schema:
|
||||||
version: "1.0",
|
"https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
||||||
protocolVersion: "2025-06-18",
|
version: "1.0",
|
||||||
serverInfo: {
|
protocolVersion: "2025-06-18",
|
||||||
name: "marketplace-scrapers",
|
serverInfo: {
|
||||||
title: "Marketplace Scrapers MCP Server",
|
name: "marketplace-scrapers",
|
||||||
version: "1.0.0",
|
title: "Marketplace Scrapers MCP Server",
|
||||||
},
|
version: "1.0.0",
|
||||||
transport: {
|
},
|
||||||
type: "streamable-http",
|
transport: {
|
||||||
endpoint: "/mcp",
|
type: "streamable-http",
|
||||||
},
|
endpoint: "/mcp",
|
||||||
capabilities: {
|
},
|
||||||
tools: {
|
capabilities: {
|
||||||
listChanged: true,
|
tools: {
|
||||||
},
|
listChanged: true,
|
||||||
},
|
},
|
||||||
description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
},
|
||||||
tools: "dynamic",
|
description:
|
||||||
|
"Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
||||||
|
tools: "dynamic",
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -3,135 +3,148 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
export const tools = [
|
export const tools = [
|
||||||
{
|
{
|
||||||
name: "search_kijiji",
|
name: "search_kijiji",
|
||||||
description: "Search Kijiji marketplace for listings matching a query",
|
description: "Search Kijiji marketplace for listings matching a query",
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: "object",
|
type: "object",
|
||||||
properties: {
|
properties: {
|
||||||
query: {
|
query: {
|
||||||
type: "string",
|
type: "string",
|
||||||
description: "Search query for Kijiji listings",
|
description: "Search query for Kijiji listings",
|
||||||
},
|
},
|
||||||
location: {
|
location: {
|
||||||
type: "string",
|
type: "string",
|
||||||
description: "Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
|
description:
|
||||||
},
|
"Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
|
||||||
category: {
|
},
|
||||||
type: "string",
|
category: {
|
||||||
description: "Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
|
type: "string",
|
||||||
},
|
description:
|
||||||
keywords: {
|
"Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
|
||||||
type: "string",
|
},
|
||||||
description: "Additional keywords to filter results",
|
keywords: {
|
||||||
},
|
type: "string",
|
||||||
sortBy: {
|
description: "Additional keywords to filter results",
|
||||||
type: "string",
|
},
|
||||||
description: "Sort results by field",
|
sortBy: {
|
||||||
enum: ["relevancy", "date", "price", "distance"],
|
type: "string",
|
||||||
default: "relevancy",
|
description: "Sort results by field",
|
||||||
},
|
enum: ["relevancy", "date", "price", "distance"],
|
||||||
sortOrder: {
|
default: "relevancy",
|
||||||
type: "string",
|
},
|
||||||
description: "Sort order",
|
sortOrder: {
|
||||||
enum: ["asc", "desc"],
|
type: "string",
|
||||||
default: "desc",
|
description: "Sort order",
|
||||||
},
|
enum: ["asc", "desc"],
|
||||||
maxPages: {
|
default: "desc",
|
||||||
type: "number",
|
},
|
||||||
description: "Maximum pages to fetch (~40 items per page)",
|
maxPages: {
|
||||||
default: 5,
|
type: "number",
|
||||||
},
|
description: "Maximum pages to fetch (~40 items per page)",
|
||||||
priceMin: {
|
default: 5,
|
||||||
type: "number",
|
},
|
||||||
description: "Minimum price in cents",
|
priceMin: {
|
||||||
},
|
type: "number",
|
||||||
priceMax: {
|
description: "Minimum price in cents",
|
||||||
type: "number",
|
},
|
||||||
description: "Maximum price in cents",
|
priceMax: {
|
||||||
},
|
type: "number",
|
||||||
},
|
description: "Maximum price in cents",
|
||||||
required: ["query"],
|
},
|
||||||
},
|
cookies: {
|
||||||
},
|
type: "string",
|
||||||
{
|
description:
|
||||||
name: "search_facebook",
|
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
|
||||||
description: "Search Facebook Marketplace for listings matching a query",
|
},
|
||||||
inputSchema: {
|
},
|
||||||
type: "object",
|
required: ["query"],
|
||||||
properties: {
|
},
|
||||||
query: {
|
},
|
||||||
type: "string",
|
{
|
||||||
description: "Search query for Facebook Marketplace listings",
|
name: "search_facebook",
|
||||||
},
|
description: "Search Facebook Marketplace for listings matching a query",
|
||||||
location: {
|
inputSchema: {
|
||||||
type: "string",
|
type: "object",
|
||||||
description: "Location for search (e.g., 'toronto')",
|
properties: {
|
||||||
default: "toronto",
|
query: {
|
||||||
},
|
type: "string",
|
||||||
maxItems: {
|
description: "Search query for Facebook Marketplace listings",
|
||||||
type: "number",
|
},
|
||||||
description: "Maximum number of items to return",
|
location: {
|
||||||
default: 5,
|
type: "string",
|
||||||
},
|
description: "Location for search (e.g., 'toronto')",
|
||||||
cookiesSource: {
|
default: "toronto",
|
||||||
type: "string",
|
},
|
||||||
description: "Optional Facebook session cookies source",
|
maxItems: {
|
||||||
},
|
type: "number",
|
||||||
},
|
description: "Maximum number of items to return",
|
||||||
required: ["query"],
|
default: 5,
|
||||||
},
|
},
|
||||||
},
|
cookiesSource: {
|
||||||
{
|
type: "string",
|
||||||
name: "search_ebay",
|
description: "Optional Facebook session cookies source",
|
||||||
description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
},
|
||||||
inputSchema: {
|
},
|
||||||
type: "object",
|
required: ["query"],
|
||||||
properties: {
|
},
|
||||||
query: {
|
},
|
||||||
type: "string",
|
{
|
||||||
description: "Search query for eBay listings",
|
name: "search_ebay",
|
||||||
},
|
description:
|
||||||
minPrice: {
|
"Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
||||||
type: "number",
|
inputSchema: {
|
||||||
description: "Minimum price filter",
|
type: "object",
|
||||||
},
|
properties: {
|
||||||
maxPrice: {
|
query: {
|
||||||
type: "number",
|
type: "string",
|
||||||
description: "Maximum price filter",
|
description: "Search query for eBay listings",
|
||||||
},
|
},
|
||||||
strictMode: {
|
minPrice: {
|
||||||
type: "boolean",
|
type: "number",
|
||||||
description: "Enable strict search mode",
|
description: "Minimum price filter",
|
||||||
default: false,
|
},
|
||||||
},
|
maxPrice: {
|
||||||
exclusions: {
|
type: "number",
|
||||||
type: "array",
|
description: "Maximum price filter",
|
||||||
items: { type: "string" },
|
},
|
||||||
description: "Terms to exclude from results",
|
strictMode: {
|
||||||
},
|
type: "boolean",
|
||||||
keywords: {
|
description: "Enable strict search mode",
|
||||||
type: "array",
|
default: false,
|
||||||
items: { type: "string" },
|
},
|
||||||
description: "Keywords to include in search",
|
exclusions: {
|
||||||
},
|
type: "array",
|
||||||
buyItNowOnly: {
|
items: { type: "string" },
|
||||||
type: "boolean",
|
description: "Terms to exclude from results",
|
||||||
description: "Include only Buy It Now listings (exclude auctions)",
|
},
|
||||||
default: true,
|
keywords: {
|
||||||
},
|
type: "array",
|
||||||
canadaOnly: {
|
items: { type: "string" },
|
||||||
type: "boolean",
|
description: "Keywords to include in search",
|
||||||
description: "Include only Canadian sellers/listings",
|
},
|
||||||
default: true,
|
buyItNowOnly: {
|
||||||
},
|
type: "boolean",
|
||||||
maxItems: {
|
description: "Include only Buy It Now listings (exclude auctions)",
|
||||||
type: "number",
|
default: true,
|
||||||
description: "Maximum number of items to return",
|
},
|
||||||
default: 5,
|
canadaOnly: {
|
||||||
},
|
type: "boolean",
|
||||||
},
|
description: "Include only Canadian sellers/listings",
|
||||||
required: ["query"],
|
default: true,
|
||||||
},
|
},
|
||||||
},
|
maxItems: {
|
||||||
|
type: "number",
|
||||||
|
description: "Maximum number of items to return",
|
||||||
|
default: 5,
|
||||||
|
},
|
||||||
|
cookies: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required: ["query"],
|
||||||
|
},
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|||||||
26
scripts/biome-symlink.sh
Executable file
26
scripts/biome-symlink.sh
Executable file
@@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Replace every biome executable found under ./node_modules with a symlink
# to the biome executable that is first on PATH.
#
# Requires: biome (on PATH), fd.
# Exit status: 0 on success or when nothing was found, 1 when a required
# tool is missing or a replacement fails.
set -euo pipefail

# Get the path to the system biome executable
if ! BIOME_PATH=$(command -v biome); then
  echo "Error: biome executable not found in PATH" >&2
  exit 1
fi

# Without this check a missing fd would look like "nothing to replace".
if ! command -v fd >/dev/null; then
  echo "Error: fd executable not found in PATH" >&2
  exit 1
fi

# Find all biome executables in node_modules.
# NUL-delimited so paths containing whitespace or glob characters survive.
files=()
while IFS= read -r -d '' file; do
  files+=("$file")
done < <(fd biome node_modules --type executable --no-ignore --follow --print0)

if (( ${#files[@]} == 0 )); then
  echo "No biome executables found in node_modules"
  exit 0
fi

# Replace each with a symlink to the system biome
for file in "${files[@]}"; do
  # If the match already resolves to the system biome (e.g. PATH points into
  # node_modules, or a previous run linked it), removing it would either
  # delete the link target or be redundant, so leave it alone.
  if [[ "$file" -ef "$BIOME_PATH" ]]; then
    echo "Skipping $file (already resolves to $BIOME_PATH)"
    continue
  fi

  echo "Replacing $file with symlink to $BIOME_PATH"
  rm -- "$file"
  ln -s -- "$BIOME_PATH" "$file"
done

echo "Done."
|
||||||
30
scripts/remove-eslint.sh
Executable file
30
scripts/remove-eslint.sh
Executable file
@@ -0,0 +1,30 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Delete every line that matches PATTERN from all files below the current
# directory. sed keeps a copy of each modified file next to it as <file>.bak.
#
# Requires `fd` and `rg` (ripgrep).

set -uo pipefail

PATTERN="eslint"
NL=$'\n'

for tool in fd rg; do
  if ! command -v "$tool" >/dev/null; then
    echo "Error: $tool executable not found in PATH" >&2
    exit 1
  fi
done

# Collect the candidate paths before editing anything, NUL-delimited so that
# names containing whitespace stay intact and the .bak files written below
# are never picked up by the search.
FILES=()
while IFS= read -r -d '' file; do
  FILES+=("$file")
done < <(fd --print0 .) # Or use 'find . -print0' to search recursively

if (( ${#FILES[@]} == 0 )); then
  echo "No files found."
  exit 0
fi

for file in "${FILES[@]}"; do
  [[ -f "$file" ]] || continue

  # This script contains PATTERN itself; never rewrite the running script.
  if [[ "$file" -ef "${BASH_SOURCE[0]}" ]]; then
    continue
  fi

  # 1. rg prints "<line>:<text>" for every matching line.
  # 2. awk keeps only the line number, dropping any non-numeric diagnostics.
  # rg exits non-zero when nothing matches; that is an expected outcome here,
  # so the pipeline status is deliberately ignored.
  line_numbers=$(
    rg --line-number --no-filename -- "$PATTERN" "$file" \
      | awk -F':' '$1 ~ /^[0-9]+$/ { print $1 }'
  ) || true

  if [[ -n "$line_numbers" ]]; then
    echo "Deleting lines ${line_numbers//$NL/,} from $file..."

    # sed has no "list of lines" address: "3,7d" is the RANGE 3..7 and
    # "3,7,12d" is a syntax error. Emit one "<n>d" command per line instead.
    sed_script="${line_numbers//$NL/d$NL}d"

    # -i.bak (suffix attached to the flag) is accepted by both GNU sed (Linux)
    # and BSD sed (macOS).
    sed -i.bak -e "$sed_script" -- "$file"

    # Optional: Remove the backup file created by sed -i.bak
    # rm -- "${file}.bak"
  else
    echo "$file: No lines matching pattern found."
  fi
done
|
||||||
25
scripts/start.sh
Executable file
25
scripts/start.sh
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Launches the API server and the MCP server as background jobs and
# supervises them: the script ends as soon as either service exits or a
# TERM/INT signal arrives, stopping whatever is still running.
#
# Environment:
#   API_PORT - port shown in the API server log line (default 4005)
#   MCP_PORT - port shown in the MCP server log line (default 4006)
#              NOTE(review): assumes the MCP server is configured through
#              MCP_PORT; confirm against the MCP entry point.
#
# Requires bash 4.3+ for `wait -n`.
set -e

API_PID=""
MCP_PID=""

# Send TERM to every service that has been started, then reap all children.
# Failures are ignored on purpose: a service may already be gone.
stop_services() {
  local pid
  for pid in "$API_PID" "$MCP_PID"; do
    if [[ -n "$pid" ]]; then
      kill -TERM "$pid" 2>/dev/null || true
    fi
  done
  wait || true
}

# Signal handler: stop the services and exit with the conventional
# 128+signal status passed in $1.
on_signal() {
  echo "Received shutdown signal, stopping services..."
  stop_services
  exit "$1"
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap 'on_signal 143' TERM
trap 'on_signal 130' INT

# Start API Server in background
echo "Starting API Server on port ${API_PORT:-4005}..."
bun dist/api/index.js &
API_PID=$!

# Give API server a moment to initialize
sleep 1

# Do not bring up the second service if the first one has already died.
if ! kill -0 "$API_PID" 2>/dev/null; then
  echo "API Server failed to start" >&2
  exit 1
fi

# Start MCP Server in background
echo "Starting MCP Server on port ${MCP_PORT:-4006}..."
bun dist/mcp/index.js &
MCP_PID=$!

echo "Both services started successfully"
echo "API Server PID: $API_PID"
echo "MCP Server PID: $MCP_PID"

# Wait until the first service exits. Waiting for both would keep the script
# alive with only half of the services running and would hide the exit status
# of whichever one failed first.
status=0
wait -n || status=$?

echo "A service exited with status $status, stopping the other one..." >&2
stop_services
exit "$status"
|
||||||
Reference in New Issue
Block a user