Compare commits
31 Commits
da23ca1c3f
...
update
| Author | SHA1 | Date | |
|---|---|---|---|
| e4ab145d70 | |||
| 1dce0392e3 | |||
| 251fcbb7d9 | |||
| 9bc57d6b54 | |||
| 4a467c9f02 | |||
| f944d319c2 | |||
| cf9784a565 | |||
| df0c528535 | |||
| 2f97d3eafd | |||
| 65eb8d1724 | |||
| f3839aba54 | |||
| 90b98bfb09 | |||
| eb6705df0f | |||
| 72525609ed | |||
| 8b0a65860c | |||
| f9b1c7e096 | |||
| 9edc74cbeb | |||
| ee0fca826d | |||
| f7372612fb | |||
| bce126664e | |||
| 8cbf11538e | |||
| 79f47fdaef | |||
| de5069bf2b | |||
| 637f1a4e75 | |||
| 441ff436c4 | |||
| 1f53ec912a | |||
| 053efd815b | |||
| d619fa5d77 | |||
| 050fd0adba | |||
| 7b106c91ce | |||
| 6e0487f8f3 |
181
.dockerignore
181
.dockerignore
@@ -1,145 +1,84 @@
|
||||
# Dependencies
|
||||
# =============================================================================
|
||||
# Dependencies & Build Output
|
||||
# =============================================================================
|
||||
node_modules/
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
bun.sum
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage/
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
jspm_packages/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
dist/
|
||||
out/
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variables file
|
||||
# =============================================================================
|
||||
# Sensitive Files
|
||||
# =============================================================================
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.*
|
||||
.envrc
|
||||
cookies/
|
||||
*.pem
|
||||
*.key
|
||||
*.cert
|
||||
*secret*
|
||||
*credential*
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
# =============================================================================
|
||||
# Development Tools & Config
|
||||
# =============================================================================
|
||||
# Nix/Devenv
|
||||
.devenv/
|
||||
.devenv.flake.nix
|
||||
devenv.*
|
||||
.direnv/
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
# Linting/Formatting
|
||||
biome.json
|
||||
.eslintcache
|
||||
.pre-commit-config.yaml
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
public
|
||||
|
||||
# Vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# IDE and editor files
|
||||
# IDE/Editor
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
# AI Assistant Config
|
||||
.claude/
|
||||
CLAUDE.md
|
||||
AGENTS.md
|
||||
opencode.jsonc
|
||||
|
||||
# Git
|
||||
.git
|
||||
# =============================================================================
|
||||
# Documentation (not needed at runtime)
|
||||
# =============================================================================
|
||||
README.md
|
||||
*.md
|
||||
docs/
|
||||
|
||||
# =============================================================================
|
||||
# Git & Docker (avoid recursive inclusion)
|
||||
# =============================================================================
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Docker
|
||||
Dockerfile*
|
||||
.dockerignore
|
||||
|
||||
# Documentation
|
||||
README.md
|
||||
docs/
|
||||
|
||||
# Test files
|
||||
# =============================================================================
|
||||
# Testing & Coverage
|
||||
# =============================================================================
|
||||
test/
|
||||
tests/
|
||||
*.test.js
|
||||
*.test.ts
|
||||
*.spec.js
|
||||
*.spec.ts
|
||||
coverage/
|
||||
*.lcov
|
||||
.nyc_output/
|
||||
|
||||
# Development files
|
||||
CLAUDE.md
|
||||
devenv.*
|
||||
# =============================================================================
|
||||
# OS & Misc
|
||||
# =============================================================================
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
*.log
|
||||
|
||||
# Runtime cookies/config
|
||||
cookies/
|
||||
*.pid
|
||||
.cache/
|
||||
examples/
|
||||
scripts/
|
||||
|
||||
54
AGENTS.md
54
AGENTS.md
@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
|
||||
- `GET /api/status` - Health check
|
||||
- `GET /api/kijiji?q={query}` - Search Kijiji
|
||||
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
||||
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay
|
||||
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
|
||||
- `GET /api/*` - 404 fallback
|
||||
|
||||
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
||||
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
|
||||
**Tools:**
|
||||
- `search_kijiji` - Search Kijiji (query, maxItems)
|
||||
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
||||
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems)
|
||||
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
|
||||
|
||||
## API Response Formats
|
||||
|
||||
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
|
||||
### eBay-specific fields
|
||||
Minimal - mainly the common fields
|
||||
|
||||
## Cookie Management
|
||||
|
||||
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
|
||||
|
||||
### Cookie Priority Hierarchy (High → Low)
|
||||
All scrapers follow this loading order:
|
||||
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
|
||||
2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||
3. **Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback)
|
||||
|
||||
### Facebook Cookies
|
||||
- **Required for**: Facebook Marketplace scraping
|
||||
- **Format**: JSON array (see `cookies/README.md`)
|
||||
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
|
||||
|
||||
**Setup:**
|
||||
```bash
|
||||
# Option 1: File (fallback)
|
||||
# Create cookies/facebook.json with cookie array
|
||||
|
||||
# Option 2: Environment variable
|
||||
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||
|
||||
# Option 3: URL parameter (highest priority)
|
||||
curl "http://localhost:4005/api/facebook?q=laptop&cookies=[{...}]"
|
||||
```
|
||||
|
||||
### eBay Cookies
|
||||
- **Required for**: Bypassing bot detection
|
||||
- **Format**: Cookie string `"name=value; name2=value2"`
|
||||
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
|
||||
|
||||
**Setup:**
|
||||
```bash
|
||||
# Option 1: File (fallback)
|
||||
# Create cookies/ebay.json with cookie string
|
||||
|
||||
# Option 2: Environment variable
|
||||
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||
|
||||
# Option 3: URL parameter (highest priority)
|
||||
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
|
||||
```
|
||||
|
||||
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
|
||||
|
||||
## Technical Details
|
||||
|
||||
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
||||
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
|
||||
|
||||
## Development Notes
|
||||
|
||||
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json`
|
||||
- eBay uses custom headers to bypass basic bot detection
|
||||
- **Cookie files** are git-ignored for security (see `cookies/README.md`)
|
||||
- Kijiji parses Apollo state from Next.js hydration data
|
||||
- All scrapers handle retries on 429/5xx errors
|
||||
- Cookie priority ensures flexibility across different deployment environments
|
||||
|
||||
64
biome.json
64
biome.json
@@ -1,34 +1,34 @@
|
||||
{
|
||||
"$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
|
||||
"vcs": {
|
||||
"enabled": true,
|
||||
"clientKind": "git",
|
||||
"useIgnoreFile": true
|
||||
},
|
||||
"files": {
|
||||
"includes": ["**", "!!**/dist"]
|
||||
},
|
||||
"formatter": {
|
||||
"enabled": true,
|
||||
"indentStyle": "space"
|
||||
},
|
||||
"linter": {
|
||||
"enabled": true,
|
||||
"rules": {
|
||||
"recommended": true
|
||||
}
|
||||
},
|
||||
"javascript": {
|
||||
"formatter": {
|
||||
"quoteStyle": "double"
|
||||
}
|
||||
},
|
||||
"assist": {
|
||||
"enabled": true,
|
||||
"actions": {
|
||||
"source": {
|
||||
"organizeImports": "on"
|
||||
}
|
||||
}
|
||||
}
|
||||
"$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
|
||||
"vcs": {
|
||||
"enabled": true,
|
||||
"clientKind": "git",
|
||||
"useIgnoreFile": true
|
||||
},
|
||||
"files": {
|
||||
"includes": ["**", "!!**/dist"]
|
||||
},
|
||||
"formatter": {
|
||||
"enabled": true,
|
||||
"indentStyle": "space"
|
||||
},
|
||||
"linter": {
|
||||
"enabled": true,
|
||||
"rules": {
|
||||
"recommended": true
|
||||
}
|
||||
},
|
||||
"javascript": {
|
||||
"formatter": {
|
||||
"quoteStyle": "double"
|
||||
}
|
||||
},
|
||||
"assist": {
|
||||
"enabled": true,
|
||||
"actions": {
|
||||
"source": {
|
||||
"organizeImports": "on"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,24 +1,33 @@
|
||||
# Facebook Marketplace Cookies Setup
|
||||
# Marketplace Cookies Setup
|
||||
|
||||
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies.
|
||||
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
|
||||
|
||||
## Option 1: Cookies File (`facebook.json`)
|
||||
## Cookie Priority Hierarchy
|
||||
|
||||
1. Log into Facebook in your browser
|
||||
2. Open Developer Tools → Network tab
|
||||
3. Visit facebook.com/marketplace (ensure you're logged in)
|
||||
4. Look for any marketplace-related requests in the Network tab
|
||||
5. Export cookies from the browser's Application/Storage → Cookies section
|
||||
6. Save the cookies as a JSON array to `facebook.json`
|
||||
All scrapers follow this priority order (highest to lowest):
|
||||
1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
|
||||
2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||
3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
|
||||
|
||||
The `facebook.json` file should contain Facebook session cookies, particularly:
|
||||
---
|
||||
|
||||
## Facebook Marketplace (`facebook.json`)
|
||||
|
||||
### Required Cookies
|
||||
- `c_user`: Your Facebook user ID
|
||||
- `xs`: Facebook session token
|
||||
- `fr`: Facebook request token
|
||||
- `datr`: Data attribution token
|
||||
- `sb`: Session browser token
|
||||
|
||||
Example structure:
|
||||
### Setup Methods
|
||||
|
||||
**Method 1: Cookie File (Lowest Priority)**
|
||||
1. Log into Facebook in your browser
|
||||
2. Open Developer Tools → Application/Storage → Cookies
|
||||
3. Export cookies as JSON array to `facebook.json`
|
||||
|
||||
Example `facebook.json`:
|
||||
```json
|
||||
[
|
||||
{
|
||||
@@ -27,26 +36,59 @@ Example structure:
|
||||
"domain": ".facebook.com",
|
||||
"path": "/",
|
||||
"secure": true
|
||||
},
|
||||
// ... other cookies
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Option 2: URL Parameter
|
||||
|
||||
You can pass cookies directly via the `cookies` URL parameter:
|
||||
|
||||
**Method 2: Environment Variable**
|
||||
```bash
|
||||
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||
```
|
||||
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
|
||||
|
||||
**Method 3: URL Parameter (Highest Priority)**
|
||||
```
|
||||
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## eBay (`ebay.json`)
|
||||
|
||||
eBay has aggressive bot detection that blocks requests without valid session cookies.
|
||||
|
||||
### Setup Methods
|
||||
|
||||
**Method 1: Cookie File (Lowest Priority)**
|
||||
1. Log into eBay in your browser
|
||||
2. Open Developer Tools → Network tab
|
||||
3. Visit ebay.ca and inspect any request headers
|
||||
4. Copy the full `Cookie` header value
|
||||
5. Save as plain text to `ebay.json` (see `ebay.json.example`)
|
||||
|
||||
Example `ebay.json`:
|
||||
```
|
||||
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
|
||||
```
|
||||
|
||||
**Method 2: Environment Variable**
|
||||
```bash
|
||||
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||
```
|
||||
|
||||
**Method 3: URL Parameter (Highest Priority)**
|
||||
```
|
||||
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Important Notes
|
||||
|
||||
- Cookies must be from an active Facebook session
|
||||
- Cookies expire, so you may need to refresh them periodically
|
||||
- Never share real cookies or commit them to version control
|
||||
- Facebook may block automated scraping even with valid cookies
|
||||
- Cookies must be from active browser sessions
|
||||
- Cookies expire and need periodic refresh
|
||||
- **NEVER** commit real cookies to version control
|
||||
- Platforms may still block automated scraping despite valid cookies
|
||||
|
||||
## Security
|
||||
|
||||
The cookies file is intentionally left out of version control for security reasons.
|
||||
All `*.json` files in this directory are git-ignored for security.
|
||||
|
||||
1
cookies/ebay.json.example
Normal file
1
cookies/ebay.json.example
Normal file
@@ -0,0 +1 @@
|
||||
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE
|
||||
9
opencode.jsonc
Normal file
9
opencode.jsonc
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"$schema": "https://opencode.ai/config.json",
|
||||
"mcp": {
|
||||
"marketplace-scrape": {
|
||||
"type": "remote",
|
||||
"url": "http://localhost:4006/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
12
package.json
12
package.json
@@ -2,11 +2,19 @@
|
||||
"name": "marketplace-scrapers-monorepo",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"ci": "biome ci"
|
||||
"ci": "biome ci",
|
||||
"clean": "rm -rf dist",
|
||||
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
|
||||
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
|
||||
"build:all": "bun run build:api && bun run build:mcp",
|
||||
"build": "bun run clean && bun run build:all",
|
||||
"start": "./scripts/start.sh"
|
||||
},
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"workspaces": ["packages/*"],
|
||||
"workspaces": [
|
||||
"packages/*"
|
||||
],
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "2.3.11"
|
||||
}
|
||||
|
||||
@@ -1,30 +1,30 @@
|
||||
import { statusRoute } from "./routes/status";
|
||||
import { kijijiRoute } from "./routes/kijiji";
|
||||
import { facebookRoute } from "./routes/facebook";
|
||||
import { ebayRoute } from "./routes/ebay";
|
||||
import { facebookRoute } from "./routes/facebook";
|
||||
import { kijijiRoute } from "./routes/kijiji";
|
||||
import { statusRoute } from "./routes/status";
|
||||
|
||||
const PORT = process.env.PORT || 4005;
|
||||
|
||||
const server = Bun.serve({
|
||||
port: PORT as number | string,
|
||||
idleTimeout: 0,
|
||||
routes: {
|
||||
// Health check endpoint
|
||||
"/api/status": statusRoute,
|
||||
port: PORT as number | string,
|
||||
idleTimeout: 0,
|
||||
routes: {
|
||||
// Health check endpoint
|
||||
"/api/status": statusRoute,
|
||||
|
||||
// Marketplace search endpoints
|
||||
"/api/kijiji": kijijiRoute,
|
||||
"/api/facebook": facebookRoute,
|
||||
"/api/ebay": ebayRoute,
|
||||
// Marketplace search endpoints
|
||||
"/api/kijiji": kijijiRoute,
|
||||
"/api/facebook": facebookRoute,
|
||||
"/api/ebay": ebayRoute,
|
||||
|
||||
// Fallback for unmatched /api routes
|
||||
"/api/*": Response.json({ message: "Not found" }, { status: 404 }),
|
||||
},
|
||||
// Fallback for unmatched /api routes
|
||||
"/api/*": Response.json({ message: "Not found" }, { status: 404 }),
|
||||
},
|
||||
|
||||
// Fallback for all other routes
|
||||
fetch(req: Request) {
|
||||
return new Response("Not Found", { status: 404 });
|
||||
},
|
||||
// Fallback for all other routes
|
||||
fetch(_req: Request) {
|
||||
return new Response("Not Found", { status: 404 });
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`API Server running on ${server.hostname}:${server.port}`);
|
||||
|
||||
@@ -1,60 +1,68 @@
|
||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
|
||||
/**
|
||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
|
||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
|
||||
* Search eBay for listings (default: Buy It Now only, Canada only)
|
||||
* Optional: Pass cookies parameter to bypass bot detection
|
||||
*/
|
||||
export async function ebayRoute(req: Request): Promise<Response> {
|
||||
const reqUrl = new URL(req.url);
|
||||
try {
|
||||
const reqUrl = new URL(req.url);
|
||||
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message:
|
||||
"Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message:
|
||||
"Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
|
||||
// Parse optional parameters with defaults
|
||||
const minPrice = reqUrl.searchParams.get("minPrice")
|
||||
? parseInt(reqUrl.searchParams.get("minPrice")!)
|
||||
: undefined;
|
||||
const maxPrice = reqUrl.searchParams.get("maxPrice")
|
||||
? parseInt(reqUrl.searchParams.get("maxPrice")!)
|
||||
: undefined;
|
||||
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
|
||||
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
|
||||
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
|
||||
const exclusionsParam = reqUrl.searchParams.get("exclusions");
|
||||
const exclusions = exclusionsParam ? exclusionsParam.split(",").map(s => s.trim()) : [];
|
||||
const keywordsParam = reqUrl.searchParams.get("keywords");
|
||||
const keywords = keywordsParam ? keywordsParam.split(",").map(s => s.trim()) : [SEARCH_QUERY];
|
||||
const minPriceParam = reqUrl.searchParams.get("minPrice");
|
||||
const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined;
|
||||
const maxPriceParam = reqUrl.searchParams.get("maxPrice");
|
||||
const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined;
|
||||
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
|
||||
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
|
||||
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
|
||||
const exclusionsParam = reqUrl.searchParams.get("exclusions");
|
||||
const exclusions = exclusionsParam
|
||||
? exclusionsParam.split(",").map((s) => s.trim())
|
||||
: [];
|
||||
const keywordsParam = reqUrl.searchParams.get("keywords");
|
||||
const keywords = keywordsParam
|
||||
? keywordsParam.split(",").map((s) => s.trim())
|
||||
: [SEARCH_QUERY];
|
||||
|
||||
try {
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 5, {
|
||||
minPrice,
|
||||
maxPrice,
|
||||
strictMode,
|
||||
exclusions,
|
||||
keywords,
|
||||
buyItNowOnly,
|
||||
canadaOnly,
|
||||
});
|
||||
if (!items || items.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("eBay scraping error:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json(
|
||||
{ message: errorMessage },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
||||
const cookies = reqUrl.searchParams.get("cookies") || undefined;
|
||||
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||
minPrice,
|
||||
maxPrice,
|
||||
strictMode,
|
||||
exclusions,
|
||||
keywords,
|
||||
buyItNowOnly,
|
||||
canadaOnly,
|
||||
cookies,
|
||||
});
|
||||
|
||||
const results = maxItems ? items.slice(0, maxItems) : items;
|
||||
|
||||
if (!results || results.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(results, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("eBay scraping error:", error);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,36 +5,42 @@ import { fetchFacebookItems } from "@marketplace-scrapers/core";
|
||||
* Search Facebook Marketplace for listings
|
||||
*/
|
||||
export async function facebookRoute(req: Request): Promise<Response> {
|
||||
const reqUrl = new URL(req.url);
|
||||
const reqUrl = new URL(req.url);
|
||||
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message:
|
||||
"Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
|
||||
const LOCATION = reqUrl.searchParams.get("location") || "toronto";
|
||||
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
|
||||
const LOCATION = reqUrl.searchParams.get("location") || "toronto";
|
||||
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
|
||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25;
|
||||
|
||||
try {
|
||||
const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE);
|
||||
if (!items || items.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Facebook scraping error:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json(
|
||||
{ message: errorMessage },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
try {
|
||||
const items = await fetchFacebookItems(
|
||||
SEARCH_QUERY,
|
||||
1,
|
||||
LOCATION,
|
||||
maxItems,
|
||||
COOKIES_SOURCE,
|
||||
undefined,
|
||||
);
|
||||
if (!items || items.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Facebook scraping error:", error);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,33 +5,63 @@ import { fetchKijijiItems } from "@marketplace-scrapers/core";
|
||||
* Search Kijiji marketplace for listings
|
||||
*/
|
||||
export async function kijijiRoute(req: Request): Promise<Response> {
|
||||
const reqUrl = new URL(req.url);
|
||||
const reqUrl = new URL(req.url);
|
||||
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message:
|
||||
"Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
|
||||
try {
|
||||
const items = await fetchKijijiItems(SEARCH_QUERY, 5);
|
||||
if (!items)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Kijiji scraping error:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json(
|
||||
{ message: errorMessage },
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
const maxPagesParam = reqUrl.searchParams.get("maxPages");
|
||||
const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5;
|
||||
const priceMinParam = reqUrl.searchParams.get("priceMin");
|
||||
const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined;
|
||||
const priceMaxParam = reqUrl.searchParams.get("priceMax");
|
||||
const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined;
|
||||
|
||||
const searchOptions = {
|
||||
location: reqUrl.searchParams.get("location") || undefined,
|
||||
category: reqUrl.searchParams.get("category") || undefined,
|
||||
keywords: reqUrl.searchParams.get("keywords") || undefined,
|
||||
sortBy: reqUrl.searchParams.get("sortBy") as
|
||||
| "relevancy"
|
||||
| "date"
|
||||
| "price"
|
||||
| "distance"
|
||||
| undefined,
|
||||
sortOrder: reqUrl.searchParams.get("sortOrder") as
|
||||
| "desc"
|
||||
| "asc"
|
||||
| undefined,
|
||||
maxPages,
|
||||
priceMin,
|
||||
priceMax,
|
||||
cookies: reqUrl.searchParams.get("cookies") || undefined,
|
||||
};
|
||||
|
||||
try {
|
||||
const items = await fetchKijijiItems(
|
||||
SEARCH_QUERY,
|
||||
4, // 4 requests per second for faster scraping
|
||||
"https://www.kijiji.ca",
|
||||
searchOptions,
|
||||
{},
|
||||
);
|
||||
if (!items)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Kijiji scraping error:", error);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,5 +2,5 @@
|
||||
* Health check endpoint
|
||||
*/
|
||||
export function statusRoute(): Response {
|
||||
return new Response("OK", { status: 200 });
|
||||
return new Response("OK", { status: 200 });
|
||||
}
|
||||
|
||||
@@ -1,45 +1,43 @@
|
||||
// Export all scrapers
|
||||
export {
|
||||
default as fetchKijijiItems,
|
||||
slugify,
|
||||
resolveLocationId,
|
||||
resolveCategoryId,
|
||||
buildSearchUrl,
|
||||
extractApolloState,
|
||||
parseSearch,
|
||||
parseDetailedListing,
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
} from "./scrapers/kijiji";
|
||||
export type {
|
||||
KijijiListingDetails,
|
||||
DetailedListing,
|
||||
SearchOptions,
|
||||
ListingFetchOptions,
|
||||
} from "./scrapers/kijiji";
|
||||
|
||||
export {
|
||||
default as fetchFacebookItems,
|
||||
fetchFacebookItem,
|
||||
parseFacebookCookieString,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookMarketplaceData,
|
||||
extractFacebookItemData,
|
||||
parseFacebookAds,
|
||||
parseFacebookItem,
|
||||
} from "./scrapers/facebook";
|
||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||
|
||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||
export type { EbayListingDetails } from "./scrapers/ebay";
|
||||
|
||||
// Export shared utilities
|
||||
export * from "./utils/http";
|
||||
export * from "./utils/delay";
|
||||
export * from "./utils/format";
|
||||
|
||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||
export {
|
||||
default as fetchFacebookItems,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookItemData,
|
||||
extractFacebookMarketplaceData,
|
||||
fetchFacebookItem,
|
||||
parseFacebookAds,
|
||||
parseFacebookCookieString,
|
||||
parseFacebookItem,
|
||||
} from "./scrapers/facebook";
|
||||
export type {
|
||||
DetailedListing,
|
||||
KijijiListingDetails,
|
||||
ListingFetchOptions,
|
||||
SearchOptions,
|
||||
} from "./scrapers/kijiji";
|
||||
export {
|
||||
buildSearchUrl,
|
||||
default as fetchKijijiItems,
|
||||
extractApolloState,
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
parseDetailedListing,
|
||||
parseSearch,
|
||||
RateLimitError,
|
||||
resolveCategoryId,
|
||||
resolveLocationId,
|
||||
slugify,
|
||||
ValidationError,
|
||||
} from "./scrapers/kijiji";
|
||||
// Export shared types
|
||||
export * from "./types/common";
|
||||
// Export shared utilities
|
||||
export * from "./utils/cookies";
|
||||
export * from "./utils/delay";
|
||||
export * from "./utils/format";
|
||||
export * from "./utils/http";
|
||||
|
||||
@@ -1,27 +1,36 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { parseHTML } from "linkedom";
|
||||
import { isRecord } from "../utils/http";
|
||||
import {
|
||||
type CookieConfig,
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
|
||||
// eBay cookie configuration
|
||||
const EBAY_COOKIE_CONFIG: CookieConfig = {
|
||||
name: "eBay",
|
||||
domain: ".ebay.ca",
|
||||
envVar: "EBAY_COOKIE",
|
||||
filePath: "./cookies/ebay.json",
|
||||
};
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
export interface EbayListingDetails {
|
||||
url: string;
|
||||
title: string;
|
||||
description?: string;
|
||||
listingPrice?: {
|
||||
amountFormatted: string;
|
||||
cents?: number;
|
||||
currency?: string;
|
||||
};
|
||||
listingType?: string;
|
||||
listingStatus?: string;
|
||||
creationDate?: string;
|
||||
endDate?: string;
|
||||
numberOfViews?: number;
|
||||
address?: string | null;
|
||||
url: string;
|
||||
title: string;
|
||||
description?: string;
|
||||
listingPrice?: {
|
||||
amountFormatted: string;
|
||||
cents?: number;
|
||||
currency?: string;
|
||||
};
|
||||
listingType?: string;
|
||||
listingStatus?: string;
|
||||
creationDate?: string;
|
||||
endDate?: string;
|
||||
numberOfViews?: number;
|
||||
address?: string | null;
|
||||
}
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
@@ -29,43 +38,49 @@ export interface EbayListingDetails {
|
||||
/**
 * Parse an eBay price string such as "$1.50 CAD", "CA $1.50", "C $1,299.00"
 * or "US $20.00" into integer cents plus a currency code.
 *
 * @param priceText - Raw price text scraped from a listing.
 * @returns `{ cents, currency }`, or `null` when the input is empty, is not a
 *   string, or contains no parsable number. `currency` is "CAD" when a
 *   Canadian marker is present and "USD" otherwise.
 */
function parseEbayPrice(
  priceText: string,
): { cents: number; currency: string } | null {
  if (!priceText || typeof priceText !== "string") return null;

  const cleaned = priceText.trim();

  // Take the first numeric run. It must start with a digit so that stray
  // punctuation before the amount ("Price, $5.25") is not mistaken for it.
  const numberMatch = cleaned.match(/\d[\d,]*(?:\.\d+)?/);
  if (!numberMatch) return null;

  const dollars = parseFloat(numberMatch[0].replace(/,/g, ""));
  if (Number.isNaN(dollars)) return null;

  const cents = Math.round(dollars * 100);

  // Canadian markers: "CAD" (any case), "C$", "C $", "CA$", "CA $", "$CA".
  // The word boundaries keep "US $" and ordinary words ending in "c"/"ca"
  // from matching.
  const isCanadian =
    /CAD/i.test(cleaned) ||
    /\bCA?\s?\$/.test(cleaned) ||
    /\$\s?CA\b/.test(cleaned);

  // Everything else (explicit "USD", a bare "$", or no marker at all) keeps
  // the original default of USD.
  const currency = isCanadian ? "CAD" : "USD";

  return { cents, currency };
}
|
||||
|
||||
/**
 * Error thrown when an eBay HTTP request completes with a non-OK status.
 * Carries the response status code and the URL that was requested.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    // Override the default "Error" name so logs identify the error type.
    this.name = "HttpError";
  }
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
@@ -74,290 +89,390 @@ class HttpError extends Error {
|
||||
Parse eBay search page HTML and extract listings using DOM selectors
|
||||
*/
|
||||
function parseEbayListings(
  htmlString: HTMLString,
  keywords: string[],
  exclusions: string[],
  strictMode: boolean,
): EbayListingDetails[] {
  // Scans every anchor whose href contains "itm/", walks up to the enclosing
  // item container, and extracts title and price from that container.
  // - keywords:   only consulted when strictMode is true; the title must then
  //               contain at least one of them (case-insensitive).
  // - exclusions: a listing is dropped when its title contains any of them
  //               (case-insensitive).
  // Returns one entry per distinct listing URL. A failure on a single anchor
  // is logged with console.warn and that anchor is skipped.
  const { document } = parseHTML(htmlString);
  const results: EbayListingDetails[] = [];

  // The search page is fetched from www.ebay.ca, so relative links must be
  // resolved against that origin.
  const EBAY_BASE_URL = "https://www.ebay.ca";

  // Several anchors can point at the same item; remember emitted URLs so
  // each listing is reported once.
  const seenUrls = new Set<string>();

  // Find all listing links by looking for eBay item URLs (/itm/)
  const linkElements = document.querySelectorAll('a[href*="itm/"]');

  for (const linkElement of linkElements) {
    try {
      // Get href attribute
      let href = linkElement.getAttribute("href");
      if (!href) continue;

      // Make href absolute. URL resolution handles protocol-relative
      // ("//host/..."), root-relative ("/itm/...") and bare relative
      // ("itm/...") forms.
      if (!href.startsWith("http")) {
        href = new URL(href, EBAY_BASE_URL).toString();
      }

      if (seenUrls.has(href)) continue;

      // Find the container - go up several levels to find the item container
      // Modern eBay uses complex nested structures (often 5-10 levels deep)
      let container: Element | null = linkElement;
      let depth = 0;
      const maxDepth = 15;

      // Walk up until we find a list item or results container
      while (container && depth < maxDepth) {
        const classes = container.className || "";
        if (
          classes.includes("s-item") ||
          classes.includes("srp-results") ||
          container.tagName === "LI"
        ) {
          break;
        }
        container = container.parentElement;
        depth++;
      }

      if (!container || depth >= maxDepth) continue;

      // Extract title - look for heading or title-related elements near the link
      // Modern eBay often uses h3, span, or div with text content near the link
      let titleElement = container.querySelector(
        'h3, [role="heading"], .s-item__title span',
      );

      // If no direct title element, try finding text content around the link
      if (!titleElement) {
        // Look for spans or divs with text near this link
        const nearbySpans = container.querySelectorAll("span, div");
        for (const span of nearbySpans) {
          const text = span.textContent?.trim();
          if (
            text &&
            text.length > 10 &&
            text.length < 200 &&
            !text.includes("$") &&
            !text.includes("item")
          ) {
            titleElement = span;
            break;
          }
        }
      }

      let title = titleElement?.textContent?.trim();

      // Clean up eBay UI strings that get included in titles
      if (title) {
        // Remove common eBay UI strings that appear at the end of titles
        const uiStrings = [
          "Opens in a new window",
          "Opens in a new tab",
          "Opens in a new window or tab",
          "opens in a new window",
          "opens in a new tab",
          "opens in a new window or tab",
        ];

        for (const uiString of uiStrings) {
          const uiIndex = title.indexOf(uiString);
          if (uiIndex !== -1) {
            title = title.substring(0, uiIndex).trim();
            break; // Only remove one UI string per title
          }
        }
      }

      // Skip items with no title, or whose title is empty or too short after
      // cleaning.
      if (!title || title.length < 10) continue;

      // Skip irrelevant eBay ads
      if (title === "Shop on eBay") continue;

      // Lower-case once; reused by the exclusion and strict-mode filters.
      const titleLower = title.toLowerCase();

      // Extract price - look for eBay's price classes, preferring sale/discount prices
      // Updated for 2026 eBay HTML structure
      let priceElement = container.querySelector(
        '[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
      );

      // If no direct price class, look for spans containing $ (but not titles)
      if (!priceElement) {
        const spansAndElements = container.querySelectorAll(
          "span, div, b, em, strong",
        );
        for (const el of spansAndElements) {
          const text = el.textContent?.trim();
          // Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
          if (
            text?.includes("$") &&
            text.length < 100 &&
            !text.includes("laptop") &&
            !text.includes("computer") &&
            !text.includes("intel") &&
            !text.includes("core") &&
            !text.includes("ram") &&
            !text.includes("ssd") &&
            !/\d{4}/.test(text) && // Avoid years like "2024"
            !text.includes('"') // Avoid measurements
          ) {
            priceElement = el;
            break;
          }
        }
      }

      // For discounted items, eBay shows both original and sale price
      // Prefer sale/current price over original/strikethrough price
      if (priceElement) {
        // Check if this element or its parent contains multiple price elements
        const priceContainer =
          priceElement.closest('[class*="s-item__price"]') ||
          priceElement.parentElement;

        if (priceContainer) {
          // Look for all price elements within this container, including strikethrough prices
          const allPriceElements = priceContainer.querySelectorAll(
            '[class*="s-item__price"], span, b, em, strong, s, del, strike',
          );

          // Filter to only elements that actually contain prices (not labels)
          const actualPrices: HTMLElement[] = [];
          for (const el of allPriceElements) {
            const text = el.textContent?.trim();
            if (
              text &&
              /^\s*[$£€¥]/u.test(text) &&
              text.length < 50 &&
              !/\d{4}/.test(text)
            ) {
              actualPrices.push(el);
            }
          }

          // Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
          if (actualPrices.length > 1) {
            // First, look for prices that are NOT struck through
            const nonStrikethroughPrices = actualPrices.filter((el) => {
              const tagName = el.tagName.toLowerCase();
              const styles =
                el.classList.contains("s-strikethrough") ||
                el.classList.contains("u-flStrike") ||
                el.closest("s, del, strike");
              return (
                tagName !== "s" &&
                tagName !== "del" &&
                tagName !== "strike" &&
                !styles
              );
            });

            if (nonStrikethroughPrices.length > 0) {
              // Use the first non-strikethrough price (sale price)
              priceElement = nonStrikethroughPrices[0];
            } else {
              // Fallback: use the last price (likely the most current)
              const lastPrice = actualPrices[actualPrices.length - 1];
              priceElement = lastPrice;
            }
          }
        }
      }

      const priceText = priceElement?.textContent?.trim();

      if (!priceText) continue;

      // Parse price into cents and currency
      const priceInfo = parseEbayPrice(priceText);
      if (!priceInfo) continue;

      // Apply exclusion filters
      if (
        exclusions.some((exclusion) =>
          titleLower.includes(exclusion.toLowerCase()),
        )
      ) {
        continue;
      }

      // Apply strict mode filter (title must contain at least one keyword)
      if (
        strictMode &&
        !keywords.some((keyword) => titleLower.includes(keyword.toLowerCase()))
      ) {
        continue;
      }

      const listing: EbayListingDetails = {
        url: href,
        title,
        listingPrice: {
          amountFormatted: priceText,
          cents: priceInfo.cents,
          currency: priceInfo.currency,
        },
        listingType: "OFFER", // eBay listings are typically offers
        listingStatus: "ACTIVE",
        address: null, // eBay doesn't typically show detailed addresses in search results
      };

      seenUrls.add(href);
      results.push(listing);
    } catch (err) {
      console.warn(`Error parsing eBay listing: ${err}`);
    }
  }

  return results;
}
|
||||
|
||||
// ----------------------------- Cookie Loading -----------------------------
|
||||
|
||||
/**
 * Resolve eBay cookies through the shared cookie loader and render them as a
 * `Cookie` request-header value for www.ebay.ca.
 *
 * @param cookiesSource - Optional caller-supplied cookies, passed through to
 *   `loadCookiesOptional` together with the eBay cookie configuration.
 * @returns The header string, or `undefined` (after logging a warning that
 *   explains how to supply cookies) when no cookies were found.
 */
async function loadEbayCookies(
  cookiesSource?: string,
): Promise<string | undefined> {
  const loaded = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);

  if (loaded.length > 0) {
    return formatCookiesForHeader(loaded, "www.ebay.ca");
  }

  const warningLines = [
    "No eBay cookies found. eBay may block requests without valid session cookies.",
    "Provide cookies via (in priority order):",
    " 1. 'cookies' URL parameter (highest priority), or",
    " 2. EBAY_COOKIE environment variable, or",
    " 3. ./cookies/ebay.json file (lowest priority)",
    'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  ];
  console.warn(warningLines.join("\n"));
  return undefined;
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Fetch one eBay.ca search results page and return the parsed listings whose
 * price lies within the requested range.
 *
 * @param SEARCH_QUERY - Search keywords, sent as the `_nkw` parameter.
 * @param REQUESTS_PER_SECOND - Used to derive the delay awaited after the
 *   request (at least 1 ms).
 * @param opts - Optional filters: price bounds compared against the listing
 *   price in cents, strict keyword matching, title exclusions, Buy-It-Now /
 *   Canada-only URL flags, and a cookie source forwarded to `loadEbayCookies`.
 * @returns The filtered listings. An `HttpError` (non-OK response) is logged
 *   and yields an empty array; any other error is re-thrown.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  opts: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
    buyItNowOnly?: boolean;
    canadaOnly?: boolean;
    cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
  } = {},
) {
  const minPrice = opts.minPrice ?? 0;
  const maxPrice = opts.maxPrice ?? Number.MAX_SAFE_INTEGER;
  const strictMode = opts.strictMode ?? false;
  const exclusions = opts.exclusions ?? [];
  // Fall back to the search query when no keywords were supplied.
  const keywords = opts.keywords ?? [SEARCH_QUERY];
  const buyItNowOnly = opts.buyItNowOnly ?? true;
  const canadaOnly = opts.canadaOnly ?? true;

  // Cookies are resolved before the request is built (parameter > env > file).
  const cookieHeader = await loadEbayCookies(opts.cookies);

  // Search URL on the Canadian site, with optional Buy It Now and
  // Canada-only flags.
  const query = new URLSearchParams({
    _nkw: SEARCH_QUERY,
    _sacat: "0",
    _from: "R40",
  });
  if (buyItNowOnly) query.set("LH_BIN", "1");
  if (canadaOnly) query.set("LH_PrefLoc", "1");

  const searchUrl = `https://www.ebay.ca/sch/i.html?${query.toString()}`;
  const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));

  console.log(`Fetching eBay search: ${searchUrl}`);

  try {
    // Browser-like headers, intended to avoid bot detection.
    const headers: Record<string, string> = {
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.5",
      "Accept-Encoding": "gzip, deflate, br, zstd",
      Referer: "https://www.ebay.ca/",
      Connection: "keep-alive",
      "Upgrade-Insecure-Requests": "1",
      "Sec-Fetch-Dest": "document",
      "Sec-Fetch-Mode": "navigate",
      "Sec-Fetch-Site": "same-origin",
      "Sec-Fetch-User": "?1",
      Priority: "u=0, i",
    };
    if (cookieHeader) {
      headers.Cookie = cookieHeader;
    }

    const response = await fetch(searchUrl, { method: "GET", headers });
    if (!response.ok) {
      throw new HttpError(
        `Request failed with status ${response.status}`,
        response.status,
        searchUrl,
      );
    }

    const searchHtml = await response.text();
    // Pause after the request to stay at or under REQUESTS_PER_SECOND.
    await delay(DELAY_MS);

    console.log(`\nParsing eBay listings...`);

    const parsed = parseEbayListings(
      searchHtml,
      keywords,
      exclusions,
      strictMode,
    );

    // Keep listings that have a non-zero price inside [minPrice, maxPrice].
    const inRange = parsed.filter((item) => {
      const cents = item.listingPrice?.cents;
      if (!cents) return false;
      return cents >= minPrice && cents <= maxPrice;
    });

    console.log(`Parsed ${inRange.length} eBay listings.`);
    return inRange;
  } catch (err) {
    if (!(err instanceof HttpError)) {
      throw err;
    }
    console.error(
      `Failed to fetch eBay search (${err.status}): ${err.message}`,
    );
    return [];
  }
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -3,18 +3,18 @@ export type HTMLString = string;
|
||||
|
||||
/** Currency price object with formatting options */
|
||||
export interface Price {
|
||||
amountFormatted: string;
|
||||
cents: number;
|
||||
currency: string;
|
||||
amountFormatted: string;
|
||||
cents: number;
|
||||
currency: string;
|
||||
}
|
||||
|
||||
/** Base listing details common across all marketplaces */
|
||||
export interface ListingDetails {
|
||||
url: string;
|
||||
title: string;
|
||||
listingPrice: Price;
|
||||
listingType: string;
|
||||
listingStatus: string;
|
||||
address?: string | null;
|
||||
creationDate?: string;
|
||||
url: string;
|
||||
title: string;
|
||||
listingPrice: Price;
|
||||
listingType: string;
|
||||
listingStatus: string;
|
||||
address?: string | null;
|
||||
creationDate?: string;
|
||||
}
|
||||
|
||||
227
packages/core/src/utils/cookies.ts
Normal file
227
packages/core/src/utils/cookies.ts
Normal file
@@ -0,0 +1,227 @@
|
||||
/**
|
||||
* Shared cookie handling utilities for marketplace scrapers
|
||||
*/
|
||||
|
||||
export interface Cookie {
|
||||
name: string;
|
||||
value: string;
|
||||
domain: string;
|
||||
path: string;
|
||||
secure?: boolean;
|
||||
httpOnly?: boolean;
|
||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
||||
session?: boolean;
|
||||
expirationDate?: number;
|
||||
partitionKey?: Record<string, unknown>;
|
||||
storeId?: string;
|
||||
}
|
||||
|
||||
export interface CookieConfig {
|
||||
/** Name used in log messages (e.g., "Facebook", "Kijiji") */
|
||||
name: string;
|
||||
/** Domain for cookies (e.g., ".facebook.com", ".kijiji.ca") */
|
||||
domain: string;
|
||||
/** Environment variable name (e.g., "FACEBOOK_COOKIE") */
|
||||
envVar: string;
|
||||
/** Path to cookie file (e.g., "./cookies/facebook.json") */
|
||||
filePath: string;
|
||||
}
|
||||
|
||||
/**
 * Parse a cookie string of the form "name1=value1; name2=value2" into a
 * Cookie array.
 *
 * Pairs without "=", with an empty name, or with an empty value are skipped.
 * Values are URI-decoded; a value that is not valid percent-encoding (for
 * example "100%") is kept verbatim instead of aborting the whole parse.
 *
 * @param cookieString - Raw cookie string; blank input yields `[]`.
 * @param domain - Domain assigned to every parsed cookie.
 * @returns Parsed cookies with path "/", secure=true, httpOnly=false,
 *   sameSite="lax" and no expiration date.
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();

    // Split on the first "=" only; the value itself may contain "=".
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) continue;

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) continue;

    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // decodeURIComponent throws URIError on malformed escapes; keep the
      // raw value so one odd cookie does not discard all the others.
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined,
    });
  }

  return cookies;
}
|
||||
|
||||
/**
 * Parse a JSON array of cookie objects, e.g.
 * `[{"name": "foo", "value": "bar", ...}]`, into a Cookie array.
 *
 * Only entries whose `name` and `value` are both strings are kept; no other
 * field is checked. Input that parses to something other than an array
 * yields `[]`.
 *
 * @param jsonString - JSON text to parse.
 * @returns The entries that look like cookies, in their original order.
 * @throws SyntaxError (from `JSON.parse`) when `jsonString` is not valid JSON.
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const decoded = JSON.parse(jsonString);
  if (!Array.isArray(decoded)) {
    return [];
  }

  const cookies: Cookie[] = [];
  for (const entry of decoded) {
    const looksLikeCookie =
      entry &&
      typeof entry.name === "string" &&
      typeof entry.value === "string";
    if (looksLikeCookie) {
      cookies.push(entry);
    }
  }
  return cookies;
}
|
||||
|
||||
/**
 * Parse cookies from text in either supported format.
 *
 * The JSON-array format is tried first. If that yields no cookies, or the
 * text is not valid JSON, the input is parsed as a "name=value; ..." cookie
 * string instead.
 *
 * @param input - Cookie text in either format.
 * @param defaultDomain - Domain passed to the cookie-string parser.
 * @returns The parsed cookies (possibly empty).
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  let fromJson: Cookie[] = [];
  try {
    fromJson = parseJsonCookies(input);
  } catch {
    // Not JSON: fall through to the cookie-string format below.
    fromJson = [];
  }

  return fromJson.length > 0
    ? fromJson
    : parseCookieString(input, defaultDomain);
}
|
||||
|
||||
/**
 * Read cookies from a file holding either a JSON array or a cookie string.
 *
 * @param filePath - Path of the cookie file, opened with `Bun.file`.
 * @param defaultDomain - Domain forwarded to `parseCookiesAuto`.
 * @returns The parsed cookies, or `[]` when the file does not exist.
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);
  const present = await cookieFile.exists();

  if (present) {
    const raw = await cookieFile.text();
    return parseCookiesAuto(raw.trim(), defaultDomain);
  }

  return [];
}
|
||||
|
||||
/**
 * Format a cookie array into a `Cookie` request-header value
 * ("name1=value1; name2=value2") for the given host.
 *
 * A cookie is included when:
 *  - its domain starts with "." and the target host is that domain or one of
 *    its subdomains (matched on a label boundary, so ".bay.ca" does NOT
 *    match "www.ebay.ca"), or its domain equals the target host exactly; and
 *  - it has no `expirationDate`, or that date (seconds since the epoch) is
 *    not in the past.
 *
 * Domain comparison is case-insensitive.
 *
 * @param cookies - Candidate cookies.
 * @param targetDomain - Host the request is sent to, e.g. "www.ebay.ca".
 * @returns The header value; an empty string when no cookie applies.
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  return cookies
    .filter((cookie) => {
      const cookieDomain = cookie.domain.toLowerCase();

      if (cookieDomain.startsWith(".")) {
        // Domain cookie: applies to the bare domain and to any subdomain.
        // The leading dot is kept in the suffix test so the match always
        // falls on a label boundary.
        return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
      }

      // Host-only cookie
      return cookieDomain === host;
    })
    .filter(
      (cookie) =>
        // Drop cookies whose expiration time has already passed.
        !(cookie.expirationDate && cookie.expirationDate < nowSeconds),
    )
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||
|
||||
/**
 * Resolve cookies from the first source that yields any.
 *
 * Sources are tried in priority order: explicit parameter, then environment
 * variable, then cookie file. Each source accepts both the JSON array format
 * and the cookie string format.
 *
 * @param config - Supplies the name, domain, env var name and file path.
 * @param cookiesSource - Optional cookie text passed in by the caller.
 * @returns The cookies from the highest-priority source that produced some.
 * @throws Error when no source produces a valid cookie.
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  const { name, domain, envVar, filePath } = config;

  // Priority 1: URL/API parameter (if provided)
  if (cookiesSource) {
    const fromParam = parseCookiesAuto(cookiesSource, domain);
    if (fromParam.length === 0) {
      console.warn(
        `${name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(`Loaded ${fromParam.length} ${name} cookies from parameter`);
      return fromParam;
    }
  }

  // Priority 2: Environment variable (ignored when blank)
  const rawEnv = process.env[envVar];
  if (rawEnv?.trim()) {
    const fromEnv = parseCookiesAuto(rawEnv, domain);
    if (fromEnv.length === 0) {
      console.warn(`${envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${fromEnv.length} ${name} cookies from ${envVar} env var`,
      );
      return fromEnv;
    }
  }

  // Priority 3: Cookie file (fallback); read errors are reported, not fatal
  try {
    const fromFile = await loadCookiesFromFile(filePath, domain);
    if (fromFile.length > 0) {
      console.log(
        `Loaded ${fromFile.length} ${name} cookies from ${filePath}`,
      );
      return fromFile;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${filePath}: ${e}`);
  }

  // Every source came up empty
  const guidance = [
    `No valid ${name} cookies found. Provide cookies via (in priority order):`,
    ` 1. 'cookies' parameter (highest priority), or`,
    ` 2. ${envVar} environment variable, or`,
    ` 3. ${filePath} file (lowest priority)`,
    'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  ];
  throw new Error(guidance.join("\n"));
}
|
||||
|
||||
/**
 * Non-throwing variant of ensureCookies.
 *
 * @param config - Cookie source configuration, forwarded unchanged.
 * @param cookiesSource - Optional cookie text, forwarded unchanged.
 * @returns The resolved cookies, or an empty array when ensureCookies rejects.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  let resolved: Cookie[] = [];
  try {
    resolved = await ensureCookies(config, cookiesSource);
  } catch {
    // Any failure is treated as "no cookies available"
  }
  return resolved;
}
|
||||
@@ -4,5 +4,5 @@
|
||||
* @returns A promise that resolves after the specified delay
|
||||
*/
|
||||
export function delay(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
@@ -4,18 +4,21 @@
|
||||
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
|
||||
* @returns Formatted currency string
|
||||
*/
|
||||
export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string {
|
||||
try {
|
||||
const formatter = new Intl.NumberFormat(locale, {
|
||||
style: "currency",
|
||||
currency: "CAD",
|
||||
minimumFractionDigits: 2,
|
||||
maximumFractionDigits: 2,
|
||||
});
|
||||
return formatter.format(cents / 100);
|
||||
} catch (error) {
|
||||
// Fallback if locale is not supported
|
||||
const dollars = (cents / 100).toFixed(2);
|
||||
return `$${dollars}`;
|
||||
}
|
||||
export function formatCentsToCurrency(
|
||||
cents: number,
|
||||
locale: string = "en-CA",
|
||||
): string {
|
||||
try {
|
||||
const formatter = new Intl.NumberFormat(locale, {
|
||||
style: "currency",
|
||||
currency: "CAD",
|
||||
minimumFractionDigits: 2,
|
||||
maximumFractionDigits: 2,
|
||||
});
|
||||
return formatter.format(cents / 100);
|
||||
} catch {
|
||||
// Fallback if locale is not supported
|
||||
const dollars = (cents / 100).toFixed(2);
|
||||
return `$${dollars}`;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,79 +1,79 @@
|
||||
/** Custom error class for HTTP-related failures */
|
||||
export class HttpError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly statusCode: number,
|
||||
public readonly url?: string
|
||||
) {
|
||||
super(message);
|
||||
this.name = "HttpError";
|
||||
}
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly statusCode: number,
|
||||
public readonly url?: string,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "HttpError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for network failures (timeouts, connection issues) */
|
||||
export class NetworkError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly cause?: Error
|
||||
) {
|
||||
super(message);
|
||||
this.name = "NetworkError";
|
||||
}
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly cause?: Error,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "NetworkError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for parsing failures */
|
||||
export class ParseError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly data?: unknown
|
||||
) {
|
||||
super(message);
|
||||
this.name = "ParseError";
|
||||
}
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly data?: unknown,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "ParseError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for rate limiting */
|
||||
export class RateLimitError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly resetTime?: number
|
||||
) {
|
||||
super(message);
|
||||
this.name = "RateLimitError";
|
||||
}
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly resetTime?: number,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "RateLimitError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for validation failures */
|
||||
export class ValidationError extends Error {
|
||||
constructor(message: string) {
|
||||
super(message);
|
||||
this.name = "ValidationError";
|
||||
}
|
||||
constructor(message: string) {
|
||||
super(message);
|
||||
this.name = "ValidationError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Type guard to check if a value is a record (object) */
|
||||
export function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate exponential backoff delay with jitter
|
||||
*/
|
||||
function calculateBackoffDelay(attempt: number, baseMs: number): number {
|
||||
const exponentialDelay = baseMs * 2 ** attempt;
|
||||
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
|
||||
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
|
||||
const exponentialDelay = baseMs * 2 ** attempt;
|
||||
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
|
||||
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
|
||||
}
|
||||
|
||||
/** Options for fetchHtml */
|
||||
export interface FetchHtmlOptions {
|
||||
maxRetries?: number;
|
||||
retryBaseMs?: number;
|
||||
timeoutMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
headers?: Record<string, string>;
|
||||
maxRetries?: number;
|
||||
retryBaseMs?: number;
|
||||
timeoutMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
headers?: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -85,116 +85,116 @@ export interface FetchHtmlOptions {
|
||||
* @throws HttpError, NetworkError, or RateLimitError on failure
|
||||
*/
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts?: FetchHtmlOptions
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts?: FetchHtmlOptions,
|
||||
): Promise<string> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
||||
const timeoutMs = opts?.timeoutMs ?? 30000;
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
||||
const timeoutMs = opts?.timeoutMs ?? 30000;
|
||||
|
||||
const defaultHeaders: Record<string, string> = {
|
||||
accept:
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
|
||||
"cache-control": "no-cache",
|
||||
"upgrade-insecure-requests": "1",
|
||||
"user-agent":
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
|
||||
};
|
||||
const defaultHeaders: Record<string, string> = {
|
||||
accept:
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
|
||||
"cache-control": "no-cache",
|
||||
"upgrade-insecure-requests": "1",
|
||||
"user-agent":
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
|
||||
};
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "GET",
|
||||
headers: { ...defaultHeaders, ...opts?.headers },
|
||||
signal: controller.signal,
|
||||
});
|
||||
const res = await fetch(url, {
|
||||
method: "GET",
|
||||
headers: { ...defaultHeaders, ...opts?.headers },
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
clearTimeout(timeoutId);
|
||||
clearTimeout(timeoutId);
|
||||
|
||||
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
||||
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
||||
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
|
||||
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
||||
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
||||
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
|
||||
|
||||
if (!res.ok) {
|
||||
// Handle rate limiting
|
||||
if (res.status === 429) {
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: calculateBackoffDelay(attempt, retryBaseMs);
|
||||
if (!res.ok) {
|
||||
// Handle rate limiting
|
||||
if (res.status === 429) {
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: calculateBackoffDelay(attempt, retryBaseMs);
|
||||
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||
continue;
|
||||
}
|
||||
throw new RateLimitError(
|
||||
`Rate limit exceeded for ${url}`,
|
||||
url,
|
||||
resetSeconds
|
||||
);
|
||||
}
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||
continue;
|
||||
}
|
||||
throw new RateLimitError(
|
||||
`Rate limit exceeded for ${url}`,
|
||||
url,
|
||||
resetSeconds,
|
||||
);
|
||||
}
|
||||
|
||||
// Retry on server errors
|
||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
||||
);
|
||||
continue;
|
||||
}
|
||||
// Retry on server errors
|
||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url
|
||||
);
|
||||
}
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url,
|
||||
);
|
||||
}
|
||||
|
||||
const html = await res.text();
|
||||
const html = await res.text();
|
||||
|
||||
// Respect per-request delay to maintain rate limiting
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
return html;
|
||||
} catch (err) {
|
||||
// Re-throw known errors
|
||||
if (
|
||||
err instanceof RateLimitError ||
|
||||
err instanceof HttpError ||
|
||||
err instanceof NetworkError
|
||||
) {
|
||||
throw err;
|
||||
}
|
||||
// Respect per-request delay to maintain rate limiting
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
return html;
|
||||
} catch (err) {
|
||||
// Re-throw known errors
|
||||
if (
|
||||
err instanceof RateLimitError ||
|
||||
err instanceof HttpError ||
|
||||
err instanceof NetworkError
|
||||
) {
|
||||
throw err;
|
||||
}
|
||||
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new NetworkError(`Request timeout for ${url}`, url, err);
|
||||
}
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new NetworkError(`Request timeout for ${url}`, url, err);
|
||||
}
|
||||
|
||||
// Network or other errors
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new NetworkError(
|
||||
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
||||
url,
|
||||
err instanceof Error ? err : undefined
|
||||
);
|
||||
}
|
||||
}
|
||||
// Network or other errors
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new NetworkError(
|
||||
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
||||
url,
|
||||
err instanceof Error ? err : undefined,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
throw new NetworkError(`Exhausted retries without response for ${url}`, url);
|
||||
throw new NetworkError(`Exhausted retries without response for ${url}`, url);
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import {
|
||||
fetchFacebookItem,
|
||||
formatCentsToCurrency,
|
||||
formatCookiesForHeader,
|
||||
loadFacebookCookies,
|
||||
parseFacebookAds,
|
||||
parseFacebookCookieString,
|
||||
parseFacebookItem,
|
||||
@@ -183,7 +182,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
});
|
||||
});
|
||||
|
||||
const result = await fetchFacebookItem("123", mockCookies);
|
||||
const _result = await fetchFacebookItem("123", mockCookies);
|
||||
expect(attempts).toBe(2);
|
||||
// Should eventually succeed after retry
|
||||
});
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
|
||||
import { fetchFacebookItems } from "../src/scrapers/facebook";
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
@@ -1,166 +1,157 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import {
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
buildSearchUrl,
|
||||
resolveCategoryId,
|
||||
resolveLocationId,
|
||||
buildSearchUrl,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
resolveCategoryId,
|
||||
resolveLocationId,
|
||||
ValidationError,
|
||||
} from "../src/scrapers/kijiji";
|
||||
|
||||
describe("Location and Category Resolution", () => {
|
||||
describe("resolveLocationId", () => {
|
||||
test("should return numeric IDs as-is", () => {
|
||||
expect(resolveLocationId(1700272)).toBe(1700272);
|
||||
expect(resolveLocationId(0)).toBe(0);
|
||||
});
|
||||
describe("resolveLocationId", () => {
|
||||
test("should return numeric IDs as-is", () => {
|
||||
expect(resolveLocationId(1700272)).toBe(1700272);
|
||||
expect(resolveLocationId(0)).toBe(0);
|
||||
});
|
||||
|
||||
test("should resolve string location names", () => {
|
||||
expect(resolveLocationId("canada")).toBe(0);
|
||||
expect(resolveLocationId("ontario")).toBe(9004);
|
||||
expect(resolveLocationId("toronto")).toBe(1700273);
|
||||
expect(resolveLocationId("gta")).toBe(1700272);
|
||||
});
|
||||
test("should resolve string location names", () => {
|
||||
expect(resolveLocationId("canada")).toBe(0);
|
||||
expect(resolveLocationId("ontario")).toBe(9004);
|
||||
expect(resolveLocationId("toronto")).toBe(1700273);
|
||||
expect(resolveLocationId("gta")).toBe(1700272);
|
||||
});
|
||||
|
||||
test("should handle case insensitive matching", () => {
|
||||
expect(resolveLocationId("Canada")).toBe(0);
|
||||
expect(resolveLocationId("ONTARIO")).toBe(9004);
|
||||
});
|
||||
test("should handle case insensitive matching", () => {
|
||||
expect(resolveLocationId("Canada")).toBe(0);
|
||||
expect(resolveLocationId("ONTARIO")).toBe(9004);
|
||||
});
|
||||
|
||||
test("should default to Canada for unknown locations", () => {
|
||||
expect(resolveLocationId("unknown")).toBe(0);
|
||||
expect(resolveLocationId("")).toBe(0);
|
||||
});
|
||||
test("should default to Canada for unknown locations", () => {
|
||||
expect(resolveLocationId("unknown")).toBe(0);
|
||||
expect(resolveLocationId("")).toBe(0);
|
||||
});
|
||||
|
||||
test("should handle undefined input", () => {
|
||||
expect(resolveLocationId(undefined)).toBe(0);
|
||||
});
|
||||
});
|
||||
test("should handle undefined input", () => {
|
||||
expect(resolveLocationId(undefined)).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveCategoryId", () => {
|
||||
test("should return numeric IDs as-is", () => {
|
||||
expect(resolveCategoryId(132)).toBe(132);
|
||||
expect(resolveCategoryId(0)).toBe(0);
|
||||
});
|
||||
describe("resolveCategoryId", () => {
|
||||
test("should return numeric IDs as-is", () => {
|
||||
expect(resolveCategoryId(132)).toBe(132);
|
||||
expect(resolveCategoryId(0)).toBe(0);
|
||||
});
|
||||
|
||||
test("should resolve string category names", () => {
|
||||
expect(resolveCategoryId("all")).toBe(0);
|
||||
expect(resolveCategoryId("phones")).toBe(132);
|
||||
expect(resolveCategoryId("electronics")).toBe(29659001);
|
||||
expect(resolveCategoryId("buy-sell")).toBe(10);
|
||||
});
|
||||
test("should resolve string category names", () => {
|
||||
expect(resolveCategoryId("all")).toBe(0);
|
||||
expect(resolveCategoryId("phones")).toBe(132);
|
||||
expect(resolveCategoryId("electronics")).toBe(29659001);
|
||||
expect(resolveCategoryId("buy-sell")).toBe(10);
|
||||
});
|
||||
|
||||
test("should handle case insensitive matching", () => {
|
||||
expect(resolveCategoryId("All")).toBe(0);
|
||||
expect(resolveCategoryId("PHONES")).toBe(132);
|
||||
});
|
||||
test("should handle case insensitive matching", () => {
|
||||
expect(resolveCategoryId("All")).toBe(0);
|
||||
expect(resolveCategoryId("PHONES")).toBe(132);
|
||||
});
|
||||
|
||||
test("should default to all categories for unknown categories", () => {
|
||||
expect(resolveCategoryId("unknown")).toBe(0);
|
||||
expect(resolveCategoryId("")).toBe(0);
|
||||
});
|
||||
test("should default to all categories for unknown categories", () => {
|
||||
expect(resolveCategoryId("unknown")).toBe(0);
|
||||
expect(resolveCategoryId("")).toBe(0);
|
||||
});
|
||||
|
||||
test("should handle undefined input", () => {
|
||||
expect(resolveCategoryId(undefined)).toBe(0);
|
||||
});
|
||||
});
|
||||
test("should handle undefined input", () => {
|
||||
expect(resolveCategoryId(undefined)).toBe(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("URL Construction", () => {
|
||||
describe("buildSearchUrl", () => {
|
||||
test("should build basic search URL", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
location: 1700272,
|
||||
category: 132,
|
||||
sortBy: "relevancy",
|
||||
sortOrder: "desc",
|
||||
});
|
||||
describe("buildSearchUrl", () => {
|
||||
test("should build basic search URL", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
location: 1700272,
|
||||
category: 132,
|
||||
sortBy: "relevancy",
|
||||
sortOrder: "desc",
|
||||
});
|
||||
|
||||
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
|
||||
expect(url).toContain("sort=relevancyDesc");
|
||||
expect(url).toContain("order=DESC");
|
||||
});
|
||||
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
|
||||
expect(url).toContain("sort=relevancyDesc");
|
||||
expect(url).toContain("order=DESC");
|
||||
});
|
||||
|
||||
test("should handle pagination", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
location: 1700272,
|
||||
category: 132,
|
||||
page: 2,
|
||||
});
|
||||
test("should handle pagination", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
location: 1700272,
|
||||
category: 132,
|
||||
page: 2,
|
||||
});
|
||||
|
||||
expect(url).toContain("&page=2");
|
||||
});
|
||||
expect(url).toContain("&page=2");
|
||||
});
|
||||
|
||||
test("should handle different sort options", () => {
|
||||
const dateUrl = buildSearchUrl("iphone", {
|
||||
sortBy: "date",
|
||||
sortOrder: "asc",
|
||||
});
|
||||
expect(dateUrl).toContain("sort=DATE");
|
||||
expect(dateUrl).toContain("order=ASC");
|
||||
test("should handle different sort options", () => {
|
||||
const dateUrl = buildSearchUrl("iphone", {
|
||||
sortBy: "date",
|
||||
sortOrder: "asc",
|
||||
});
|
||||
expect(dateUrl).toContain("sort=DATE");
|
||||
expect(dateUrl).toContain("order=ASC");
|
||||
|
||||
const priceUrl = buildSearchUrl("iphone", {
|
||||
sortBy: "price",
|
||||
sortOrder: "desc",
|
||||
});
|
||||
expect(priceUrl).toContain("sort=PRICE");
|
||||
expect(priceUrl).toContain("order=DESC");
|
||||
});
|
||||
const priceUrl = buildSearchUrl("iphone", {
|
||||
sortBy: "price",
|
||||
sortOrder: "desc",
|
||||
});
|
||||
expect(priceUrl).toContain("sort=PRICE");
|
||||
expect(priceUrl).toContain("order=DESC");
|
||||
});
|
||||
|
||||
test("should handle string location/category inputs", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
location: "toronto",
|
||||
category: "phones",
|
||||
});
|
||||
test("should handle string location/category inputs", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
location: "toronto",
|
||||
category: "phones",
|
||||
});
|
||||
|
||||
expect(url).toContain("k0c132l1700273"); // phones + toronto
|
||||
});
|
||||
});
|
||||
expect(url).toContain("k0c132l1700273"); // phones + toronto
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Error Classes", () => {
|
||||
test("HttpError should store status and URL", () => {
|
||||
const error = new HttpError("Not found", 404, "https://example.com");
|
||||
expect(error.message).toBe("Not found");
|
||||
expect(error.statusCode).toBe(404);
|
||||
expect(error.url).toBe("https://example.com");
|
||||
expect(error.name).toBe("HttpError");
|
||||
});
|
||||
test("NetworkError should store URL and cause", () => {
|
||||
const cause = new Error("Connection failed");
|
||||
const error = new NetworkError(
|
||||
"Network error",
|
||||
"https://example.com",
|
||||
cause,
|
||||
);
|
||||
expect(error.message).toBe("Network error");
|
||||
expect(error.url).toBe("https://example.com");
|
||||
expect(error.cause).toBe(cause);
|
||||
expect(error.name).toBe("NetworkError");
|
||||
});
|
||||
|
||||
test("NetworkError should store URL and cause", () => {
|
||||
const cause = new Error("Connection failed");
|
||||
const error = new NetworkError(
|
||||
"Network error",
|
||||
"https://example.com",
|
||||
cause
|
||||
);
|
||||
expect(error.message).toBe("Network error");
|
||||
expect(error.url).toBe("https://example.com");
|
||||
expect(error.cause).toBe(cause);
|
||||
expect(error.name).toBe("NetworkError");
|
||||
});
|
||||
test("ParseError should store data", () => {
|
||||
const data = { invalid: "json" };
|
||||
const error = new ParseError("Invalid JSON", data);
|
||||
expect(error.message).toBe("Invalid JSON");
|
||||
expect(error.data).toBe(data);
|
||||
expect(error.name).toBe("ParseError");
|
||||
});
|
||||
|
||||
test("ParseError should store data", () => {
|
||||
const data = { invalid: "json" };
|
||||
const error = new ParseError("Invalid JSON", data);
|
||||
expect(error.message).toBe("Invalid JSON");
|
||||
expect(error.data).toBe(data);
|
||||
expect(error.name).toBe("ParseError");
|
||||
});
|
||||
test("RateLimitError should store URL and reset time", () => {
|
||||
const error = new RateLimitError("Rate limited", "https://example.com", 60);
|
||||
expect(error.message).toBe("Rate limited");
|
||||
expect(error.url).toBe("https://example.com");
|
||||
expect(error.resetTime).toBe(60);
|
||||
expect(error.name).toBe("RateLimitError");
|
||||
});
|
||||
|
||||
test("RateLimitError should store URL and reset time", () => {
|
||||
const error = new RateLimitError("Rate limited", "https://example.com", 60);
|
||||
expect(error.message).toBe("Rate limited");
|
||||
expect(error.url).toBe("https://example.com");
|
||||
expect(error.resetTime).toBe(60);
|
||||
expect(error.name).toBe("RateLimitError");
|
||||
});
|
||||
|
||||
test("ValidationError should work without field", () => {
|
||||
const error = new ValidationError("Invalid value");
|
||||
expect(error.message).toBe("Invalid value");
|
||||
expect(error.name).toBe("ValidationError");
|
||||
});
|
||||
test("ValidationError should work without field", () => {
|
||||
const error = new ValidationError("Invalid value");
|
||||
expect(error.message).toBe("Invalid value");
|
||||
expect(error.name).toBe("ValidationError");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
|
||||
|
||||
describe("Utility Functions", () => {
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
|
||||
// Mock fetch globally for tests
|
||||
global.fetch =
|
||||
global.fetch ||
|
||||
(() => {
|
||||
throw new Error("fetch is not available in test environment");
|
||||
});
|
||||
global.fetch ||
|
||||
(() => {
|
||||
throw new Error("fetch is not available in test environment");
|
||||
});
|
||||
|
||||
// Add any global test utilities here
|
||||
|
||||
@@ -4,30 +4,33 @@ import { serverCard } from "./protocol/metadata";
|
||||
const PORT = process.env.MCP_PORT || 4006;
|
||||
|
||||
const server = Bun.serve({
|
||||
port: PORT as number | string,
|
||||
idleTimeout: 0,
|
||||
routes: {
|
||||
// MCP metadata discovery endpoint
|
||||
"/.well-known/mcp/server-card.json": new Response(JSON.stringify(serverCard), {
|
||||
headers: { "Content-Type": "application/json" },
|
||||
}),
|
||||
port: PORT as number | string,
|
||||
idleTimeout: 255, // 255 seconds (max allowed)
|
||||
routes: {
|
||||
// MCP metadata discovery endpoint
|
||||
"/.well-known/mcp/server-card.json": new Response(
|
||||
JSON.stringify(serverCard),
|
||||
{
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
|
||||
// MCP JSON-RPC 2.0 protocol endpoint
|
||||
"/mcp": async (req: Request) => {
|
||||
if (req.method === "POST") {
|
||||
return await handleMcpRequest(req);
|
||||
}
|
||||
return Response.json(
|
||||
{ message: "MCP endpoint requires POST request" },
|
||||
{ status: 405 }
|
||||
);
|
||||
},
|
||||
},
|
||||
// MCP JSON-RPC 2.0 protocol endpoint
|
||||
"/mcp": async (req: Request) => {
|
||||
if (req.method === "POST") {
|
||||
return await handleMcpRequest(req);
|
||||
}
|
||||
return Response.json(
|
||||
{ message: "MCP endpoint requires POST request" },
|
||||
{ status: 405 },
|
||||
);
|
||||
},
|
||||
},
|
||||
|
||||
// Fallback for all other routes
|
||||
fetch(req: Request) {
|
||||
return new Response("Not Found", { status: 404 });
|
||||
},
|
||||
// Fallback for all other routes
|
||||
fetch(_req: Request) {
|
||||
return new Response("Not Found", { status: 404 });
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`MCP Server running on ${server.hostname}:${server.port}`);
|
||||
|
||||
@@ -1,187 +1,292 @@
|
||||
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
import { tools } from "./tools";
|
||||
|
||||
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
|
||||
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
|
||||
|
||||
/**
|
||||
* Handle MCP JSON-RPC 2.0 protocol requests
|
||||
*/
|
||||
export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
try {
|
||||
const body = await req.json();
|
||||
try {
|
||||
const body = await req.json();
|
||||
|
||||
// Validate JSON-RPC 2.0 format
|
||||
if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) {
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
error: { code: -32600, message: "Invalid Request" },
|
||||
id: body.id,
|
||||
},
|
||||
{ status: 400 }
|
||||
);
|
||||
}
|
||||
// Validate JSON-RPC 2.0 format
|
||||
if (!body.jsonrpc || body.jsonrpc !== "2.0" || !body.method) {
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
error: { code: -32600, message: "Invalid Request" },
|
||||
id: body.id,
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
const { method, params, id } = body;
|
||||
const { method, params, id } = body;
|
||||
|
||||
// Handle initialize method
|
||||
if (method === "initialize") {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
result: {
|
||||
protocolVersion: "2025-06-18",
|
||||
capabilities: {
|
||||
tools: {
|
||||
listChanged: true,
|
||||
},
|
||||
},
|
||||
serverInfo: {
|
||||
name: "marketplace-scrapers",
|
||||
version: "1.0.0",
|
||||
},
|
||||
instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
||||
},
|
||||
});
|
||||
}
|
||||
// Handle initialize method
|
||||
if (method === "initialize") {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
result: {
|
||||
protocolVersion: "2025-06-18",
|
||||
capabilities: {
|
||||
tools: {
|
||||
listChanged: true,
|
||||
},
|
||||
},
|
||||
serverInfo: {
|
||||
name: "marketplace-scrapers",
|
||||
version: "1.0.0",
|
||||
},
|
||||
instructions:
|
||||
"Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// Handle tools/list method
|
||||
if (method === "tools/list") {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
result: {
|
||||
tools,
|
||||
},
|
||||
});
|
||||
}
|
||||
// Handle tools/list method
|
||||
if (method === "tools/list") {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
result: {
|
||||
tools,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// Handle notifications (messages without id field should not get a response)
|
||||
if (!id) {
|
||||
// Notifications don't require a response
|
||||
if (method === "notifications/initialized") {
|
||||
// Client initialized successfully, no response needed
|
||||
return new Response(null, { status: 204 });
|
||||
}
|
||||
if (method === "notifications/progress") {
|
||||
// Progress notifications, no response needed
|
||||
return new Response(null, { status: 204 });
|
||||
}
|
||||
// Unknown notification - still no response for notifications
|
||||
return new Response(null, { status: 204 });
|
||||
}
|
||||
// Handle notifications (messages without id field should not get a response)
|
||||
if (!id) {
|
||||
// Notifications don't require a response
|
||||
if (method === "notifications/initialized") {
|
||||
// Client initialized successfully, no response needed
|
||||
return new Response(null, { status: 204 });
|
||||
}
|
||||
if (method === "notifications/progress") {
|
||||
// Progress notifications, no response needed
|
||||
return new Response(null, { status: 204 });
|
||||
}
|
||||
// Unknown notification - still no response for notifications
|
||||
return new Response(null, { status: 204 });
|
||||
}
|
||||
|
||||
// Handle tools/call method
|
||||
if (method === "tools/call") {
|
||||
const { name, arguments: args } = params || {};
|
||||
// Handle tools/call method
|
||||
if (method === "tools/call") {
|
||||
const { name, arguments: args } = params || {};
|
||||
|
||||
if (!name || !args) {
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "Invalid params: name and arguments required" },
|
||||
},
|
||||
{ status: 400 }
|
||||
);
|
||||
}
|
||||
if (!name || !args) {
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: {
|
||||
code: -32602,
|
||||
message: "Invalid params: name and arguments required",
|
||||
},
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
// Route tool calls to appropriate handlers
|
||||
try {
|
||||
let result;
|
||||
// Route tool calls to appropriate handlers
|
||||
try {
|
||||
let result: unknown;
|
||||
|
||||
if (name === "search_kijiji") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchKijijiItems(query, args.maxItems || 5);
|
||||
result = items || [];
|
||||
} else if (name === "search_facebook") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchFacebookItems(
|
||||
query,
|
||||
args.maxItems || 5,
|
||||
args.location || "toronto",
|
||||
25,
|
||||
args.cookiesSource
|
||||
);
|
||||
result = items || [];
|
||||
} else if (name === "search_ebay") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchEbayItems(query, args.maxItems || 5, {
|
||||
minPrice: args.minPrice,
|
||||
maxPrice: args.maxPrice,
|
||||
strictMode: args.strictMode || false,
|
||||
exclusions: args.exclusions || [],
|
||||
keywords: args.keywords || [query],
|
||||
buyItNowOnly: args.buyItNowOnly !== false,
|
||||
canadaOnly: args.canadaOnly !== false,
|
||||
});
|
||||
result = items || [];
|
||||
} else {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32601, message: `Unknown tool: ${name}` },
|
||||
});
|
||||
}
|
||||
if (name === "search_kijiji") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.location) params.append("location", args.location);
|
||||
if (args.category) params.append("category", args.category);
|
||||
if (args.keywords) params.append("keywords", args.keywords);
|
||||
if (args.sortBy) params.append("sortBy", args.sortBy);
|
||||
if (args.sortOrder) params.append("sortOrder", args.sortOrder);
|
||||
if (args.maxPages)
|
||||
params.append("maxPages", args.maxPages.toString());
|
||||
if (args.priceMin)
|
||||
params.append("priceMin", args.priceMin.toString());
|
||||
if (args.priceMax)
|
||||
params.append("priceMax", args.priceMax.toString());
|
||||
if (args.cookies) params.append("cookies", args.cookies);
|
||||
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
result: {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: JSON.stringify(result, null, 2),
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32603, message: `Tool execution failed: ${errorMessage}` },
|
||||
});
|
||||
}
|
||||
}
|
||||
console.log(
|
||||
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/kijiji?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
// Method not found
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32601, message: `Method not found: ${method}` },
|
||||
},
|
||||
{ status: 404 }
|
||||
);
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
error: { code: -32700, message: `Parse error: ${errorMessage}` },
|
||||
},
|
||||
{ status: 400 }
|
||||
);
|
||||
}
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
} else if (name === "search_facebook") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.location) params.append("location", args.location);
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.cookiesSource) params.append("cookies", args.cookiesSource);
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] Facebook API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
} else if (name === "search_ebay") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.minPrice)
|
||||
params.append("minPrice", args.minPrice.toString());
|
||||
if (args.maxPrice)
|
||||
params.append("maxPrice", args.maxPrice.toString());
|
||||
if (args.strictMode !== undefined)
|
||||
params.append("strictMode", args.strictMode.toString());
|
||||
if (args.exclusions?.length)
|
||||
params.append("exclusions", args.exclusions.join(","));
|
||||
if (args.keywords?.length)
|
||||
params.append("keywords", args.keywords.join(","));
|
||||
if (args.buyItNowOnly !== undefined)
|
||||
params.append("buyItNowOnly", args.buyItNowOnly.toString());
|
||||
if (args.canadaOnly !== undefined)
|
||||
params.append("canadaOnly", args.canadaOnly.toString());
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.cookies) params.append("cookies", args.cookies);
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] eBay API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
} else {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32601, message: `Unknown tool: ${name}` },
|
||||
});
|
||||
}
|
||||
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
result: {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: JSON.stringify(result, null, 2),
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: {
|
||||
code: -32603,
|
||||
message: `Tool execution failed: ${errorMessage}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Method not found
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32601, message: `Method not found: ${method}` },
|
||||
},
|
||||
{ status: 404 },
|
||||
);
|
||||
} catch (error) {
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
error: { code: -32700, message: `Parse error: ${errorMessage}` },
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,23 +3,25 @@
|
||||
*/
|
||||
|
||||
export const serverCard = {
|
||||
$schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
||||
version: "1.0",
|
||||
protocolVersion: "2025-06-18",
|
||||
serverInfo: {
|
||||
name: "marketplace-scrapers",
|
||||
title: "Marketplace Scrapers MCP Server",
|
||||
version: "1.0.0",
|
||||
},
|
||||
transport: {
|
||||
type: "streamable-http",
|
||||
endpoint: "/mcp",
|
||||
},
|
||||
capabilities: {
|
||||
tools: {
|
||||
listChanged: true,
|
||||
},
|
||||
},
|
||||
description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
||||
tools: "dynamic",
|
||||
$schema:
|
||||
"https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
||||
version: "1.0",
|
||||
protocolVersion: "2025-06-18",
|
||||
serverInfo: {
|
||||
name: "marketplace-scrapers",
|
||||
title: "Marketplace Scrapers MCP Server",
|
||||
version: "1.0.0",
|
||||
},
|
||||
transport: {
|
||||
type: "streamable-http",
|
||||
endpoint: "/mcp",
|
||||
},
|
||||
capabilities: {
|
||||
tools: {
|
||||
listChanged: true,
|
||||
},
|
||||
},
|
||||
description:
|
||||
"Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
||||
tools: "dynamic",
|
||||
};
|
||||
|
||||
@@ -3,103 +3,148 @@
|
||||
*/
|
||||
|
||||
export const tools = [
|
||||
{
|
||||
name: "search_kijiji",
|
||||
description: "Search Kijiji marketplace for listings matching a query",
|
||||
inputSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
query: {
|
||||
type: "string",
|
||||
description: "Search query for Kijiji listings",
|
||||
},
|
||||
maxItems: {
|
||||
type: "number",
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "search_facebook",
|
||||
description: "Search Facebook Marketplace for listings matching a query",
|
||||
inputSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
query: {
|
||||
type: "string",
|
||||
description: "Search query for Facebook Marketplace listings",
|
||||
},
|
||||
location: {
|
||||
type: "string",
|
||||
description: "Location for search (e.g., 'toronto')",
|
||||
default: "toronto",
|
||||
},
|
||||
maxItems: {
|
||||
type: "number",
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
cookiesSource: {
|
||||
type: "string",
|
||||
description: "Optional Facebook session cookies source",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "search_ebay",
|
||||
description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
||||
inputSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
query: {
|
||||
type: "string",
|
||||
description: "Search query for eBay listings",
|
||||
},
|
||||
minPrice: {
|
||||
type: "number",
|
||||
description: "Minimum price filter",
|
||||
},
|
||||
maxPrice: {
|
||||
type: "number",
|
||||
description: "Maximum price filter",
|
||||
},
|
||||
strictMode: {
|
||||
type: "boolean",
|
||||
description: "Enable strict search mode",
|
||||
default: false,
|
||||
},
|
||||
exclusions: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description: "Terms to exclude from results",
|
||||
},
|
||||
keywords: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description: "Keywords to include in search",
|
||||
},
|
||||
buyItNowOnly: {
|
||||
type: "boolean",
|
||||
description: "Include only Buy It Now listings (exclude auctions)",
|
||||
default: true,
|
||||
},
|
||||
canadaOnly: {
|
||||
type: "boolean",
|
||||
description: "Include only Canadian sellers/listings",
|
||||
default: true,
|
||||
},
|
||||
maxItems: {
|
||||
type: "number",
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "search_kijiji",
|
||||
description: "Search Kijiji marketplace for listings matching a query",
|
||||
inputSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
query: {
|
||||
type: "string",
|
||||
description: "Search query for Kijiji listings",
|
||||
},
|
||||
location: {
|
||||
type: "string",
|
||||
description:
|
||||
"Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
|
||||
},
|
||||
category: {
|
||||
type: "string",
|
||||
description:
|
||||
"Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
|
||||
},
|
||||
keywords: {
|
||||
type: "string",
|
||||
description: "Additional keywords to filter results",
|
||||
},
|
||||
sortBy: {
|
||||
type: "string",
|
||||
description: "Sort results by field",
|
||||
enum: ["relevancy", "date", "price", "distance"],
|
||||
default: "relevancy",
|
||||
},
|
||||
sortOrder: {
|
||||
type: "string",
|
||||
description: "Sort order",
|
||||
enum: ["asc", "desc"],
|
||||
default: "desc",
|
||||
},
|
||||
maxPages: {
|
||||
type: "number",
|
||||
description: "Maximum pages to fetch (~40 items per page)",
|
||||
default: 5,
|
||||
},
|
||||
priceMin: {
|
||||
type: "number",
|
||||
description: "Minimum price in cents",
|
||||
},
|
||||
priceMax: {
|
||||
type: "number",
|
||||
description: "Maximum price in cents",
|
||||
},
|
||||
cookies: {
|
||||
type: "string",
|
||||
description:
|
||||
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "search_facebook",
|
||||
description: "Search Facebook Marketplace for listings matching a query",
|
||||
inputSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
query: {
|
||||
type: "string",
|
||||
description: "Search query for Facebook Marketplace listings",
|
||||
},
|
||||
location: {
|
||||
type: "string",
|
||||
description: "Location for search (e.g., 'toronto')",
|
||||
default: "toronto",
|
||||
},
|
||||
maxItems: {
|
||||
type: "number",
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
cookiesSource: {
|
||||
type: "string",
|
||||
description: "Optional Facebook session cookies source",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "search_ebay",
|
||||
description:
|
||||
"Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
||||
inputSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
query: {
|
||||
type: "string",
|
||||
description: "Search query for eBay listings",
|
||||
},
|
||||
minPrice: {
|
||||
type: "number",
|
||||
description: "Minimum price filter",
|
||||
},
|
||||
maxPrice: {
|
||||
type: "number",
|
||||
description: "Maximum price filter",
|
||||
},
|
||||
strictMode: {
|
||||
type: "boolean",
|
||||
description: "Enable strict search mode",
|
||||
default: false,
|
||||
},
|
||||
exclusions: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description: "Terms to exclude from results",
|
||||
},
|
||||
keywords: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
description: "Keywords to include in search",
|
||||
},
|
||||
buyItNowOnly: {
|
||||
type: "boolean",
|
||||
description: "Include only Buy It Now listings (exclude auctions)",
|
||||
default: true,
|
||||
},
|
||||
canadaOnly: {
|
||||
type: "boolean",
|
||||
description: "Include only Canadian sellers/listings",
|
||||
default: true,
|
||||
},
|
||||
maxItems: {
|
||||
type: "number",
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
cookies: {
|
||||
type: "string",
|
||||
description:
|
||||
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
26
scripts/biome-symlink.sh
Executable file
26
scripts/biome-symlink.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
#
# Replace every biome executable found under node_modules with a symlink to
# the biome binary that is on PATH.
#
# Usage:    scripts/biome-symlink.sh   (run from the directory that contains
#           node_modules)
# Requires: biome and fd on PATH.
# Exit:     0 when links were created or nothing needed replacing,
#           1 when biome or fd is missing or a replacement fails.

set -euo pipefail

# Get the path to the system biome executable. `command -v` is the portable
# lookup; `|| true` keeps strict mode from aborting before the friendly error.
BIOME_PATH=$(command -v biome || true)

if [[ -z "$BIOME_PATH" ]]; then
  echo "Error: biome executable not found in PATH" >&2
  exit 1
fi

if ! command -v fd >/dev/null; then
  echo "Error: fd executable not found in PATH" >&2
  exit 1
fi

# Find all biome executables in node_modules. The list is read NUL-delimited
# into an array so paths containing spaces or glob characters stay intact.
files=()
while IFS= read -r -d '' file; do
  files+=("$file")
done < <(fd biome node_modules --type executable --no-ignore --follow --print0)

if (( ${#files[@]} == 0 )); then
  echo "No biome executables found in node_modules"
  exit 0
fi

# Replace each with a symlink to the system biome
for file in "${files[@]}"; do
  # A candidate that already resolves to the system binary (for example on a
  # second run, or when PATH points into node_modules) must not be removed:
  # deleting it would destroy the link target or create a self-referential link.
  if [[ "$file" -ef "$BIOME_PATH" ]]; then
    echo "Skipping $file (already resolves to $BIOME_PATH)"
    continue
  fi

  echo "Replacing $file with symlink to $BIOME_PATH"
  rm -- "$file"
  ln -s -- "$BIOME_PATH" "$file"
done

echo "Done."
|
||||
30
scripts/remove-eslint.sh
Executable file
30
scripts/remove-eslint.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
#
# Delete every line matching PATTERN from the files fd lists beneath the
# current directory. Each modified file keeps a "<file>.bak" backup.
#
# Usage:    scripts/remove-eslint.sh   (run from the directory to clean)
# Requires: fd, rg (ripgrep), awk, sed.

set -euo pipefail

PATTERN="eslint"

# Collect the candidate paths up front, NUL-delimited so names containing
# whitespace survive, and so the backups created below are not visited in the
# same run. (Or use 'find . -print0' to search recursively without fd.)
files=()
while IFS= read -r -d '' file; do
  files+=("$file")
done < <(fd --print0 .)

if (( ${#files[@]} == 0 )); then
  exit 0
fi

for file in "${files[@]}"; do
  # fd also lists directories; only regular files can be edited.
  if [[ ! -f "$file" ]]; then
    continue
  fi

  # This script contains the pattern itself; never rewrite it while it runs.
  if [[ "$file" -ef "${BASH_SOURCE[0]}" ]]; then
    continue
  fi

  # 1. rg prints "<line number>:<matched line>" for every match.
  # 2. awk keeps only the line number (field 1) and drops anything that is not
  #    numeric, such as ripgrep's "binary file matches" notice.
  # rg exits 1 when nothing matches; only a status above 1 is a real failure.
  line_numbers=$(
    { rg --line-number --no-filename -- "$PATTERN" "$file" || (( $? == 1 )); } \
      | awk -F':' '$1 ~ /^[0-9]+$/ {print $1}'
  )

  if [[ -n "$line_numbers" ]]; then
    echo "Deleting lines ${line_numbers//$'\n'/,} from $file..."

    # Build one "Nd" command per matching line. A comma-joined address such as
    # "3,7d" would be read by sed as the range 3 through 7, and "3,7,9d" is a
    # syntax error, so each line number gets its own delete command.
    sed_script="${line_numbers//$'\n'/$'d\n'}d"

    # "-i.bak" (suffix attached, no space) is accepted by both GNU sed (Linux)
    # and BSD sed (macOS).
    sed -i.bak -e "$sed_script" -- "$file"

    # Optional: Remove the backup file created by sed -i.bak
    # rm -- "${file}.bak"
  else
    echo "$file: No lines matching pattern found."
  fi
done
|
||||
25
scripts/start.sh
Executable file
25
scripts/start.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
#
# Start the API server and the MCP server side by side and supervise them.
#
# - SIGTERM / SIGINT are forwarded to both servers, then the script exits with
#   the conventional 128+signal status.
# - If either server exits on its own, the other one is stopped and the script
#   exits with the status of the server that exited first, so a supervisor
#   (container runtime, systemd, ...) can react.
#
# Requires: bun on PATH, bash >= 4.3 (for `wait -n`).

set -euo pipefail

API_PID=""
MCP_PID=""

# Send SIGTERM to whichever servers have been started and reap them.
stop_services() {
  local pid
  for pid in "$API_PID" "$MCP_PID"; do
    if [[ -n "$pid" ]]; then
      # The process may already be gone; that is not an error here.
      kill -TERM "$pid" 2>/dev/null || true
    fi
  done
  wait || true
}

# Signal handler. $1 is the exit status to report (128 + signal number).
# Exiting here matters: without it, a signal received during startup would
# stop the API server and the script would then carry on starting the MCP
# server.
on_signal() {
  echo "Received shutdown signal, stopping services..."
  stop_services
  exit "$1"
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap 'on_signal 143' TERM
trap 'on_signal 130' INT

# Start API Server in background
echo "Starting API Server on port ${API_PORT:-4005}..."
bun dist/api/index.js &
API_PID=$!

# Give API server a moment to initialize
sleep 1

# Do not claim success if the API server already died during startup.
if ! kill -0 "$API_PID" 2>/dev/null; then
  echo "Error: API Server exited during startup" >&2
  exit 1
fi

# Start MCP Server in background
# NOTE(review): the banner previously printed ${API_PORT:-4006}, i.e. the API
# server's variable. MCP_PORT is assumed to be the MCP server's setting --
# confirm against dist/mcp/index.js.
echo "Starting MCP Server on port ${MCP_PORT:-4006}..."
bun dist/mcp/index.js &
MCP_PID=$!

echo "Both services started successfully"
echo "API Server PID: $API_PID"
echo "MCP Server PID: $MCP_PID"

# Wait until the first of the two servers exits, then take the other one down
# so the script never keeps running with only half of the stack alive.
status=0
wait -n || status=$?
echo "A service exited (status $status), stopping the remaining one..." >&2
stop_services
exit "$status"
|
||||
Reference in New Issue
Block a user