Compare commits
31 Commits
da23ca1c3f
...
update
| Author | SHA1 | Date | |
|---|---|---|---|
| e4ab145d70 | |||
| 1dce0392e3 | |||
| 251fcbb7d9 | |||
| 9bc57d6b54 | |||
| 4a467c9f02 | |||
| f944d319c2 | |||
| cf9784a565 | |||
| df0c528535 | |||
| 2f97d3eafd | |||
| 65eb8d1724 | |||
| f3839aba54 | |||
| 90b98bfb09 | |||
| eb6705df0f | |||
| 72525609ed | |||
| 8b0a65860c | |||
| f9b1c7e096 | |||
| 9edc74cbeb | |||
| ee0fca826d | |||
| f7372612fb | |||
| bce126664e | |||
| 8cbf11538e | |||
| 79f47fdaef | |||
| de5069bf2b | |||
| 637f1a4e75 | |||
| 441ff436c4 | |||
| 1f53ec912a | |||
| 053efd815b | |||
| d619fa5d77 | |||
| 050fd0adba | |||
| 7b106c91ce | |||
| 6e0487f8f3 |
181
.dockerignore
181
.dockerignore
@@ -1,145 +1,84 @@
|
||||
# Dependencies
|
||||
# =============================================================================
|
||||
# Dependencies & Build Output
|
||||
# =============================================================================
|
||||
node_modules/
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
bun.sum
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage/
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
jspm_packages/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
dist/
|
||||
out/
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variables file
|
||||
# =============================================================================
|
||||
# Sensitive Files
|
||||
# =============================================================================
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.*
|
||||
.envrc
|
||||
cookies/
|
||||
*.pem
|
||||
*.key
|
||||
*.cert
|
||||
*secret*
|
||||
*credential*
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
# =============================================================================
|
||||
# Development Tools & Config
|
||||
# =============================================================================
|
||||
# Nix/Devenv
|
||||
.devenv/
|
||||
.devenv.flake.nix
|
||||
devenv.*
|
||||
.direnv/
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
# Linting/Formatting
|
||||
biome.json
|
||||
.eslintcache
|
||||
.pre-commit-config.yaml
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
public
|
||||
|
||||
# Vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# IDE and editor files
|
||||
# IDE/Editor
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
# AI Assistant Config
|
||||
.claude/
|
||||
CLAUDE.md
|
||||
AGENTS.md
|
||||
opencode.jsonc
|
||||
|
||||
# Git
|
||||
.git
|
||||
# =============================================================================
|
||||
# Documentation (not needed at runtime)
|
||||
# =============================================================================
|
||||
README.md
|
||||
*.md
|
||||
docs/
|
||||
|
||||
# =============================================================================
|
||||
# Git & Docker (avoid recursive inclusion)
|
||||
# =============================================================================
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Docker
|
||||
Dockerfile*
|
||||
.dockerignore
|
||||
|
||||
# Documentation
|
||||
README.md
|
||||
docs/
|
||||
|
||||
# Test files
|
||||
# =============================================================================
|
||||
# Testing & Coverage
|
||||
# =============================================================================
|
||||
test/
|
||||
tests/
|
||||
*.test.js
|
||||
*.test.ts
|
||||
*.spec.js
|
||||
*.spec.ts
|
||||
coverage/
|
||||
*.lcov
|
||||
.nyc_output/
|
||||
|
||||
# Development files
|
||||
CLAUDE.md
|
||||
devenv.*
|
||||
# =============================================================================
|
||||
# OS & Misc
|
||||
# =============================================================================
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
*.log
|
||||
|
||||
# Runtime cookies/config
|
||||
cookies/
|
||||
*.pid
|
||||
.cache/
|
||||
examples/
|
||||
scripts/
|
||||
|
||||
54
AGENTS.md
54
AGENTS.md
@@ -83,7 +83,7 @@ HTTP server using `Bun.serve()` on port 4005 (or `PORT` env var).
|
||||
- `GET /api/status` - Health check
|
||||
- `GET /api/kijiji?q={query}` - Search Kijiji
|
||||
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}` - Search Facebook
|
||||
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=` - Search eBay
|
||||
- `GET /api/ebay?q={query}&minPrice=&maxPrice=&strictMode=&exclusions=&keywords=&buyItNowOnly=&canadaOnly=&cookies=` - Search eBay
|
||||
- `GET /api/*` - 404 fallback
|
||||
|
||||
### MCP Server (`@marketplace-scrapers/mcp-server`)
|
||||
@@ -96,7 +96,7 @@ MCP JSON-RPC 2.0 server on port 4006 (or `MCP_PORT` env var).
|
||||
**Tools:**
|
||||
- `search_kijiji` - Search Kijiji (query, maxItems)
|
||||
- `search_facebook` - Search Facebook (query, location, maxItems, cookiesSource)
|
||||
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems)
|
||||
- `search_ebay` - Search eBay (query, minPrice, maxPrice, strictMode, exclusions, keywords, buyItNowOnly, canadaOnly, maxItems, cookies)
|
||||
|
||||
## API Response Formats
|
||||
|
||||
@@ -117,6 +117,52 @@ All scrapers return arrays of listing objects with these common fields:
|
||||
### eBay-specific fields
|
||||
Minimal - mainly the common fields
|
||||
|
||||
## Cookie Management
|
||||
|
||||
Both **Facebook Marketplace** and **eBay** require valid session cookies for reliable scraping.
|
||||
|
||||
### Cookie Priority Hierarchy (High → Low)
|
||||
All scrapers follow this loading order:
|
||||
1. **URL/API Parameter** - Passed directly via `cookies` parameter (highest priority)
|
||||
2. **Environment Variable** - `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||
3. **Cookie File** - `cookies/facebook.json` or `cookies/ebay.json` (fallback)
|
||||
|
||||
### Facebook Cookies
|
||||
- **Required for**: Facebook Marketplace scraping
|
||||
- **Format**: JSON array (see `cookies/README.md`)
|
||||
- **Key cookies**: `c_user`, `xs`, `fr`, `datr`, `sb`
|
||||
|
||||
**Setup:**
|
||||
```bash
|
||||
# Option 1: File (fallback)
|
||||
# Create cookies/facebook.json with cookie array
|
||||
|
||||
# Option 2: Environment variable
|
||||
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||
|
||||
# Option 3: URL parameter (highest priority)
|
||||
curl "http://localhost:4005/api/facebook?q=laptop&cookies=[{...}]"
|
||||
```
|
||||
|
||||
### eBay Cookies
|
||||
- **Required for**: Bypassing bot detection
|
||||
- **Format**: Cookie string `"name=value; name2=value2"`
|
||||
- **Key cookies**: `s`, `ds2`, `ebay`, `dp1`, `nonsession`
|
||||
|
||||
**Setup:**
|
||||
```bash
|
||||
# Option 1: File (fallback)
|
||||
# Create cookies/ebay.json with cookie string
|
||||
|
||||
# Option 2: Environment variable
|
||||
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||
|
||||
# Option 3: URL parameter (highest priority)
|
||||
curl "http://localhost:4005/api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE"
|
||||
```
|
||||
|
||||
**Important - eBay Bot Detection**: Without cookies, eBay returns a "Checking your browser" challenge page instead of listings.
|
||||
|
||||
## Technical Details
|
||||
|
||||
- **TypeScript** with path mapping (`@/*` → `src/*`) per package
|
||||
@@ -126,7 +172,7 @@ Minimal - mainly the common fields
|
||||
|
||||
## Development Notes
|
||||
|
||||
- Facebook requires valid session cookies - set `FACEBOOK_COOKIE` env var or create `cookies/facebook.json`
|
||||
- eBay uses custom headers to bypass basic bot detection
|
||||
- **Cookie files** are git-ignored for security (see `cookies/README.md`)
|
||||
- Kijiji parses Apollo state from Next.js hydration data
|
||||
- All scrapers handle retries on 429/5xx errors
|
||||
- Cookie priority ensures flexibility across different deployment environments
|
||||
|
||||
@@ -1,24 +1,33 @@
|
||||
# Facebook Marketplace Cookies Setup
|
||||
# Marketplace Cookies Setup
|
||||
|
||||
To use the Facebook Marketplace scraper, you need to provide valid Facebook session cookies.
|
||||
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
|
||||
|
||||
## Option 1: Cookies File (`facebook.json`)
|
||||
## Cookie Priority Hierarchy
|
||||
|
||||
1. Log into Facebook in your browser
|
||||
2. Open Developer Tools → Network tab
|
||||
3. Visit facebook.com/marketplace (ensure you're logged in)
|
||||
4. Look for any marketplace-related requests in the Network tab
|
||||
5. Export cookies from the browser's Application/Storage → Cookies section
|
||||
6. Save the cookies as a JSON array to `facebook.json`
|
||||
All scrapers follow this priority order (highest to lowest):
|
||||
1. **URL Parameter** - Passed directly in API/MCP request (overrides all)
|
||||
2. **Environment Variable** - Set as `FACEBOOK_COOKIE` or `EBAY_COOKIE`
|
||||
3. **Cookie File** - Stored in `facebook.json` or `ebay.json` (fallback)
|
||||
|
||||
The `facebook.json` file should contain Facebook session cookies, particularly:
|
||||
---
|
||||
|
||||
## Facebook Marketplace (`facebook.json`)
|
||||
|
||||
### Required Cookies
|
||||
- `c_user`: Your Facebook user ID
|
||||
- `xs`: Facebook session token
|
||||
- `fr`: Facebook request token
|
||||
- `datr`: Data attribution token
|
||||
- `sb`: Session browser token
|
||||
|
||||
Example structure:
|
||||
### Setup Methods
|
||||
|
||||
**Method 1: Cookie File (Lowest Priority)**
|
||||
1. Log into Facebook in your browser
|
||||
2. Open Developer Tools → Application/Storage → Cookies
|
||||
3. Export cookies as JSON array to `facebook.json`
|
||||
|
||||
Example `facebook.json`:
|
||||
```json
|
||||
[
|
||||
{
|
||||
@@ -27,26 +36,59 @@ Example structure:
|
||||
"domain": ".facebook.com",
|
||||
"path": "/",
|
||||
"secure": true
|
||||
},
|
||||
// ... other cookies
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Option 2: URL Parameter
|
||||
|
||||
You can pass cookies directly via the `cookies` URL parameter:
|
||||
|
||||
**Method 2: Environment Variable**
|
||||
```bash
|
||||
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||
```
|
||||
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123","domain":".facebook.com",...}]
|
||||
|
||||
**Method 3: URL Parameter (Highest Priority)**
|
||||
```
|
||||
GET /api/facebook?q=laptop&cookies=[{"name":"c_user","value":"123",...}]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## eBay (`ebay.json`)
|
||||
|
||||
eBay has aggressive bot detection that blocks requests without valid session cookies.
|
||||
|
||||
### Setup Methods
|
||||
|
||||
**Method 1: Cookie File (Lowest Priority)**
|
||||
1. Log into eBay in your browser
|
||||
2. Open Developer Tools → Network tab
|
||||
3. Visit ebay.ca and inspect any request headers
|
||||
4. Copy the full `Cookie` header value
|
||||
5. Save as plain text to `ebay.json` (see `ebay.json.example`)
|
||||
|
||||
Example `ebay.json`:
|
||||
```
|
||||
s=VALUE; ds2=VALUE; ebay=VALUE; dp1=VALUE; nonsession=VALUE
|
||||
```
|
||||
|
||||
**Method 2: Environment Variable**
|
||||
```bash
|
||||
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||
```
|
||||
|
||||
**Method 3: URL Parameter (Highest Priority)**
|
||||
```
|
||||
GET /api/ebay?q=laptop&cookies=s=VALUE;ds2=VALUE;ebay=VALUE
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Important Notes
|
||||
|
||||
- Cookies must be from an active Facebook session
|
||||
- Cookies expire, so you may need to refresh them periodically
|
||||
- Never share real cookies or commit them to version control
|
||||
- Facebook may block automated scraping even with valid cookies
|
||||
- Cookies must be from active browser sessions
|
||||
- Cookies expire and need periodic refresh
|
||||
- **NEVER** commit real cookies to version control
|
||||
- Platforms may still block automated scraping despite valid cookies
|
||||
|
||||
## Security
|
||||
|
||||
The cookies file is intentionally left out of version control for security reasons.
|
||||
All `*.json` files in this directory are git-ignored for security.
|
||||
|
||||
1
cookies/ebay.json.example
Normal file
1
cookies/ebay.json.example
Normal file
@@ -0,0 +1 @@
|
||||
s=YOUR_VALUE; ds2=YOUR_VALUE; ebay=YOUR_VALUE; dp1=YOUR_VALUE; nonsession=YOUR_VALUE
|
||||
9
opencode.jsonc
Normal file
9
opencode.jsonc
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"$schema": "https://opencode.ai/config.json",
|
||||
"mcp": {
|
||||
"marketplace-scrape": {
|
||||
"type": "remote",
|
||||
"url": "http://localhost:4006/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
12
package.json
12
package.json
@@ -2,11 +2,19 @@
|
||||
"name": "marketplace-scrapers-monorepo",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"ci": "biome ci"
|
||||
"ci": "biome ci",
|
||||
"clean": "rm -rf dist",
|
||||
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
|
||||
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
|
||||
"build:all": "bun run build:api && bun run build:mcp",
|
||||
"build": "bun run clean && bun run build:all",
|
||||
"start": "./scripts/start.sh"
|
||||
},
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"workspaces": ["packages/*"],
|
||||
"workspaces": [
|
||||
"packages/*"
|
||||
],
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "2.3.11"
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { statusRoute } from "./routes/status";
|
||||
import { kijijiRoute } from "./routes/kijiji";
|
||||
import { facebookRoute } from "./routes/facebook";
|
||||
import { ebayRoute } from "./routes/ebay";
|
||||
import { facebookRoute } from "./routes/facebook";
|
||||
import { kijijiRoute } from "./routes/kijiji";
|
||||
import { statusRoute } from "./routes/status";
|
||||
|
||||
const PORT = process.env.PORT || 4005;
|
||||
|
||||
@@ -22,7 +22,7 @@ const server = Bun.serve({
|
||||
},
|
||||
|
||||
// Fallback for all other routes
|
||||
fetch(req: Request) {
|
||||
fetch(_req: Request) {
|
||||
return new Response("Not Found", { status: 404 });
|
||||
},
|
||||
});
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
|
||||
/**
|
||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
|
||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&cookies={cookies}
|
||||
* Search eBay for listings (default: Buy It Now only, Canada only)
|
||||
* Optional: Pass cookies parameter to bypass bot detection
|
||||
*/
|
||||
export async function ebayRoute(req: Request): Promise<Response> {
|
||||
try {
|
||||
const reqUrl = new URL(req.url);
|
||||
|
||||
const SEARCH_QUERY =
|
||||
@@ -18,23 +20,27 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
||||
{ status: 400 },
|
||||
);
|
||||
|
||||
// Parse optional parameters with defaults
|
||||
const minPrice = reqUrl.searchParams.get("minPrice")
|
||||
? parseInt(reqUrl.searchParams.get("minPrice")!)
|
||||
: undefined;
|
||||
const maxPrice = reqUrl.searchParams.get("maxPrice")
|
||||
? parseInt(reqUrl.searchParams.get("maxPrice")!)
|
||||
: undefined;
|
||||
const minPriceParam = reqUrl.searchParams.get("minPrice");
|
||||
const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined;
|
||||
const maxPriceParam = reqUrl.searchParams.get("maxPrice");
|
||||
const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined;
|
||||
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
|
||||
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
|
||||
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
|
||||
const exclusionsParam = reqUrl.searchParams.get("exclusions");
|
||||
const exclusions = exclusionsParam ? exclusionsParam.split(",").map(s => s.trim()) : [];
|
||||
const exclusions = exclusionsParam
|
||||
? exclusionsParam.split(",").map((s) => s.trim())
|
||||
: [];
|
||||
const keywordsParam = reqUrl.searchParams.get("keywords");
|
||||
const keywords = keywordsParam ? keywordsParam.split(",").map(s => s.trim()) : [SEARCH_QUERY];
|
||||
const keywords = keywordsParam
|
||||
? keywordsParam.split(",").map((s) => s.trim())
|
||||
: [SEARCH_QUERY];
|
||||
|
||||
try {
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 5, {
|
||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
||||
const cookies = reqUrl.searchParams.get("cookies") || undefined;
|
||||
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||
minPrice,
|
||||
maxPrice,
|
||||
strictMode,
|
||||
@@ -42,19 +48,21 @@ export async function ebayRoute(req: Request): Promise<Response> {
|
||||
keywords,
|
||||
buyItNowOnly,
|
||||
canadaOnly,
|
||||
cookies,
|
||||
});
|
||||
if (!items || items.length === 0)
|
||||
|
||||
const results = maxItems ? items.slice(0, maxItems) : items;
|
||||
|
||||
if (!results || results.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(items, { status: 200 });
|
||||
return Response.json(results, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("eBay scraping error:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json(
|
||||
{ message: errorMessage },
|
||||
{ status: 400 },
|
||||
);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,17 +12,25 @@ export async function facebookRoute(req: Request): Promise<Response> {
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message:
|
||||
"Request didn't have 'query' header or 'q' search parameter!",
|
||||
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
|
||||
const LOCATION = reqUrl.searchParams.get("location") || "toronto";
|
||||
const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;
|
||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25;
|
||||
|
||||
try {
|
||||
const items = await fetchFacebookItems(SEARCH_QUERY, 5, LOCATION, 25, COOKIES_SOURCE);
|
||||
const items = await fetchFacebookItems(
|
||||
SEARCH_QUERY,
|
||||
1,
|
||||
LOCATION,
|
||||
maxItems,
|
||||
COOKIES_SOURCE,
|
||||
undefined,
|
||||
);
|
||||
if (!items || items.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
@@ -31,10 +39,8 @@ export async function facebookRoute(req: Request): Promise<Response> {
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Facebook scraping error:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json(
|
||||
{ message: errorMessage },
|
||||
{ status: 400 },
|
||||
);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,14 +12,46 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message:
|
||||
"Request didn't have 'query' header or 'q' search parameter!",
|
||||
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
|
||||
const maxPagesParam = reqUrl.searchParams.get("maxPages");
|
||||
const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5;
|
||||
const priceMinParam = reqUrl.searchParams.get("priceMin");
|
||||
const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined;
|
||||
const priceMaxParam = reqUrl.searchParams.get("priceMax");
|
||||
const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined;
|
||||
|
||||
const searchOptions = {
|
||||
location: reqUrl.searchParams.get("location") || undefined,
|
||||
category: reqUrl.searchParams.get("category") || undefined,
|
||||
keywords: reqUrl.searchParams.get("keywords") || undefined,
|
||||
sortBy: reqUrl.searchParams.get("sortBy") as
|
||||
| "relevancy"
|
||||
| "date"
|
||||
| "price"
|
||||
| "distance"
|
||||
| undefined,
|
||||
sortOrder: reqUrl.searchParams.get("sortOrder") as
|
||||
| "desc"
|
||||
| "asc"
|
||||
| undefined,
|
||||
maxPages,
|
||||
priceMin,
|
||||
priceMax,
|
||||
cookies: reqUrl.searchParams.get("cookies") || undefined,
|
||||
};
|
||||
|
||||
try {
|
||||
const items = await fetchKijijiItems(SEARCH_QUERY, 5);
|
||||
const items = await fetchKijijiItems(
|
||||
SEARCH_QUERY,
|
||||
4, // 4 requests per second for faster scraping
|
||||
"https://www.kijiji.ca",
|
||||
searchOptions,
|
||||
{},
|
||||
);
|
||||
if (!items)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
@@ -28,10 +60,8 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Kijiji scraping error:", error);
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json(
|
||||
{ message: errorMessage },
|
||||
{ status: 400 },
|
||||
);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,45 +1,43 @@
|
||||
// Export all scrapers
|
||||
|
||||
export type { EbayListingDetails } from "./scrapers/ebay";
|
||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||
export {
|
||||
default as fetchFacebookItems,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookItemData,
|
||||
extractFacebookMarketplaceData,
|
||||
fetchFacebookItem,
|
||||
parseFacebookAds,
|
||||
parseFacebookCookieString,
|
||||
parseFacebookItem,
|
||||
} from "./scrapers/facebook";
|
||||
export type {
|
||||
DetailedListing,
|
||||
KijijiListingDetails,
|
||||
ListingFetchOptions,
|
||||
SearchOptions,
|
||||
} from "./scrapers/kijiji";
|
||||
export {
|
||||
default as fetchKijijiItems,
|
||||
slugify,
|
||||
resolveLocationId,
|
||||
resolveCategoryId,
|
||||
buildSearchUrl,
|
||||
default as fetchKijijiItems,
|
||||
extractApolloState,
|
||||
parseSearch,
|
||||
parseDetailedListing,
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
parseDetailedListing,
|
||||
parseSearch,
|
||||
RateLimitError,
|
||||
resolveCategoryId,
|
||||
resolveLocationId,
|
||||
slugify,
|
||||
ValidationError,
|
||||
} from "./scrapers/kijiji";
|
||||
export type {
|
||||
KijijiListingDetails,
|
||||
DetailedListing,
|
||||
SearchOptions,
|
||||
ListingFetchOptions,
|
||||
} from "./scrapers/kijiji";
|
||||
|
||||
export {
|
||||
default as fetchFacebookItems,
|
||||
fetchFacebookItem,
|
||||
parseFacebookCookieString,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookMarketplaceData,
|
||||
extractFacebookItemData,
|
||||
parseFacebookAds,
|
||||
parseFacebookItem,
|
||||
} from "./scrapers/facebook";
|
||||
export type { FacebookListingDetails } from "./scrapers/facebook";
|
||||
|
||||
export { default as fetchEbayItems } from "./scrapers/ebay";
|
||||
export type { EbayListingDetails } from "./scrapers/ebay";
|
||||
|
||||
// Export shared utilities
|
||||
export * from "./utils/http";
|
||||
export * from "./utils/delay";
|
||||
export * from "./utils/format";
|
||||
|
||||
// Export shared types
|
||||
export * from "./types/common";
|
||||
// Export shared utilities
|
||||
export * from "./utils/cookies";
|
||||
export * from "./utils/delay";
|
||||
export * from "./utils/format";
|
||||
export * from "./utils/http";
|
||||
|
||||
@@ -1,9 +1,18 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { parseHTML } from "linkedom";
|
||||
import { isRecord } from "../utils/http";
|
||||
import {
|
||||
type CookieConfig,
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
|
||||
// eBay cookie configuration
|
||||
const EBAY_COOKIE_CONFIG: CookieConfig = {
|
||||
name: "eBay",
|
||||
domain: ".ebay.ca",
|
||||
envVar: "EBAY_COOKIE",
|
||||
filePath: "./cookies/ebay.json",
|
||||
};
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
@@ -29,8 +38,10 @@ export interface EbayListingDetails {
|
||||
/**
|
||||
* Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents
|
||||
*/
|
||||
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null {
|
||||
if (!priceText || typeof priceText !== 'string') return null;
|
||||
function parseEbayPrice(
|
||||
priceText: string,
|
||||
): { cents: number; currency: string } | null {
|
||||
if (!priceText || typeof priceText !== "string") return null;
|
||||
|
||||
// Clean up the price text and extract currency and amount
|
||||
const cleaned = priceText.trim();
|
||||
@@ -39,19 +50,23 @@ function parseEbayPrice(priceText: string): { cents: number; currency: string }
|
||||
const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
|
||||
if (!numberMatches) return null;
|
||||
|
||||
const amountStr = numberMatches[0].replace(/,/g, '');
|
||||
const amountStr = numberMatches[0].replace(/,/g, "");
|
||||
const dollars = parseFloat(amountStr);
|
||||
if (isNaN(dollars)) return null;
|
||||
if (Number.isNaN(dollars)) return null;
|
||||
|
||||
const cents = Math.round(dollars * 100);
|
||||
|
||||
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
|
||||
let currency = 'USD'; // Default
|
||||
let currency = "USD"; // Default
|
||||
|
||||
if (cleaned.toUpperCase().includes('CAD') || cleaned.includes('CA$') || cleaned.includes('C $')) {
|
||||
currency = 'CAD';
|
||||
} else if (cleaned.toUpperCase().includes('USD') || cleaned.includes('$')) {
|
||||
currency = 'USD';
|
||||
if (
|
||||
cleaned.toUpperCase().includes("CAD") ||
|
||||
cleaned.includes("CA$") ||
|
||||
cleaned.includes("C $")
|
||||
) {
|
||||
currency = "CAD";
|
||||
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
|
||||
currency = "USD";
|
||||
}
|
||||
|
||||
return { cents, currency };
|
||||
@@ -77,7 +92,7 @@ function parseEbayListings(
|
||||
htmlString: HTMLString,
|
||||
keywords: string[],
|
||||
exclusions: string[],
|
||||
strictMode: boolean
|
||||
strictMode: boolean,
|
||||
): EbayListingDetails[] {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const results: EbayListingDetails[] = [];
|
||||
@@ -85,38 +100,60 @@ function parseEbayListings(
|
||||
// Find all listing links by looking for eBay item URLs (/itm/)
|
||||
const linkElements = document.querySelectorAll('a[href*="itm/"]');
|
||||
|
||||
|
||||
for (const linkElement of linkElements) {
|
||||
try {
|
||||
// Get href attribute
|
||||
let href = linkElement.getAttribute('href');
|
||||
let href = linkElement.getAttribute("href");
|
||||
if (!href) continue;
|
||||
|
||||
// Make href absolute
|
||||
if (!href.startsWith('http')) {
|
||||
href = href.startsWith('//') ? `https:${href}` : `https://www.ebay.com${href}`;
|
||||
if (!href.startsWith("http")) {
|
||||
href = href.startsWith("//")
|
||||
? `https:${href}`
|
||||
: `https://www.ebay.com${href}`;
|
||||
}
|
||||
|
||||
// Find the container - go up several levels to find the item container
|
||||
// Modern eBay uses complex nested structures
|
||||
let container = linkElement.parentElement?.parentElement?.parentElement;
|
||||
if (!container) {
|
||||
// Try a different level
|
||||
container = linkElement.parentElement?.parentElement;
|
||||
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||
let container: Element | null = linkElement;
|
||||
let depth = 0;
|
||||
const maxDepth = 15;
|
||||
|
||||
// Walk up until we find a list item or results container
|
||||
while (container && depth < maxDepth) {
|
||||
const classes = container.className || "";
|
||||
if (
|
||||
classes.includes("s-item") ||
|
||||
classes.includes("srp-results") ||
|
||||
container.tagName === "LI"
|
||||
) {
|
||||
break;
|
||||
}
|
||||
if (!container) continue;
|
||||
container = container.parentElement;
|
||||
depth++;
|
||||
}
|
||||
|
||||
if (!container || depth >= maxDepth) continue;
|
||||
|
||||
// Extract title - look for heading or title-related elements near the link
|
||||
// Modern eBay often uses h3, span, or div with text content near the link
|
||||
let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span');
|
||||
let titleElement = container.querySelector(
|
||||
'h3, [role="heading"], .s-item__title span',
|
||||
);
|
||||
|
||||
// If no direct title element, try finding text content around the link
|
||||
if (!titleElement) {
|
||||
// Look for spans or divs with text near this link
|
||||
const nearbySpans = container.querySelectorAll('span, div');
|
||||
const nearbySpans = container.querySelectorAll("span, div");
|
||||
for (const span of nearbySpans) {
|
||||
const text = span.textContent?.trim();
|
||||
if (text && text.length > 10 && text.length < 200 && !text.includes('$') && !text.includes('item')) {
|
||||
if (
|
||||
text &&
|
||||
text.length > 10 &&
|
||||
text.length < 200 &&
|
||||
!text.includes("$") &&
|
||||
!text.includes("item")
|
||||
) {
|
||||
titleElement = span;
|
||||
break;
|
||||
}
|
||||
@@ -129,12 +166,12 @@ function parseEbayListings(
|
||||
if (title) {
|
||||
// Remove common eBay UI strings that appear at the end of titles
|
||||
const uiStrings = [
|
||||
'Opens in a new window',
|
||||
'Opens in a new tab',
|
||||
'Opens in a new window or tab',
|
||||
'opens in a new window',
|
||||
'opens in a new tab',
|
||||
'opens in a new window or tab'
|
||||
"Opens in a new window",
|
||||
"Opens in a new tab",
|
||||
"Opens in a new window or tab",
|
||||
"opens in a new window",
|
||||
"opens in a new tab",
|
||||
"opens in a new window or tab",
|
||||
];
|
||||
|
||||
for (const uiString of uiStrings) {
|
||||
@@ -157,17 +194,28 @@ function parseEbayListings(
|
||||
if (title === "Shop on eBay" || title.length < 3) continue;
|
||||
|
||||
// Extract price - look for eBay's price classes, preferring sale/discount prices
|
||||
let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]');
|
||||
// Updated for 2026 eBay HTML structure
|
||||
let priceElement = container.querySelector(
|
||||
'[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
|
||||
);
|
||||
|
||||
// If no direct price class, look for spans containing $ (but not titles)
|
||||
if (!priceElement) {
|
||||
const spansAndElements = container.querySelectorAll('span, div, b, em, strong');
|
||||
const spansAndElements = container.querySelectorAll(
|
||||
"span, div, b, em, strong",
|
||||
);
|
||||
for (const el of spansAndElements) {
|
||||
const text = el.textContent?.trim();
|
||||
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
|
||||
if (text && text.includes('$') && text.length < 100 &&
|
||||
!text.includes('laptop') && !text.includes('computer') && !text.includes('intel') &&
|
||||
!text.includes('core') && !text.includes('ram') && !text.includes('ssd') &&
|
||||
if (
|
||||
text?.includes("$") &&
|
||||
text.length < 100 &&
|
||||
!text.includes("laptop") &&
|
||||
!text.includes("computer") &&
|
||||
!text.includes("intel") &&
|
||||
!text.includes("core") &&
|
||||
!text.includes("ram") &&
|
||||
!text.includes("ssd") &&
|
||||
!/\d{4}/.test(text) && // Avoid years like "2024"
|
||||
!text.includes('"') // Avoid measurements
|
||||
) {
|
||||
@@ -181,17 +229,26 @@ function parseEbayListings(
|
||||
// Prefer sale/current price over original/strikethrough price
|
||||
if (priceElement) {
|
||||
// Check if this element or its parent contains multiple price elements
|
||||
const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement;
|
||||
const priceContainer =
|
||||
priceElement.closest('[class*="s-item__price"]') ||
|
||||
priceElement.parentElement;
|
||||
|
||||
if (priceContainer) {
|
||||
// Look for all price elements within this container, including strikethrough prices
|
||||
const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike');
|
||||
const allPriceElements = priceContainer.querySelectorAll(
|
||||
'[class*="s-item__price"], span, b, em, strong, s, del, strike',
|
||||
);
|
||||
|
||||
// Filter to only elements that actually contain prices (not labels)
|
||||
const actualPrices: HTMLElement[] = [];
|
||||
for (const el of allPriceElements) {
|
||||
const text = el.textContent?.trim();
|
||||
if (text && /^\s*[$£€¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) {
|
||||
if (
|
||||
text &&
|
||||
/^\s*[$£€¥]/u.test(text) &&
|
||||
text.length < 50 &&
|
||||
!/\d{4}/.test(text)
|
||||
) {
|
||||
actualPrices.push(el);
|
||||
}
|
||||
}
|
||||
@@ -199,11 +256,18 @@ function parseEbayListings(
|
||||
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
|
||||
if (actualPrices.length > 1) {
|
||||
// First, look for prices that are NOT struck through
|
||||
const nonStrikethroughPrices = actualPrices.filter(el => {
|
||||
const nonStrikethroughPrices = actualPrices.filter((el) => {
|
||||
const tagName = el.tagName.toLowerCase();
|
||||
const styles = el.classList.contains('s-strikethrough') || el.classList.contains('u-flStrike') ||
|
||||
el.closest('s, del, strike');
|
||||
return tagName !== 's' && tagName !== 'del' && tagName !== 'strike' && !styles;
|
||||
const styles =
|
||||
el.classList.contains("s-strikethrough") ||
|
||||
el.classList.contains("u-flStrike") ||
|
||||
el.closest("s, del, strike");
|
||||
return (
|
||||
tagName !== "s" &&
|
||||
tagName !== "del" &&
|
||||
tagName !== "strike" &&
|
||||
!styles
|
||||
);
|
||||
});
|
||||
|
||||
if (nonStrikethroughPrices.length > 0) {
|
||||
@@ -227,12 +291,22 @@ function parseEbayListings(
|
||||
if (!priceInfo) continue;
|
||||
|
||||
// Apply exclusion filters
|
||||
if (exclusions.some(exclusion => title.toLowerCase().includes(exclusion.toLowerCase()))) {
|
||||
if (
|
||||
exclusions.some((exclusion) =>
|
||||
title.toLowerCase().includes(exclusion.toLowerCase()),
|
||||
)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Apply strict mode filter (title must contain at least one keyword)
|
||||
if (strictMode && !keywords.some(keyword => title!.toLowerCase().includes(keyword.toLowerCase()))) {
|
||||
if (
|
||||
strictMode &&
|
||||
title &&
|
||||
!keywords.some((keyword) =>
|
||||
title.toLowerCase().includes(keyword.toLowerCase()),
|
||||
)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -258,6 +332,32 @@ function parseEbayListings(
|
||||
return results;
|
||||
}
|
||||
|
||||
// ----------------------------- Cookie Loading -----------------------------
|
||||
|
||||
/**
|
||||
* Load eBay cookies with priority: URL param > ENV var > file
|
||||
* Uses shared cookie utility for consistent handling across all scrapers
|
||||
*/
|
||||
async function loadEbayCookies(
|
||||
cookiesSource?: string,
|
||||
): Promise<string | undefined> {
|
||||
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
|
||||
|
||||
if (cookies.length === 0) {
|
||||
console.warn(
|
||||
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
|
||||
"Provide cookies via (in priority order):\n" +
|
||||
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
||||
" 2. EBAY_COOKIE environment variable, or\n" +
|
||||
" 3. ./cookies/ebay.json file (lowest priority)\n" +
|
||||
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
||||
);
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return formatCookiesForHeader(cookies, "www.ebay.ca");
|
||||
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchEbayItems(
|
||||
@@ -271,6 +371,7 @@ export default async function fetchEbayItems(
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||
} = {},
|
||||
) {
|
||||
const {
|
||||
@@ -281,8 +382,12 @@ export default async function fetchEbayItems(
|
||||
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
||||
buyItNowOnly = true,
|
||||
canadaOnly = true,
|
||||
cookies: cookiesSource,
|
||||
} = opts;
|
||||
|
||||
// Load eBay cookies with priority: URL param > ENV var > file
|
||||
const cookies = await loadEbayCookies(cookiesSource);
|
||||
|
||||
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
||||
const urlParams = new URLSearchParams({
|
||||
_nkw: SEARCH_QUERY,
|
||||
@@ -307,20 +412,26 @@ export default async function fetchEbayItems(
|
||||
try {
|
||||
// Use custom headers modeled after real browser requests to bypass bot detection
|
||||
const headers: Record<string, string> = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Referer': 'https://www.ebay.ca/',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'Sec-Fetch-Site': 'same-origin',
|
||||
'Sec-Fetch-User': '?1',
|
||||
'Priority': 'u=0, i'
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
||||
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||
Referer: "https://www.ebay.ca/",
|
||||
Connection: "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
"Sec-Fetch-User": "?1",
|
||||
Priority: "u=0, i",
|
||||
};
|
||||
|
||||
// Add cookies if available (helps bypass bot detection)
|
||||
if (cookies) {
|
||||
headers.Cookie = cookies;
|
||||
}
|
||||
|
||||
const res = await fetch(searchUrl, {
|
||||
method: "GET",
|
||||
headers,
|
||||
@@ -340,17 +451,21 @@ export default async function fetchEbayItems(
|
||||
|
||||
console.log(`\nParsing eBay listings...`);
|
||||
|
||||
const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode);
|
||||
const listings = parseEbayListings(
|
||||
searchHtml,
|
||||
keywords,
|
||||
exclusions,
|
||||
strictMode,
|
||||
);
|
||||
|
||||
// Filter by price range (additional safety check)
|
||||
const filteredListings = listings.filter(listing => {
|
||||
const filteredListings = listings.filter((listing) => {
|
||||
const cents = listing.listingPrice?.cents;
|
||||
return cents && cents >= minPrice && cents <= maxPrice;
|
||||
});
|
||||
|
||||
console.log(`Parsed ${filteredListings.length} eBay listings.`);
|
||||
return filteredListings;
|
||||
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
|
||||
@@ -1,10 +1,16 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { parseHTML } from "linkedom";
|
||||
import cliProgress from "cli-progress";
|
||||
import { isRecord } from "../utils/http";
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import {
|
||||
type Cookie,
|
||||
type CookieConfig,
|
||||
ensureCookies,
|
||||
formatCookiesForHeader,
|
||||
parseCookieString,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import { isRecord } from "../utils/http";
|
||||
|
||||
/**
|
||||
* Facebook Marketplace Scraper
|
||||
@@ -14,21 +20,13 @@ import type { HTMLString } from "../types/common";
|
||||
* This is by design to respect Facebook's authentication requirements.
|
||||
*/
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
interface Cookie {
|
||||
name: string;
|
||||
value: string;
|
||||
domain: string;
|
||||
path: string;
|
||||
secure?: boolean;
|
||||
httpOnly?: boolean;
|
||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
||||
session?: boolean;
|
||||
expirationDate?: number;
|
||||
partitionKey?: Record<string, unknown>;
|
||||
storeId?: string;
|
||||
}
|
||||
// Facebook cookie configuration
|
||||
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
|
||||
name: "Facebook",
|
||||
domain: ".facebook.com",
|
||||
envVar: "FACEBOOK_COOKIE",
|
||||
filePath: "./cookies/facebook.json",
|
||||
};
|
||||
|
||||
interface FacebookAdNode {
|
||||
node: {
|
||||
@@ -204,171 +202,31 @@ export interface FacebookListingDetails {
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/**
|
||||
* Load Facebook cookies from file or string
|
||||
*/
|
||||
async function loadFacebookCookies(
|
||||
cookiesSource?: string,
|
||||
cookiePath = "./cookies/facebook.json"
|
||||
): Promise<Cookie[]> {
|
||||
// First try to load from provided string parameter
|
||||
if (cookiesSource) {
|
||||
try {
|
||||
const cookies = JSON.parse(cookiesSource);
|
||||
if (Array.isArray(cookies)) {
|
||||
return cookies.filter(
|
||||
(cookie): cookie is Cookie =>
|
||||
cookie &&
|
||||
typeof cookie.name === "string" &&
|
||||
typeof cookie.value === "string"
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
throw new Error(`Invalid cookies JSON provided: ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Try to load from specified path
|
||||
try {
|
||||
const cookiesPath = cookiePath;
|
||||
const file = Bun.file(cookiesPath);
|
||||
if (await file.exists()) {
|
||||
const content = await file.text();
|
||||
const cookies = JSON.parse(content);
|
||||
if (Array.isArray(cookies)) {
|
||||
return cookies.filter(
|
||||
(cookie): cookie is Cookie =>
|
||||
cookie &&
|
||||
typeof cookie.name === "string" &&
|
||||
typeof cookie.value === "string"
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse Facebook cookie string into Cookie array format
|
||||
* @deprecated Use parseCookieString from utils/cookies instead
|
||||
*/
|
||||
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
||||
if (!cookieString || !cookieString.trim()) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return cookieString
|
||||
.split(";")
|
||||
.map((pair) => pair.trim())
|
||||
.filter((pair) => pair.includes("="))
|
||||
.map((pair) => {
|
||||
const [name, value] = pair.split("=", 2);
|
||||
const trimmedName = name.trim();
|
||||
const trimmedValue = value.trim();
|
||||
|
||||
// Skip empty names or values
|
||||
if (!trimmedName || !trimmedValue) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
name: trimmedName,
|
||||
value: decodeURIComponent(trimmedValue),
|
||||
domain: ".facebook.com",
|
||||
path: "/",
|
||||
secure: true,
|
||||
httpOnly: false,
|
||||
sameSite: "lax" as const,
|
||||
expirationDate: undefined, // Session cookies
|
||||
};
|
||||
})
|
||||
.filter((cookie): cookie is Cookie => cookie !== null);
|
||||
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure Facebook cookies are available, parsing from env var if needed
|
||||
* Load Facebook cookies with priority: URL param > ENV var > file
|
||||
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
||||
* @param _cookiePath - Deprecated, uses default path from config
|
||||
*/
|
||||
export async function ensureFacebookCookies(
|
||||
cookiePath = "./cookies/facebook.json"
|
||||
cookiesSource?: string,
|
||||
_cookiePath?: string,
|
||||
): Promise<Cookie[]> {
|
||||
// First try to load existing cookies
|
||||
try {
|
||||
const existing = await loadFacebookCookies(undefined, cookiePath);
|
||||
if (existing.length > 0) {
|
||||
return existing;
|
||||
}
|
||||
} catch {
|
||||
// File doesn't exist or is invalid, continue to check env var
|
||||
}
|
||||
|
||||
// Try to parse from environment variable
|
||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
||||
if (!cookieString || !cookieString.trim()) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Either:\n" +
|
||||
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
|
||||
" 2. Create ./cookies/facebook.json manually with cookie array"
|
||||
);
|
||||
}
|
||||
|
||||
// Parse the cookie string
|
||||
const cookies = parseFacebookCookieString(cookieString);
|
||||
if (cookies.length === 0) {
|
||||
throw new Error(
|
||||
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
|
||||
'Expected format: "name1=value1; name2=value2;"'
|
||||
);
|
||||
}
|
||||
|
||||
// Save to file for future use
|
||||
try {
|
||||
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
|
||||
console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
|
||||
} catch (error) {
|
||||
console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
|
||||
// Continue anyway, we have the cookies in memory
|
||||
}
|
||||
|
||||
return cookies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format cookies array into Cookie header string
|
||||
*/
|
||||
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
|
||||
const validCookies = cookies
|
||||
.filter((cookie) => {
|
||||
// Check if cookie applies to this domain
|
||||
if (cookie.domain.startsWith(".")) {
|
||||
// Domain cookie (applies to subdomains)
|
||||
return (
|
||||
domain.endsWith(cookie.domain.slice(1)) ||
|
||||
domain === cookie.domain.slice(1)
|
||||
);
|
||||
}
|
||||
// Host-only cookie
|
||||
return cookie.domain === domain;
|
||||
})
|
||||
.filter((cookie) => {
|
||||
// Check expiration
|
||||
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
|
||||
return false; // Expired
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
return validCookies
|
||||
.map((cookie) => `${cookie.name}=${cookie.value}`)
|
||||
.join("; ");
|
||||
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
|
||||
}
|
||||
|
||||
class HttpError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly status: number,
|
||||
public readonly url: string
|
||||
public readonly url: string,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "HttpError";
|
||||
@@ -407,7 +265,7 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
|
||||
!extractionStats.lastApiChangeDetected
|
||||
) {
|
||||
console.warn(
|
||||
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes."
|
||||
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
|
||||
);
|
||||
extractionStats.lastApiChangeDetected = new Date();
|
||||
}
|
||||
@@ -433,7 +291,7 @@ async function fetchHtml(
|
||||
retryBaseMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
cookies?: string;
|
||||
}
|
||||
},
|
||||
): Promise<HTMLString> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||
@@ -487,7 +345,7 @@ async function fetchHtml(
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
|
||||
res.status,
|
||||
url
|
||||
url,
|
||||
);
|
||||
}
|
||||
// Retry on 5xx
|
||||
@@ -498,7 +356,7 @@ async function fetchHtml(
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url
|
||||
url,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -521,7 +379,7 @@ async function fetchHtml(
|
||||
Extract marketplace search data from Facebook page script tags
|
||||
*/
|
||||
export function extractFacebookMarketplaceData(
|
||||
htmlString: HTMLString
|
||||
htmlString: HTMLString,
|
||||
): FacebookAdNode[] | null {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const scripts = document.querySelectorAll("script");
|
||||
@@ -567,13 +425,12 @@ export function extractFacebookMarketplaceData(
|
||||
if (
|
||||
result &&
|
||||
isRecord(result) &&
|
||||
(result as any).feed_units?.edges?.length > 0
|
||||
(result as Record<string, unknown>).feed_units?.edges?.length > 0
|
||||
) {
|
||||
marketplaceData = result as FacebookMarketplaceSearch;
|
||||
break;
|
||||
}
|
||||
} catch {
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
if (marketplaceData) break;
|
||||
@@ -583,13 +440,13 @@ export function extractFacebookMarketplaceData(
|
||||
if (parsed.marketplace_search && isRecord(parsed.marketplace_search)) {
|
||||
const searchData =
|
||||
parsed.marketplace_search as FacebookMarketplaceSearch;
|
||||
if (searchData.feed_units?.edges?.length ?? 0 > 0) {
|
||||
const feedLength = searchData.feed_units?.edges?.length ?? 0;
|
||||
if (feedLength > 0) {
|
||||
marketplaceData = searchData;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
if (!marketplaceData?.feed_units?.edges?.length) {
|
||||
@@ -598,7 +455,7 @@ export function extractFacebookMarketplaceData(
|
||||
}
|
||||
|
||||
console.log(
|
||||
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`
|
||||
`Successfully parsed ${marketplaceData.feed_units.edges.length} Facebook marketplace listings`,
|
||||
);
|
||||
return marketplaceData.feed_units.edges.map((edge) => ({ node: edge.node }));
|
||||
}
|
||||
@@ -608,7 +465,7 @@ export function extractFacebookMarketplaceData(
|
||||
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
|
||||
*/
|
||||
export function extractFacebookItemData(
|
||||
htmlString: HTMLString
|
||||
htmlString: HTMLString,
|
||||
): FacebookMarketplaceItem | null {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const scripts = document.querySelectorAll("script");
|
||||
@@ -657,7 +514,7 @@ export function extractFacebookItemData(
|
||||
targetData.__typename === "GroupCommerceProductItem"
|
||||
) {
|
||||
console.log(
|
||||
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`
|
||||
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
|
||||
);
|
||||
return targetData as FacebookMarketplaceItem;
|
||||
}
|
||||
@@ -671,18 +528,19 @@ export function extractFacebookItemData(
|
||||
const findMarketplaceData = (
|
||||
obj: unknown,
|
||||
depth = 0,
|
||||
maxDepth = 10
|
||||
maxDepth = 10,
|
||||
): FacebookMarketplaceItem | null => {
|
||||
if (depth > maxDepth) return null; // Prevent infinite recursion
|
||||
if (isRecord(obj)) {
|
||||
// Check if this object matches the expected marketplace item structure
|
||||
const candidate = obj as Record<string, unknown>;
|
||||
if (
|
||||
(obj as any).marketplace_listing_title &&
|
||||
(obj as any).id &&
|
||||
(obj as any).__typename === "GroupCommerceProductItem" &&
|
||||
(obj as any).redacted_description
|
||||
candidate.marketplace_listing_title &&
|
||||
candidate.id &&
|
||||
candidate.__typename === "GroupCommerceProductItem" &&
|
||||
candidate.redacted_description
|
||||
) {
|
||||
return obj as unknown as FacebookMarketplaceItem;
|
||||
return candidate as unknown as FacebookMarketplaceItem;
|
||||
}
|
||||
// Recursively search nested objects and arrays
|
||||
for (const key in obj) {
|
||||
@@ -706,7 +564,7 @@ export function extractFacebookItemData(
|
||||
const recursiveResult = findMarketplaceData(parsed.require);
|
||||
if (recursiveResult) {
|
||||
console.log(
|
||||
"Successfully extracted Facebook item data using recursive search"
|
||||
"Successfully extracted Facebook item data using recursive search",
|
||||
);
|
||||
return recursiveResult;
|
||||
}
|
||||
@@ -727,14 +585,13 @@ export function extractFacebookItemData(
|
||||
bboxData.__typename === "GroupCommerceProductItem"
|
||||
) {
|
||||
console.log(
|
||||
"Successfully extracted Facebook item data from __bbox structure"
|
||||
"Successfully extracted Facebook item data from __bbox structure",
|
||||
);
|
||||
return bboxData as FacebookMarketplaceItem;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -743,7 +600,9 @@ export function extractFacebookItemData(
|
||||
/**
|
||||
Parse Facebook marketplace search results into ListingDetails[]
|
||||
*/
|
||||
export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[] {
|
||||
export function parseFacebookAds(
|
||||
ads: FacebookAdNode[],
|
||||
): FacebookListingDetails[] {
|
||||
const results: FacebookListingDetails[] = [];
|
||||
|
||||
for (const adJson of ads) {
|
||||
@@ -840,7 +699,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
|
||||
title,
|
||||
listingPrice: {
|
||||
amountFormatted:
|
||||
priceObj.formatted_amount || formatCentsToCurrency(cents / 100, "en-CA"),
|
||||
priceObj.formatted_amount || formatCentsToCurrency(cents, "en-CA"),
|
||||
cents,
|
||||
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
|
||||
},
|
||||
@@ -856,8 +715,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
|
||||
};
|
||||
|
||||
results.push(listingDetails);
|
||||
} catch {
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
return results;
|
||||
@@ -868,7 +726,7 @@ export function parseFacebookAds(ads: FacebookAdNode[]): FacebookListingDetails[
|
||||
Updated for 2026 GroupCommerceProductItem structure
|
||||
*/
|
||||
export function parseFacebookItem(
|
||||
item: FacebookMarketplaceItem
|
||||
item: FacebookMarketplaceItem,
|
||||
): FacebookListingDetails | null {
|
||||
try {
|
||||
const title = item.marketplace_listing_title || item.custom_title;
|
||||
@@ -888,7 +746,7 @@ export function parseFacebookItem(
|
||||
if (!Number.isNaN(amount)) {
|
||||
cents = Math.round(amount * 100);
|
||||
amountFormatted =
|
||||
item.formatted_price?.text || formatCentsToCurrency(cents / 100, "en-CA");
|
||||
item.formatted_price?.text || formatCentsToCurrency(cents, "en-CA");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -963,31 +821,17 @@ export default async function fetchFacebookItems(
|
||||
LOCATION = "toronto",
|
||||
MAX_ITEMS = 25,
|
||||
cookiesSource?: string,
|
||||
cookiePath?: string
|
||||
cookiePath?: string,
|
||||
) {
|
||||
// Load Facebook cookies - required for Facebook Marketplace access
|
||||
let cookies: Cookie[];
|
||||
if (cookiesSource) {
|
||||
// Use provided cookie source (backward compatibility)
|
||||
cookies = await loadFacebookCookies(cookiesSource);
|
||||
} else {
|
||||
// Auto-load from file or parse from env var
|
||||
cookies = await ensureFacebookCookies(cookiePath);
|
||||
}
|
||||
|
||||
if (cookies.length === 0) {
|
||||
throw new Error(
|
||||
"Facebook cookies are required for marketplace access. " +
|
||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
|
||||
);
|
||||
}
|
||||
// Load Facebook cookies with priority: URL param > ENV var > file
|
||||
const cookies = await ensureFacebookCookies(cookiesSource, cookiePath);
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const domain = "www.facebook.com";
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1009,7 +853,7 @@ export default async function fetchFacebookItems(
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
@@ -1018,11 +862,11 @@ export default async function fetchFacebookItems(
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.warn(
|
||||
`\nFacebook marketplace access failed (${err.status}): ${err.message}`
|
||||
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
|
||||
);
|
||||
if (err.status === 400 || err.status === 401 || err.status === 403) {
|
||||
console.warn(
|
||||
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
|
||||
"This might indicate invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
|
||||
);
|
||||
}
|
||||
return [];
|
||||
@@ -1040,7 +884,7 @@ export default async function fetchFacebookItems(
|
||||
|
||||
const progressBar = new cliProgress.SingleBar(
|
||||
{},
|
||||
cliProgress.Presets.shades_classic
|
||||
cliProgress.Presets.shades_classic,
|
||||
);
|
||||
const totalProgress = ads.length;
|
||||
const currentProgress = 0;
|
||||
@@ -1050,7 +894,7 @@ export default async function fetchFacebookItems(
|
||||
|
||||
// Filter to only priced items (already done in parseFacebookAds)
|
||||
const pricedItems = items.filter(
|
||||
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0
|
||||
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
|
||||
);
|
||||
|
||||
progressBar.update(totalProgress);
|
||||
@@ -1066,31 +910,16 @@ export default async function fetchFacebookItems(
|
||||
export async function fetchFacebookItem(
|
||||
itemId: string,
|
||||
cookiesSource?: string,
|
||||
cookiePath?: string
|
||||
_cookiePath?: string,
|
||||
): Promise<FacebookListingDetails | null> {
|
||||
// Load Facebook cookies - required for Facebook Marketplace access
|
||||
let cookies: Cookie[];
|
||||
if (cookiesSource) {
|
||||
// Use provided cookie source (backward compatibility)
|
||||
cookies = await loadFacebookCookies(cookiesSource);
|
||||
} else {
|
||||
// Auto-load from file or parse from env var
|
||||
cookies = await ensureFacebookCookies(cookiePath);
|
||||
}
|
||||
|
||||
if (cookies.length === 0) {
|
||||
throw new Error(
|
||||
"Facebook cookies are required for marketplace access. " +
|
||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies."
|
||||
);
|
||||
}
|
||||
const cookies = await ensureFacebookCookies(cookiesSource);
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const domain = "www.facebook.com";
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain."
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1104,7 +933,7 @@ export async function fetchFacebookItem(
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
@@ -1113,7 +942,7 @@ export async function fetchFacebookItem(
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.warn(
|
||||
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`
|
||||
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
|
||||
);
|
||||
|
||||
// Enhanced error handling based on status codes
|
||||
@@ -1122,27 +951,27 @@ export async function fetchFacebookItem(
|
||||
case 401:
|
||||
case 403:
|
||||
console.warn(
|
||||
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies."
|
||||
"Authentication error: Invalid or expired cookies. Please update ./cookies/facebook.json with fresh session cookies.",
|
||||
);
|
||||
console.warn(
|
||||
"Try logging out and back into Facebook, then export fresh cookies."
|
||||
"Try logging out and back into Facebook, then export fresh cookies.",
|
||||
);
|
||||
break;
|
||||
case 404:
|
||||
console.warn(
|
||||
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid."
|
||||
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.",
|
||||
);
|
||||
break;
|
||||
case 429:
|
||||
console.warn(
|
||||
"Rate limited: Too many requests. Facebook is blocking access temporarily."
|
||||
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
|
||||
);
|
||||
break;
|
||||
case 500:
|
||||
case 502:
|
||||
case 503:
|
||||
console.warn(
|
||||
"Facebook server error: Marketplace may be temporarily unavailable."
|
||||
"Facebook server error: Marketplace may be temporarily unavailable.",
|
||||
);
|
||||
break;
|
||||
default:
|
||||
@@ -1163,7 +992,7 @@ export async function fetchFacebookItem(
|
||||
itemHtml.includes("This item has been sold")
|
||||
) {
|
||||
console.warn(
|
||||
`Item ${itemId} appears to be sold or removed from marketplace.`
|
||||
`Item ${itemId} appears to be sold or removed from marketplace.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
@@ -1174,13 +1003,13 @@ export async function fetchFacebookItem(
|
||||
itemHtml.includes("authentication required")
|
||||
) {
|
||||
console.warn(
|
||||
`Authentication failed for item ${itemId}. Cookies may be expired.`
|
||||
`Authentication failed for item ${itemId}. Cookies may be expired.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
console.warn(
|
||||
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`
|
||||
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
|
||||
);
|
||||
console.warn(" - The listing was removed or sold");
|
||||
console.warn(" - Authentication issues");
|
||||
|
||||
@@ -1,19 +1,30 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
import cliProgress from "cli-progress";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import {
|
||||
type CookieConfig,
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
} from "../utils/cookies";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import {
|
||||
fetchHtml,
|
||||
isRecord,
|
||||
HttpError,
|
||||
isRecord,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
} from "../utils/http";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import type { HTMLString } from "../types/common";
|
||||
|
||||
/**
 * Cookie-loading settings for the Kijiji scraper: the label used in log
 * messages, the domain given to cookies parsed from plain cookie strings,
 * and the env var / file consulted when no cookies are passed explicitly.
 */
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
  name: "Kijiji",
  envVar: "KIJIJI_COOKIE",
  filePath: "./cookies/kijiji.json",
  domain: ".kijiji.ca",
};
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
@@ -112,6 +123,7 @@ export interface SearchOptions {
|
||||
maxPages?: number; // Default: 5
|
||||
priceMin?: number;
|
||||
priceMax?: number;
|
||||
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||
}
|
||||
|
||||
export interface ListingFetchOptions {
|
||||
@@ -219,7 +231,7 @@ export function resolveCategoryId(category?: number | string): number {
|
||||
export function buildSearchUrl(
|
||||
keywords: string,
|
||||
options: SearchOptions & { page?: number },
|
||||
BASE_URL = "https://www.kijiji.ca"
|
||||
BASE_URL = "https://www.kijiji.ca",
|
||||
): string {
|
||||
const locationId = resolveLocationId(options.location);
|
||||
const categoryId = resolveCategoryId(options.category);
|
||||
@@ -319,7 +331,7 @@ const GRAPHQL_QUERIES = {
|
||||
async function fetchGraphQLData(
|
||||
query: string,
|
||||
variables: Record<string, unknown>,
|
||||
BASE_URL = "https://www.kijiji.ca"
|
||||
BASE_URL = "https://www.kijiji.ca",
|
||||
): Promise<unknown> {
|
||||
const endpoint = `${BASE_URL}/anvil/api`;
|
||||
|
||||
@@ -340,7 +352,7 @@ async function fetchGraphQLData(
|
||||
throw new HttpError(
|
||||
`GraphQL request failed with status ${response.status}`,
|
||||
response.status,
|
||||
endpoint
|
||||
endpoint,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -349,7 +361,7 @@ async function fetchGraphQLData(
|
||||
if (result.errors) {
|
||||
throw new ParseError(
|
||||
`GraphQL errors: ${JSON.stringify(result.errors)}`,
|
||||
result.errors
|
||||
result.errors,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -361,7 +373,7 @@ async function fetchGraphQLData(
|
||||
throw new NetworkError(
|
||||
`Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`,
|
||||
endpoint,
|
||||
err instanceof Error ? err : undefined
|
||||
err instanceof Error ? err : undefined,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -371,7 +383,7 @@ async function fetchGraphQLData(
|
||||
*/
|
||||
async function fetchSellerDetails(
|
||||
posterId: string,
|
||||
BASE_URL = "https://www.kijiji.ca"
|
||||
BASE_URL = "https://www.kijiji.ca",
|
||||
): Promise<{
|
||||
reviewCount?: number;
|
||||
reviewScore?: number;
|
||||
@@ -383,12 +395,12 @@ async function fetchSellerDetails(
|
||||
fetchGraphQLData(
|
||||
GRAPHQL_QUERIES.getReviewSummary,
|
||||
{ userId: posterId },
|
||||
BASE_URL
|
||||
BASE_URL,
|
||||
),
|
||||
fetchGraphQLData(
|
||||
GRAPHQL_QUERIES.getProfileMetrics,
|
||||
{ profileId: posterId },
|
||||
BASE_URL
|
||||
BASE_URL,
|
||||
),
|
||||
]);
|
||||
|
||||
@@ -405,7 +417,7 @@ async function fetchSellerDetails(
|
||||
// Silently fail for GraphQL errors - not critical for basic functionality
|
||||
console.warn(
|
||||
`Failed to fetch seller details for ${posterId}:`,
|
||||
err instanceof Error ? err.message : String(err)
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
return {};
|
||||
}
|
||||
@@ -416,7 +428,9 @@ async function fetchSellerDetails(
|
||||
/**
|
||||
Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML.
|
||||
*/
|
||||
export function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
|
||||
export function extractApolloState(
|
||||
htmlString: HTMLString,
|
||||
): ApolloRecord | null {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const nextData = document.getElementById("__NEXT_DATA__");
|
||||
if (!nextData || !nextData.textContent) return null;
|
||||
@@ -436,7 +450,7 @@ export function extractApolloState(htmlString: HTMLString): ApolloRecord | null
|
||||
*/
|
||||
export function parseSearch(
|
||||
htmlString: HTMLString,
|
||||
BASE_URL: string
|
||||
BASE_URL: string,
|
||||
): SearchListing[] {
|
||||
const apolloState = extractApolloState(htmlString);
|
||||
if (!apolloState) return [];
|
||||
@@ -463,16 +477,16 @@ export function parseSearch(
|
||||
/**
|
||||
Parse a listing page into a typed object (backward compatible).
|
||||
*/
|
||||
function parseListing(
|
||||
function _parseListing(
|
||||
htmlString: HTMLString,
|
||||
BASE_URL: string
|
||||
BASE_URL: string,
|
||||
): KijijiListingDetails | null {
|
||||
const apolloState = extractApolloState(htmlString);
|
||||
if (!apolloState) return null;
|
||||
|
||||
// Find the listing root key
|
||||
const listingKey = Object.keys(apolloState).find((k) =>
|
||||
k.includes("Listing")
|
||||
k.includes("Listing"),
|
||||
);
|
||||
if (!listingKey) return null;
|
||||
|
||||
@@ -494,7 +508,7 @@ function parseListing(
|
||||
|
||||
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
||||
const amountFormatted =
|
||||
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;
|
||||
cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
|
||||
|
||||
const numberOfViews =
|
||||
metrics?.views != null ? Number(metrics.views) : undefined;
|
||||
@@ -515,7 +529,8 @@ function parseListing(
|
||||
listingPrice: amountFormatted
|
||||
? {
|
||||
amountFormatted,
|
||||
cents: Number.isFinite(cents!) ? cents : undefined,
|
||||
cents:
|
||||
cents !== undefined && Number.isFinite(cents) ? cents : undefined,
|
||||
currency: price?.currency,
|
||||
}
|
||||
: undefined,
|
||||
@@ -523,7 +538,10 @@ function parseListing(
|
||||
listingStatus: status,
|
||||
creationDate: activationDate,
|
||||
endDate,
|
||||
numberOfViews: Number.isFinite(numberOfViews!) ? numberOfViews : undefined,
|
||||
numberOfViews:
|
||||
numberOfViews !== undefined && Number.isFinite(numberOfViews)
|
||||
? numberOfViews
|
||||
: undefined,
|
||||
address: location?.address ?? null,
|
||||
};
|
||||
}
|
||||
@@ -534,14 +552,14 @@ function parseListing(
|
||||
export async function parseDetailedListing(
|
||||
htmlString: HTMLString,
|
||||
BASE_URL: string,
|
||||
options: ListingFetchOptions = {}
|
||||
options: ListingFetchOptions = {},
|
||||
): Promise<DetailedListing | null> {
|
||||
const apolloState = extractApolloState(htmlString);
|
||||
if (!apolloState) return null;
|
||||
|
||||
// Find the listing root key
|
||||
const listingKey = Object.keys(apolloState).find((k) =>
|
||||
k.includes("Listing")
|
||||
k.includes("Listing"),
|
||||
);
|
||||
if (!listingKey) return null;
|
||||
|
||||
@@ -569,7 +587,7 @@ export async function parseDetailedListing(
|
||||
|
||||
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
||||
const amountFormatted =
|
||||
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;
|
||||
cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
|
||||
|
||||
const numberOfViews =
|
||||
metrics?.views != null ? Number(metrics.views) : undefined;
|
||||
@@ -621,7 +639,7 @@ export async function parseDetailedListing(
|
||||
try {
|
||||
const additionalData = await fetchSellerDetails(
|
||||
posterInfo.posterId,
|
||||
BASE_URL
|
||||
BASE_URL,
|
||||
);
|
||||
sellerInfo = {
|
||||
...sellerInfo,
|
||||
@@ -630,7 +648,7 @@ export async function parseDetailedListing(
|
||||
} catch {
|
||||
// Silently fail - GraphQL data is optional
|
||||
console.warn(
|
||||
`Failed to fetch additional seller data for ${posterInfo.posterId}`
|
||||
`Failed to fetch additional seller data for ${posterInfo.posterId}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -683,10 +701,20 @@ export default async function fetchKijijiItems(
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
BASE_URL = "https://www.kijiji.ca",
|
||||
searchOptions: SearchOptions = {},
|
||||
listingOptions: ListingFetchOptions = {}
|
||||
listingOptions: ListingFetchOptions = {},
|
||||
) {
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
|
||||
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||
const cookies = await loadCookiesOptional(
|
||||
KIJIJI_COOKIE_CONFIG,
|
||||
searchOptions.cookies,
|
||||
);
|
||||
const cookieHeader =
|
||||
cookies.length > 0
|
||||
? formatCookiesForHeader(cookies, "www.kijiji.ca")
|
||||
: undefined;
|
||||
|
||||
// Set defaults for configuration
|
||||
const finalSearchOptions: Required<SearchOptions> = {
|
||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||
@@ -697,6 +725,7 @@ export default async function fetchKijijiItems(
|
||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||
priceMin: searchOptions.priceMin as number,
|
||||
priceMax: searchOptions.priceMax as number,
|
||||
cookies: searchOptions.cookies ?? "",
|
||||
};
|
||||
|
||||
const finalListingOptions: Required<ListingFetchOptions> = {
|
||||
@@ -717,7 +746,7 @@ export default async function fetchKijijiItems(
|
||||
// Add page parameter for pagination
|
||||
...(page > 1 && { page }),
|
||||
},
|
||||
BASE_URL
|
||||
BASE_URL,
|
||||
);
|
||||
|
||||
console.log(`Fetching search page ${page}: ${searchUrl}`);
|
||||
@@ -725,16 +754,17 @@ export default async function fetchKijijiItems(
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
||||
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||
});
|
||||
|
||||
const searchResults = parseSearch(searchHtml, BASE_URL);
|
||||
if (searchResults.length === 0) {
|
||||
console.log(
|
||||
`No more results found on page ${page}. Stopping pagination.`
|
||||
`No more results found on page ${page}. Stopping pagination.`,
|
||||
);
|
||||
break;
|
||||
}
|
||||
@@ -749,54 +779,79 @@ export default async function fetchKijijiItems(
|
||||
}
|
||||
|
||||
console.log(
|
||||
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`
|
||||
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
|
||||
);
|
||||
|
||||
// Fetch details for this page's listings
|
||||
const progressBar = new cliProgress.SingleBar(
|
||||
{},
|
||||
cliProgress.Presets.shades_classic
|
||||
);
|
||||
// Fetch details for this page's listings with controlled concurrency
|
||||
const isTTY = process.stdout?.isTTY ?? false;
|
||||
const progressBar = isTTY
|
||||
? new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
|
||||
: null;
|
||||
const totalProgress = newListingLinks.length;
|
||||
let currentProgress = 0;
|
||||
progressBar.start(totalProgress, currentProgress);
|
||||
progressBar?.start(totalProgress, currentProgress);
|
||||
|
||||
for (const link of newListingLinks) {
|
||||
// Process in batches for controlled concurrency
|
||||
const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing
|
||||
const results: (DetailedListing | null)[] = [];
|
||||
|
||||
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
||||
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
|
||||
const batchPromises = batch.map(async (link) => {
|
||||
try {
|
||||
const html = await fetchHtml(link, DELAY_MS, {
|
||||
const html = await fetchHtml(link, 0, {
|
||||
// No per-request delay, batch handles rate limit
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`
|
||||
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||
});
|
||||
const parsed = await parseDetailedListing(
|
||||
html,
|
||||
BASE_URL,
|
||||
finalListingOptions
|
||||
finalListingOptions,
|
||||
);
|
||||
if (parsed) {
|
||||
allListings.push(parsed);
|
||||
}
|
||||
return parsed;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`
|
||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
|
||||
);
|
||||
} else {
|
||||
console.error(
|
||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`
|
||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
|
||||
);
|
||||
}
|
||||
return null;
|
||||
} finally {
|
||||
currentProgress++;
|
||||
progressBar.update(currentProgress);
|
||||
progressBar?.update(currentProgress);
|
||||
if (!progressBar) {
|
||||
console.log(`Progress: ${currentProgress}/${totalProgress}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const batchResults = await Promise.all(batchPromises);
|
||||
results.push(...batchResults);
|
||||
|
||||
// Wait between batches to respect rate limit
|
||||
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, DELAY_MS * batch.length),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
progressBar.stop();
|
||||
allListings.push(
|
||||
...results.filter((r): r is DetailedListing => r !== null),
|
||||
);
|
||||
|
||||
progressBar?.stop();
|
||||
|
||||
// If we got fewer results than expected (40 per page), we've reached the end
|
||||
if (searchResults.length < 40) {
|
||||
@@ -809,10 +864,4 @@ export default async function fetchKijijiItems(
|
||||
}
|
||||
|
||||
// Re-export error classes for convenience
|
||||
export {
|
||||
HttpError,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
};
|
||||
export { HttpError, NetworkError, ParseError, RateLimitError, ValidationError };
|
||||
|
||||
227
packages/core/src/utils/cookies.ts
Normal file
227
packages/core/src/utils/cookies.ts
Normal file
@@ -0,0 +1,227 @@
|
||||
/**
|
||||
* Shared cookie handling utilities for marketplace scrapers
|
||||
*/
|
||||
|
||||
/**
 * A single cookie record as handled by the cookie utilities in this file.
 */
export interface Cookie {
  // --- required fields ---

  /** Cookie name, emitted as the left side of `name=value` in the header. */
  name: string;
  /** Cookie value, emitted as the right side of `name=value` in the header. */
  value: string;
  /**
   * Domain the cookie belongs to. A leading "." marks a domain cookie that
   * also applies to subdomains; otherwise it is treated as host-only.
   */
  domain: string;
  /** Path attribute ("/" for cookies parsed from a cookie string). */
  path: string;

  // --- optional attributes ---

  /** Secure attribute. */
  secure?: boolean;
  /** HttpOnly attribute. */
  httpOnly?: boolean;
  /** SameSite attribute. */
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  /** Session flag; not consulted by the helpers in this file. */
  session?: boolean;
  /** Expiry as seconds since the Unix epoch (compared against Date.now() / 1000). */
  expirationDate?: number;
  /** Partition key metadata; not consulted by the helpers in this file. */
  partitionKey?: Record<string, unknown>;
  /** Cookie store identifier; not consulted by the helpers in this file. */
  storeId?: string;
}
|
||||
|
||||
/**
 * Per-marketplace settings that tell the cookie loaders where to look for
 * cookies and how to label them in log output.
 */
export interface CookieConfig {
  /** Human-readable label used in log messages (e.g., "Facebook", "Kijiji"). */
  name: string;

  /** Default cookie domain (e.g., ".facebook.com", ".kijiji.ca"). */
  domain: string;

  /** Name of the environment variable holding cookies (e.g., "FACEBOOK_COOKIE"). */
  envVar: string;

  /** Location of the cookie file (e.g., "./cookies/facebook.json"). */
  filePath: string;
}
|
||||
|
||||
/**
 * Parse cookie string format into Cookie array.
 * Supports format: "name1=value1; name2=value2"
 *
 * Segments without "=", or whose name or value is empty after trimming, are
 * skipped. Only the first "=" separates name from value, so values may
 * themselves contain "=". Values are percent-decoded when possible; a value
 * that is not valid percent-encoding (for example "100%") is kept verbatim
 * rather than letting decodeURIComponent's URIError abort the whole parse.
 *
 * @param cookieString - Raw cookie string; blank or empty input yields [].
 * @param domain - Domain assigned to every cookie produced.
 * @returns Cookies with path "/", secure true, httpOnly false, sameSite "lax"
 *          and no expiration date.
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const segment of cookieString.split(";")) {
    const pair = segment.trim();
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    let value: string;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent sequence: fall back to the literal text.
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax" as const,
      expirationDate: undefined,
    });
  }

  return cookies;
}
|
||||
|
||||
/**
 * Decode a JSON document into a Cookie array.
 * Supports format: [{"name": "foo", "value": "bar", ...}]
 *
 * Anything that is not a JSON array yields []. Array entries are kept only
 * when they are truthy and carry string `name` and `value` properties; no
 * other fields are validated. Invalid JSON propagates JSON.parse's error.
 *
 * @param jsonString - JSON text to decode.
 * @returns The accepted entries, in their original order.
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const decoded: unknown = JSON.parse(jsonString);
  if (!Array.isArray(decoded)) {
    return [];
  }

  const accepted: Cookie[] = [];
  for (const entry of decoded) {
    const looksLikeCookie =
      entry &&
      typeof entry.name === "string" &&
      typeof entry.value === "string";
    if (looksLikeCookie) {
      accepted.push(entry as Cookie);
    }
  }
  return accepted;
}
|
||||
|
||||
/**
 * Try to parse cookies from a string (tries JSON first, then cookie string format).
 *
 * When the input is a JSON array containing at least one valid cookie, those
 * cookies are returned. Entries lacking a usable `domain` receive
 * `defaultDomain`, and entries lacking a usable `path` receive "/", because
 * parseJsonCookies validates only `name`/`value` while formatCookiesForHeader
 * reads `cookie.domain`. Otherwise the input is treated as a
 * "name1=value1; name2=value2" cookie string.
 *
 * @param input - JSON array text or cookie string.
 * @param defaultDomain - Domain used for cookie-string cookies and for JSON
 *                        cookies that do not specify one.
 * @returns Parsed cookies, or [] when neither format yields any.
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  let fromJson: Cookie[] = [];
  try {
    fromJson = parseJsonCookies(input);
  } catch {
    // Not valid JSON; fall through to the cookie-string format below.
  }

  if (fromJson.length > 0) {
    return fromJson.map((cookie) => ({
      ...cookie,
      domain:
        typeof cookie.domain === "string" && cookie.domain
          ? cookie.domain
          : defaultDomain,
      path: typeof cookie.path === "string" && cookie.path ? cookie.path : "/",
    }));
  }

  return parseCookieString(input, defaultDomain);
}
|
||||
|
||||
/**
 * Read cookies from a file on disk (JSON array or cookie string content).
 *
 * @param filePath - Path of the cookie file.
 * @param defaultDomain - Domain handed to parseCookiesAuto for the content.
 * @returns Parsed cookies, or [] when the file does not exist.
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);
  const present = await cookieFile.exists();

  if (present) {
    const raw = await cookieFile.text();
    return parseCookiesAuto(raw.trim(), defaultDomain);
  }

  return [];
}
|
||||
|
||||
/**
 * Format cookies array into Cookie header string for HTTP requests.
 *
 * A cookie is included when it applies to `targetDomain` and is not expired:
 *  - a domain starting with "." matches the bare domain itself and any
 *    subdomain of it, on a label boundary (".facebook.com" matches
 *    "facebook.com" and "www.facebook.com" but not "evilfacebook.com");
 *  - any other domain must equal `targetDomain` (host-only cookie);
 *  - cookies without a string `domain` are skipped, since they cannot be
 *    shown to apply to the target;
 *  - a truthy `expirationDate` (seconds since epoch) earlier than now
 *    excludes the cookie.
 * Domain comparison is case-insensitive.
 *
 * @param cookies - Candidate cookies.
 * @param targetDomain - Host the request is being sent to.
 * @returns "name=value" pairs joined by "; ", or "" when nothing applies.
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    // JSON-sourced cookies are only validated for name/value, so guard here.
    if (typeof cookie.domain !== "string" || cookie.domain === "") {
      return false;
    }
    const cookieDomain = cookie.domain.toLowerCase();
    if (cookieDomain.startsWith(".")) {
      // Domain cookie (applies to the bare domain and its subdomains).
      const bare = cookieDomain.slice(1);
      return host === bare || host.endsWith(`.${bare}`);
    }
    // Host-only cookie
    return cookieDomain === host;
  };

  const isUnexpired = (cookie: Cookie): boolean =>
    !(cookie.expirationDate && cookie.expirationDate < nowSeconds);

  return cookies
    .filter((cookie) => appliesToHost(cookie) && isUnexpired(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||
|
||||
/**
 * Resolve cookies from the first source that yields any, in priority order:
 * explicit parameter > environment variable > cookie file.
 * Each source may hold either a JSON array or a cookie string.
 *
 * A source that is present but yields no cookies logs a warning (parameter
 * and env var) and the next source is tried; a failure while reading the
 * file is logged and treated as "no cookies".
 *
 * @param config - Marketplace label, default domain, env var name, file path.
 * @param cookiesSource - Optional cookies passed in directly by the caller.
 * @returns The cookies from the winning source.
 * @throws Error when no source yields a valid cookie.
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Source 1: value handed in by the caller.
  if (cookiesSource) {
    const fromParam = parseCookiesAuto(cookiesSource, config.domain);
    if (fromParam.length === 0) {
      console.warn(
        `${config.name} cookies parameter provided but no valid cookies extracted`,
      );
    } else {
      console.log(
        `Loaded ${fromParam.length} ${config.name} cookies from parameter`,
      );
      return fromParam;
    }
  }

  // Source 2: environment variable named by the config.
  const rawEnv = process.env[config.envVar];
  if (rawEnv?.trim()) {
    const fromEnv = parseCookiesAuto(rawEnv, config.domain);
    if (fromEnv.length === 0) {
      console.warn(`${config.envVar} env var contains no valid cookies`);
    } else {
      console.log(
        `Loaded ${fromEnv.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return fromEnv;
    }
  }

  // Source 3: cookie file on disk (last resort).
  try {
    const fromFile = await loadCookiesFromFile(config.filePath, config.domain);
    if (fromFile.length > 0) {
      console.log(
        `Loaded ${fromFile.length} ${config.name} cookies from ${config.filePath}`,
      );
      return fromFile;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // Every source came up empty.
  const help = [
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):\n`,
    ` 1. 'cookies' parameter (highest priority), or\n`,
    ` 2. ${config.envVar} environment variable, or\n`,
    ` 3. ${config.filePath} file (lowest priority)\n`,
    'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  ].join("");
  throw new Error(help);
}
|
||||
|
||||
/**
 * Non-throwing wrapper around ensureCookies: resolves to the cookies found,
 * or to an empty array when ensureCookies rejects for any reason.
 *
 * @param config - Marketplace cookie configuration.
 * @param cookiesSource - Optional cookies passed in directly by the caller.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  return ensureCookies(config, cookiesSource).catch((): Cookie[] => []);
}
|
||||
@@ -4,7 +4,10 @@
|
||||
* @param locale - Locale string for formatting (e.g., 'en-CA', 'en-US')
|
||||
* @returns Formatted currency string
|
||||
*/
|
||||
export function formatCentsToCurrency(cents: number, locale: string = "en-CA"): string {
|
||||
export function formatCentsToCurrency(
|
||||
cents: number,
|
||||
locale: string = "en-CA",
|
||||
): string {
|
||||
try {
|
||||
const formatter = new Intl.NumberFormat(locale, {
|
||||
style: "currency",
|
||||
@@ -13,7 +16,7 @@ export function formatCentsToCurrency(cents: number, locale: string = "en-CA"):
|
||||
maximumFractionDigits: 2,
|
||||
});
|
||||
return formatter.format(cents / 100);
|
||||
} catch (error) {
|
||||
} catch {
|
||||
// Fallback if locale is not supported
|
||||
const dollars = (cents / 100).toFixed(2);
|
||||
return `$${dollars}`;
|
||||
|
||||
@@ -3,7 +3,7 @@ export class HttpError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly statusCode: number,
|
||||
public readonly url?: string
|
||||
public readonly url?: string,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "HttpError";
|
||||
@@ -15,7 +15,7 @@ export class NetworkError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly cause?: Error
|
||||
public readonly cause?: Error,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "NetworkError";
|
||||
@@ -26,7 +26,7 @@ export class NetworkError extends Error {
|
||||
export class ParseError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly data?: unknown
|
||||
public readonly data?: unknown,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "ParseError";
|
||||
@@ -38,7 +38,7 @@ export class RateLimitError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly resetTime?: number
|
||||
public readonly resetTime?: number,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "RateLimitError";
|
||||
@@ -87,7 +87,7 @@ export interface FetchHtmlOptions {
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts?: FetchHtmlOptions
|
||||
opts?: FetchHtmlOptions,
|
||||
): Promise<string> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
||||
@@ -137,14 +137,14 @@ export async function fetchHtml(
|
||||
throw new RateLimitError(
|
||||
`Rate limit exceeded for ${url}`,
|
||||
url,
|
||||
resetSeconds
|
||||
resetSeconds,
|
||||
);
|
||||
}
|
||||
|
||||
// Retry on server errors
|
||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -152,7 +152,7 @@ export async function fetchHtml(
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url
|
||||
url,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -174,7 +174,7 @@ export async function fetchHtml(
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -184,14 +184,14 @@ export async function fetchHtml(
|
||||
// Network or other errors
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs))
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new NetworkError(
|
||||
`Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
||||
url,
|
||||
err instanceof Error ? err : undefined
|
||||
err instanceof Error ? err : undefined,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import {
|
||||
fetchFacebookItem,
|
||||
formatCentsToCurrency,
|
||||
formatCookiesForHeader,
|
||||
loadFacebookCookies,
|
||||
parseFacebookAds,
|
||||
parseFacebookCookieString,
|
||||
parseFacebookItem,
|
||||
@@ -183,7 +182,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
});
|
||||
});
|
||||
|
||||
const result = await fetchFacebookItem("123", mockCookies);
|
||||
const _result = await fetchFacebookItem("123", mockCookies);
|
||||
expect(attempts).toBe(2);
|
||||
// Should eventually succeed after retry
|
||||
});
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
|
||||
import { fetchFacebookItems } from "../src/scrapers/facebook";
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import {
|
||||
HttpError,
|
||||
buildSearchUrl,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
buildSearchUrl,
|
||||
resolveCategoryId,
|
||||
resolveLocationId,
|
||||
ValidationError,
|
||||
} from "../src/scrapers/kijiji";
|
||||
|
||||
describe("Location and Category Resolution", () => {
|
||||
@@ -121,20 +120,12 @@ describe("URL Construction", () => {
|
||||
});
|
||||
|
||||
describe("Error Classes", () => {
|
||||
test("HttpError should store status and URL", () => {
|
||||
const error = new HttpError("Not found", 404, "https://example.com");
|
||||
expect(error.message).toBe("Not found");
|
||||
expect(error.statusCode).toBe(404);
|
||||
expect(error.url).toBe("https://example.com");
|
||||
expect(error.name).toBe("HttpError");
|
||||
});
|
||||
|
||||
test("NetworkError should store URL and cause", () => {
|
||||
const cause = new Error("Connection failed");
|
||||
const error = new NetworkError(
|
||||
"Network error",
|
||||
"https://example.com",
|
||||
cause
|
||||
cause,
|
||||
);
|
||||
expect(error.message).toBe("Network error");
|
||||
expect(error.url).toBe("https://example.com");
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { formatCentsToCurrency, slugify } from "../src/scrapers/kijiji";
|
||||
|
||||
describe("Utility Functions", () => {
|
||||
|
||||
@@ -5,12 +5,15 @@ const PORT = process.env.MCP_PORT || 4006;
|
||||
|
||||
const server = Bun.serve({
|
||||
port: PORT as number | string,
|
||||
idleTimeout: 0,
|
||||
idleTimeout: 255, // 255 seconds (max allowed)
|
||||
routes: {
|
||||
// MCP metadata discovery endpoint
|
||||
"/.well-known/mcp/server-card.json": new Response(JSON.stringify(serverCard), {
|
||||
"/.well-known/mcp/server-card.json": new Response(
|
||||
JSON.stringify(serverCard),
|
||||
{
|
||||
headers: { "Content-Type": "application/json" },
|
||||
}),
|
||||
},
|
||||
),
|
||||
|
||||
// MCP JSON-RPC 2.0 protocol endpoint
|
||||
"/mcp": async (req: Request) => {
|
||||
@@ -19,13 +22,13 @@ const server = Bun.serve({
|
||||
}
|
||||
return Response.json(
|
||||
{ message: "MCP endpoint requires POST request" },
|
||||
{ status: 405 }
|
||||
{ status: 405 },
|
||||
);
|
||||
},
|
||||
},
|
||||
|
||||
// Fallback for all other routes
|
||||
fetch(req: Request) {
|
||||
fetch(_req: Request) {
|
||||
return new Response("Not Found", { status: 404 });
|
||||
},
|
||||
});
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { fetchKijijiItems, fetchFacebookItems, fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
import { tools } from "./tools";
|
||||
|
||||
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
|
||||
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
|
||||
|
||||
/**
|
||||
* Handle MCP JSON-RPC 2.0 protocol requests
|
||||
*/
|
||||
@@ -16,7 +18,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
error: { code: -32600, message: "Invalid Request" },
|
||||
id: body.id,
|
||||
},
|
||||
{ status: 400 }
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
@@ -38,7 +40,8 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
name: "marketplace-scrapers",
|
||||
version: "1.0.0",
|
||||
},
|
||||
instructions: "Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
||||
instructions:
|
||||
"Use search_kijiji, search_facebook, or search_ebay tools to find listings across Canadian marketplaces",
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -78,15 +81,18 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32602, message: "Invalid params: name and arguments required" },
|
||||
error: {
|
||||
code: -32602,
|
||||
message: "Invalid params: name and arguments required",
|
||||
},
|
||||
{ status: 400 }
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
|
||||
// Route tool calls to appropriate handlers
|
||||
try {
|
||||
let result;
|
||||
let result: unknown;
|
||||
|
||||
if (name === "search_kijiji") {
|
||||
const query = args.query;
|
||||
@@ -97,8 +103,45 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchKijijiItems(query, args.maxItems || 5);
|
||||
result = items || [];
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.location) params.append("location", args.location);
|
||||
if (args.category) params.append("category", args.category);
|
||||
if (args.keywords) params.append("keywords", args.keywords);
|
||||
if (args.sortBy) params.append("sortBy", args.sortBy);
|
||||
if (args.sortOrder) params.append("sortOrder", args.sortOrder);
|
||||
if (args.maxPages)
|
||||
params.append("maxPages", args.maxPages.toString());
|
||||
if (args.priceMin)
|
||||
params.append("priceMin", args.priceMin.toString());
|
||||
if (args.priceMax)
|
||||
params.append("priceMax", args.priceMax.toString());
|
||||
if (args.cookies) params.append("cookies", args.cookies);
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/kijiji?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
} else if (name === "search_facebook") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
@@ -108,14 +151,37 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchFacebookItems(
|
||||
query,
|
||||
args.maxItems || 5,
|
||||
args.location || "toronto",
|
||||
25,
|
||||
args.cookiesSource
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.location) params.append("location", args.location);
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.cookiesSource) params.append("cookies", args.cookiesSource);
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] Facebook API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
result = items || [];
|
||||
} else if (name === "search_ebay") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
@@ -125,16 +191,50 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchEbayItems(query, args.maxItems || 5, {
|
||||
minPrice: args.minPrice,
|
||||
maxPrice: args.maxPrice,
|
||||
strictMode: args.strictMode || false,
|
||||
exclusions: args.exclusions || [],
|
||||
keywords: args.keywords || [query],
|
||||
buyItNowOnly: args.buyItNowOnly !== false,
|
||||
canadaOnly: args.canadaOnly !== false,
|
||||
});
|
||||
result = items || [];
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.minPrice)
|
||||
params.append("minPrice", args.minPrice.toString());
|
||||
if (args.maxPrice)
|
||||
params.append("maxPrice", args.maxPrice.toString());
|
||||
if (args.strictMode !== undefined)
|
||||
params.append("strictMode", args.strictMode.toString());
|
||||
if (args.exclusions?.length)
|
||||
params.append("exclusions", args.exclusions.join(","));
|
||||
if (args.keywords?.length)
|
||||
params.append("keywords", args.keywords.join(","));
|
||||
if (args.buyItNowOnly !== undefined)
|
||||
params.append("buyItNowOnly", args.buyItNowOnly.toString());
|
||||
if (args.canadaOnly !== undefined)
|
||||
params.append("canadaOnly", args.canadaOnly.toString());
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.cookies) params.append("cookies", args.cookies);
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] eBay API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
} else {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
@@ -156,11 +256,15 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
id,
|
||||
error: { code: -32603, message: `Tool execution failed: ${errorMessage}` },
|
||||
error: {
|
||||
code: -32603,
|
||||
message: `Tool execution failed: ${errorMessage}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -172,16 +276,17 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
id,
|
||||
error: { code: -32601, message: `Method not found: ${method}` },
|
||||
},
|
||||
{ status: 404 }
|
||||
{ status: 404 },
|
||||
);
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error";
|
||||
return Response.json(
|
||||
{
|
||||
jsonrpc: "2.0",
|
||||
error: { code: -32700, message: `Parse error: ${errorMessage}` },
|
||||
},
|
||||
{ status: 400 }
|
||||
{ status: 400 },
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
*/
|
||||
|
||||
export const serverCard = {
|
||||
$schema: "https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
||||
$schema:
|
||||
"https://static.modelcontextprotocol.io/schemas/mcp-server-card/v1.json",
|
||||
version: "1.0",
|
||||
protocolVersion: "2025-06-18",
|
||||
serverInfo: {
|
||||
@@ -20,6 +21,7 @@ export const serverCard = {
|
||||
listChanged: true,
|
||||
},
|
||||
},
|
||||
description: "Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
||||
description:
|
||||
"Scrapes marketplace listings from Kijiji, Facebook Marketplace, and eBay",
|
||||
tools: "dynamic",
|
||||
};
|
||||
|
||||
@@ -13,11 +13,50 @@ export const tools = [
|
||||
type: "string",
|
||||
description: "Search query for Kijiji listings",
|
||||
},
|
||||
maxItems: {
|
||||
location: {
|
||||
type: "string",
|
||||
description:
|
||||
"Location name or ID (e.g., 'toronto', 'gta', 'ontario')",
|
||||
},
|
||||
category: {
|
||||
type: "string",
|
||||
description:
|
||||
"Category name or ID (e.g., 'computers', 'furniture', 'bikes')",
|
||||
},
|
||||
keywords: {
|
||||
type: "string",
|
||||
description: "Additional keywords to filter results",
|
||||
},
|
||||
sortBy: {
|
||||
type: "string",
|
||||
description: "Sort results by field",
|
||||
enum: ["relevancy", "date", "price", "distance"],
|
||||
default: "relevancy",
|
||||
},
|
||||
sortOrder: {
|
||||
type: "string",
|
||||
description: "Sort order",
|
||||
enum: ["asc", "desc"],
|
||||
default: "desc",
|
||||
},
|
||||
maxPages: {
|
||||
type: "number",
|
||||
description: "Maximum number of items to return",
|
||||
description: "Maximum pages to fetch (~40 items per page)",
|
||||
default: 5,
|
||||
},
|
||||
priceMin: {
|
||||
type: "number",
|
||||
description: "Minimum price in cents",
|
||||
},
|
||||
priceMax: {
|
||||
type: "number",
|
||||
description: "Maximum price in cents",
|
||||
},
|
||||
cookies: {
|
||||
type: "string",
|
||||
description:
|
||||
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
@@ -52,7 +91,8 @@ export const tools = [
|
||||
},
|
||||
{
|
||||
name: "search_ebay",
|
||||
description: "Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
||||
description:
|
||||
"Search eBay for listings matching a query (default: Buy It Now only, Canada only)",
|
||||
inputSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -98,6 +138,11 @@ export const tools = [
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
cookies: {
|
||||
type: "string",
|
||||
description:
|
||||
"Optional: eBay session cookies to bypass bot detection (format: 'name1=value1; name2=value2')",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
|
||||
26
scripts/biome-symlink.sh
Executable file
26
scripts/biome-symlink.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
#
# Replace every biome executable found under ./node_modules with a symlink
# to the biome binary that is on PATH.
#
# Requires: biome and fd on PATH. Run from the directory that contains
# node_modules.
# Exit status: 0 on success (or nothing to do), 1 if a required tool is
# missing or any replacement failed.

set -u

# Resolve the system biome executable. `command -v` is a shell builtin, unlike
# `which`; `|| true` leaves the variable empty so the check below reports it.
BIOME_PATH=$(command -v biome || true)

if [[ -z "$BIOME_PATH" ]]; then
  echo "Error: biome executable not found in PATH" >&2
  exit 1
fi

# Without this check a missing fd would be misreported as "nothing found".
if ! command -v fd >/dev/null; then
  echo "Error: fd executable not found in PATH" >&2
  exit 1
fi

# Collect matches NUL-delimited so paths containing whitespace or glob
# characters stay intact.
files=()
while IFS= read -r -d '' file; do
  files+=("$file")
done < <(fd biome node_modules --type executable --no-ignore --follow --print0)

if (( ${#files[@]} == 0 )); then
  echo "No biome executables found in node_modules"
  exit 0
fi

# Replace each match with a symlink to the system biome.
status=0
for file in "${files[@]}"; do
  # -ef is true when both paths resolve to the same file. That covers matches
  # that were already relinked and the case where the biome on PATH is itself
  # inside node_modules; removing it would leave a self-referencing link.
  if [[ "$file" -ef "$BIOME_PATH" ]]; then
    echo "Skipping $file (already resolves to $BIOME_PATH)"
    continue
  fi

  echo "Replacing $file with symlink to $BIOME_PATH"
  if ! { rm -- "$file" && ln -s -- "$BIOME_PATH" "$file"; }; then
    echo "Error: failed to replace $file" >&2
    status=1
  fi
done

echo "Done."
exit "$status"
|
||||
30
scripts/remove-eslint.sh
Executable file
30
scripts/remove-eslint.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
#
# Delete every line matching PATTERN from each regular file that `fd` lists
# under the current directory. sed keeps the previous content of each edited
# file next to it as "<file>.bak".
#
# Requires: fd, rg, awk, tr and sed on PATH.

PATTERN="eslint"

# Read the file list NUL-delimited so names containing whitespace or glob
# characters are treated as single paths.
while IFS= read -r -d '' file; do
  [[ -f "$file" ]] || continue

  # Never edit this script while it is running: its own PATTERN assignment
  # matches the pattern.
  if [[ "$file" -ef "${BASH_SOURCE[0]}" ]]; then
    continue
  fi

  # rg prints "<line>:<text>" for each match. Keep only purely numeric first
  # fields so any other rg output cannot end up in the sed script, then join
  # the numbers with commas for the log message.
  LINE_NUMBERS=$(
    rg --line-number --no-filename -- "$PATTERN" "$file" \
      | awk -F':' '$1 ~ /^[0-9]+$/ { print $1 }' \
      | tr '\n' ','
  )

  # Remove trailing comma if any
  LINE_NUMBERS=${LINE_NUMBERS%,}

  if [[ -n "$LINE_NUMBERS" ]]; then
    echo "Deleting lines $LINE_NUMBERS from $file..."

    # A comma list is not a list of addresses in sed: "3,9d" deletes the
    # whole range 3-9 and "3,7,9d" is a syntax error. Emit one delete command
    # per matched line instead ("3d;7d;9d"). Line numbers refer to the input,
    # so earlier deletions do not shift later ones.
    # NOTE: -i with an attached suffix is accepted by both GNU and BSD sed.
    sed -i.bak -e "${LINE_NUMBERS//,/d;}d" -- "$file"

    # Optional: Remove the backup file created by sed -i.bak
    # rm -- "${file}.bak"
  else
    echo "$file: No lines matching pattern found."
  fi
done < <(fd --print0 .)
|
||||
25
scripts/start.sh
Executable file
25
scripts/start.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
#
# Launch the API server and the MCP server as background jobs and stay in the
# foreground until both have exited. SIGTERM/SIGINT are forwarded to the
# services as SIGTERM.
#
# Environment: API_PORT (default 4005) and MCP_PORT (default 4006) are used
# by this script only for the log messages below.
# Exit status: non-zero if either service exits non-zero or fails to start;
# 143 after SIGTERM, 130 after SIGINT.
set -e

API_PID=""
MCP_PID=""

# Stop whichever services have been started so far, reap them, and exit with
# the status given as $1. Exiting here matters: without it a signal that
# arrives before the MCP server is launched would stop the API server and
# then carry on starting the MCP server.
shutdown() {
  local pid
  echo "Received shutdown signal, stopping services..."
  for pid in "$API_PID" "$MCP_PID"; do
    if [[ -n "$pid" ]]; then
      # The process may already be gone; that is not an error here.
      kill -TERM "$pid" 2>/dev/null || true
    fi
  done
  wait || true
  exit "$1"
}

# Trap SIGTERM and SIGINT for graceful shutdown
trap 'shutdown 143' TERM
trap 'shutdown 130' INT

# Start API Server in background
echo "Starting API Server on port ${API_PORT:-4005}..."
bun dist/api/index.js &
API_PID=$!

# Give API server a moment to initialize
sleep 1

# Do not go on to report success if the API server already died.
if ! kill -0 "$API_PID" 2>/dev/null; then
  echo "Error: API Server exited during startup" >&2
  exit 1
fi

# Start MCP Server in background
echo "Starting MCP Server on port ${MCP_PORT:-4006}..."
bun dist/mcp/index.js &
MCP_PID=$!

echo "Both services started successfully"
echo "API Server PID: $API_PID"
echo "MCP Server PID: $MCP_PID"

# Wait for both processes. Each PID is waited on separately so a failure of
# either service is reflected in the exit status; `|| status=$?` keeps
# `set -e` from aborting before the second service has been reaped.
status=0
wait "$API_PID" || status=$?
wait "$MCP_PID" || status=$?
exit "$status"
|
||||
Reference in New Issue
Block a user