Compare commits

..

43 Commits

Author SHA1 Message Date
ec545723bb feat(facebook): add challenge detection and session warming utilities
facebook-challenge.ts: session warmup, header construction, and challenge type detection. Spec document for the anti-bot challenge solver design.
2026-05-02 19:03:00 -04:00
0a246a29bf feat(facebook): add session warming and challenge detection
Facebook Marketplace no longer requires authentication cookies.
Session warming sends proper browser headers. Checkpoint and
login-wall challenges are detected and handled gracefully.
Added marketplace_product_details_page.target extraction path
for current item page structure.
2026-05-02 18:58:53 -04:00
7ab33d0b02 chore: format markdown
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-05-01 11:42:54 -04:00
d2c3c07e7d docs: price filtering schema adjustments
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 23:18:49 -04:00
0470a7bec7 docs(mcp): clarify price filters are dollars 2026-04-30 23:17:59 -04:00
89ad1c521f fix(api): parse price filters as dollars 2026-04-30 23:17:56 -04:00
5c732287c5 test: guard live listing prices 2026-04-30 22:46:48 -04:00
20fb46190a test: add live parser script 2026-04-30 22:46:07 -04:00
e791fc5478 test(facebook): add live parser suite 2026-04-30 22:44:28 -04:00
c1fa5168dc test(kijiji): add live parser suite 2026-04-30 22:43:52 -04:00
ec2a26cedf test(ebay): add live parser suite 2026-04-30 22:42:32 -04:00
5d99e984e0 docs: plan live parser tests 2026-04-30 22:41:41 -04:00
b657ea594a chore: update agents docs
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:29:01 -04:00
5651a194e9 chore: use biome check instead of biome ci
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:28:02 -04:00
31cc0660bc refactor(ebay): reuse fetchHtml after challenge
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 22:26:24 -04:00
fc7200777e style: format expected json output in protocol test 2026-04-30 22:25:47 -04:00
f68a5a8d9b feat(linter): enforce correctness on unused imports
Configures the linter to treat unused imports as an error under the
`correctness` rule category. This tightens up code quality standards by
ensuring all imported bindings are utilized.
If the import is unused, there is a high chance refactoring missed this
flow. Review in-depth root causes.
2026-04-30 22:24:06 -04:00
a6b24b318e fix(types): expose argon2 declaration globally 2026-04-30 22:16:48 -04:00
0873df7e82 chore: merge code-smell-cleanup 2026-04-30 21:08:34 -04:00
24e0a8266e Revert "test: preload core fetch guard"
This reverts commit 28b3267b7d.
2026-04-30 20:58:06 -04:00
db173aef1b Revert "chore: add sentinel file for bun test root"
This reverts commit d1cd028f34.
2026-04-30 20:58:06 -04:00
d1cd028f34 chore: add sentinel file for bun test root 2026-04-30 20:56:14 -04:00
28b3267b7d test: preload core fetch guard 2026-04-30 20:53:31 -04:00
c0dda57f64 test: require explicit fetch mocks 2026-04-30 20:51:13 -04:00
31866de787 refactor: clean kijiji scraper internals 2026-04-30 20:48:15 -04:00
9c4c347933 feat: ebay splashui challenge solver
argon2id pow → /challengesvc/answer → chlgref cookie
warm homepage for akamai cookies, detect 307 redirect,
solve + retry transparently in fetchEbayItems flow
2026-04-30 20:44:37 -04:00
53eafe6d4c chore: agent-browser skills path env
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 20:44:05 -04:00
84f17fbdfd chore: ebay parser fix
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 16:56:55 -04:00
3a722a2d11 chore: agent-browser vars
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-30 16:56:44 -04:00
f95b974c7e fix: harden shared http helper 2026-04-29 21:09:10 -04:00
f5339cadf1 style: format shared http refactor 2026-04-29 21:05:36 -04:00
5d86a4e54d fix: preserve ebay rate-limit fallback 2026-04-29 14:52:08 -04:00
82e7abc057 fix: keep shared http refactor in scope 2026-04-29 14:48:47 -04:00
6e50ebf901 refactor: share scraper http fetching 2026-04-29 13:14:20 -04:00
5ecb645ee3 docs: smell cleanup plan
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-29 13:09:38 -04:00
82e12283de docs: surface Kijiji AND-matching behavior in tool, API, and MCP responses
Kijiji zero-result queries (e.g. 'macbook air m1 apple silicon') are
confusing because the failure mode is non-obvious. Surface the root
cause everywhere the caller can see it:
- MCP tool description warns about AND-matching and gives a concrete
  before/after example
- API 404 body includes the actionable hint via emptySearchResponse(hint)
- Core scraper logs the built URL and tip on page-1 zero results
- MCP handler unwraps the API message field so the hint reaches the LLM
2026-04-29 13:06:31 -04:00
22eb65d4a2 refactor: share mcp api calls 2026-04-29 05:37:24 -04:00
abdd39d65c fix: complete ebay integer validation test coverage 2026-04-29 00:56:37 -04:00
3e4e35c9ae fix: tighten route integer parsing and test coverage 2026-04-29 00:32:23 -04:00
3ea6ee3938 fix: strictly parse route integers 2026-04-29 00:12:26 -04:00
d178f9c9cb fix: remove cookie query forwarding 2026-04-28 23:52:45 -04:00
9cbba9ba13 chore: ignore local worktrees 2026-04-28 23:08:04 -04:00
b6aaec0b65 chore: update ruler docs
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-04-28 22:29:12 -04:00
55 changed files with 4643 additions and 997 deletions

4
.envrc
View File

@@ -1,4 +1,8 @@
export DIRENV_WARN_TIMEOUT=20s export DIRENV_WARN_TIMEOUT=20s
export AGENT_BROWSER_EXECUTABLE_PATH=/run/current-system/sw/bin/google-chrome-unstable
export AGENT_BROWSER_ENGINE=chrome
export AGENT_BROWSER_HEADED=0
export AGENT_BROWSER_SKILLS_DIR=.claude/skills
export OPENCODE_CONFIG_CONTENT="{\"plugin\":[\"superpowers@git+https://github.com/obra/superpowers.git\"]}" export OPENCODE_CONFIG_CONTENT="{\"plugin\":[\"superpowers@git+https://github.com/obra/superpowers.git\"]}"
eval "$(devenv direnvrc)" eval "$(devenv direnvrc)"

1
.gitignore vendored
View File

@@ -34,6 +34,7 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
.cache .cache
*.tsbuildinfo *.tsbuildinfo
.turbo .turbo
.worktrees/
# IntelliJ based IDEs # IntelliJ based IDEs
.idea .idea

View File

@@ -1,52 +1,9 @@
## Bun Guidelines ## Bun Guide
**CRITICAL**: Do not assume you know full Bun APIs. For **ANY** Bun API you use, confirm them by using `bun-docs` MCP tools. - Package manager/runtime/test runner is Bun `1.3.13`.
- Use `bun install`, `bun run <script>`, `bun test`, and `bun build`; do not add npm/yarn/pnpm scripts.
Default to using Bun instead of Node.js. - Prefer Bun-native runtime APIs already used in repo: `Bun.serve`, built-in `fetch`, Web APIs, and `bun:test`.
- Keep servers framework-free. Do not introduce Express/Koa/Fastify for the adapters.
- Use `bun <file>` instead of `node <file>` or `ts-node <file>` - Bun auto-loads `.env`; do not add `dotenv`.
- Use `bun test` instead of `jest` or `vitest` - For tests, import from `bun:test` and restore mocked globals/env in `afterEach` or `finally`.
- Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild` - Root `bun test` is misleading because `bunfig.toml` sets a dummy root. Run package test paths explicitly.
- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install`
- Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>`
- Use `bunx <package> <command>` instead of `npx <package> <command>`
- Bun automatically loads .env, so don't use dotenv.
### APIs
- `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`.
- `bun:sqlite` for SQLite. Don't use `better-sqlite3`.
- `Bun.redis` for Redis. Don't use `ioredis`.
- `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`.
- `WebSocket` is built-in. Don't use `ws`.
- Prefer `Bun.file` over `node:fs`'s readFile/writeFile
- Bun.$`ls` instead of execa.
### Testing
#### Quick Start
- Run tests: `bun test`
- Write tests in `tests/` folder
#### Test Structure
- Use `describe` blocks to group related tests
- Use `test` for individual test cases
- Use `beforeEach`/`afterEach` for setup/teardown
#### Assertions
- Import: `import { test, expect, describe, beforeEach, afterEach, mock } from "bun:test";`
- Common: `expect(value).toBe(expected)`, `expect(fn).rejects.toThrow()`
- Async: `await expect(asyncFn()).resolves.toBe(expected)`
#### Mocking
- Mock functions: `mock(fn)`
- Mock globals: `global.fetch = mock(...)`
- Restore mocks in `afterEach` or `finally`
#### Best Practices
- Mock external APIs (fetch, file I/O)
- Test error cases and edge conditions
- Use descriptive test names
- Clean up resources in `afterEach`
For more information, read the Bun API docs in `node_modules/bun-types/docs/**.mdx`.

View File

@@ -2,37 +2,47 @@
## Repo Shape ## Repo Shape
- Bun workspace monorepo. - Bun workspace monorepo with packages under `packages/*`.
- `packages/core`: scraper logic, parsing, shared cookie/http/format helpers, and the only checked-in tests. - `packages/core`: scraper behavior, parsing, result types, cookie handling, HTTP helpers.
- `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes. - `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes over core.
- `packages/mcp-server`: MCP JSON-RPC adapter that proxies to the API server. - `packages/mcp-server`: MCP/JSON-RPC adapter that proxies to the API server.
- `dist/`: build output. Do not edit generated files here. - `cookies/`: local cookie docs/examples only. Treat real cookie files as secrets.
- `cookies/`: local cookie examples and docs. Never commit real session cookies. - `dist/`, `node_modules/`, `.turbo/`, `.direnv/`, `.devenv/`: generated/vendor/cache. Do not edit.
## Commands ## Commands
- Install: `bun install` - Install: `bun install`
- Lint/format check: `bun run ci` - Lint/format/typecheck: `bun run ci`
- Build everything: `bun run build` - Build all packages: `bun run build`
- Run tests: `bun test` - Build bundled runtime output: `bun run build:all`
- Run tests: `bun test packages/core/test packages/api-server/test packages/mcp-server/test`
- API dev server: `bun run --cwd packages/api-server dev` - API dev server: `bun run --cwd packages/api-server dev`
- MCP dev server: `bun run --cwd packages/mcp-server dev` - MCP dev server: `bun run --cwd packages/mcp-server dev`
## Repo Conventions ## Boundaries
- Keep marketplace scraping behavior in `packages/core`. `api-server` and `mcp-server` stay thin adapters. - Marketplace behavior belongs in `packages/core`, not adapter packages.
- Preserve cookie precedence everywhere: request parameter > environment variable > cookie file. - HTTP route code should parse request input, call core, and map status/errors.
- Shared public surface for scraper code is `packages/core/src/index.ts`. Update exports deliberately. - MCP code should define tools, validate JSON-RPC flow, and map tool args to API URLs.
- Tests should stay deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints. - Keep API query params and MCP tool args in sync.
- Use Bun and Bun-native APIs in this repo. Do not introduce Node-specific tooling unless already required. - Shared public surface for scraper code is `packages/core/src/index.ts`; update exports deliberately.
- Biome and strict TypeScript are part of the contract. Fix code to satisfy them; do not relax config.
## Invariants
- Cookie precedence in core helpers: explicit/request cookie string before environment variable.
- Tests must be deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints.
- Use Bun and Bun-native APIs. Do not add Node-specific tooling unless already required.
- Biome and strict TypeScript are part of the contract. Fix code; do not relax config.
## Verification ## Verification
- Core changes: `bun test && bun run ci` - Core changes: `bun test packages/core/test && bun run ci`
- Cross-package contract changes: `bun test && bun run ci && bun run build` - Adapter-only changes: relevant package build plus `bun run ci`
- Adapter-only changes: run the relevant package build plus `bun run ci` - Cross-package contract changes: `bun test packages/core/test packages/api-server/test packages/mcp-server/test && bun run ci && bun run build`
## Gotchas ## Gotchas
- The root `build` script emits separate bundles to `dist/api` and `dist/mcp`, then `scripts/start.sh` launches both. - `bunfig.toml` points test root at `./do-not-run-tests-from-root`; pass package test paths explicitly.
- Root `build` cleans `dist`, then Turbo emits bundles for API and MCP.
- `scripts/start.sh` launches `dist/api/index.js` and `dist/mcp/index.js`.
- Package `tsconfig.json` files override root `include`; shared ambient declarations under root `types/` must be included from each package that typechecks cross-package source.

View File

@@ -1,44 +1,56 @@
# Facebook Marketplace API Reverse Engineering # Facebook Marketplace API Reverse Engineering
## Overview ## Overview
This document tracks findings from reverse-engineering Facebook Marketplace APIs for listing details.
This document tracks findings from reverse-engineering Facebook Marketplace APIs for
listing details.
## Current Implementation Status ## Current Implementation Status
- Search functionality: Implemented in `src/facebook.ts` - Search functionality: Implemented in `src/facebook.ts`
- Individual listing details: Not yet implemented - Individual listing details: Not yet implemented
## Findings ## Findings
### Step 1: Initial Setup ### Step 1: Initial Setup
- Using Chrome DevTools to inspect Facebook Marketplace - Using Chrome DevTools to inspect Facebook Marketplace
- Need to authenticate with Facebook account to access marketplace data - Need to authenticate with Facebook account to access marketplace data
- Cookies required for full access - Cookies required for full access
- Current status: Successfully logged in and accessed marketplace data - Current status: Successfully logged in and accessed marketplace data
### Step 2: Individual Listing Details Analysis - COMPLETED ### Step 2: Individual Listing Details Analysis - COMPLETED
- **Data Location**: Embedded in HTML script tags within `require` array structure - **Data Location**: Embedded in HTML script tags within `require` array structure
- **Path**: `require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target` - **Path**:
`require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Authentication**: Required for full data access - **Authentication**: Required for full data access
- **Current Status**: Successfully reverse-engineered the API structure and data extraction method - **Current Status**: Successfully reverse-engineered the API structure and data
extraction method
### API Endpoints Discovered ### API Endpoints Discovered
#### Search Endpoint #### Search Endpoint
- URL: `https://www.facebook.com/marketplace/{location}/search` - URL: `https://www.facebook.com/marketplace/{location}/search`
- Parameters: `query`, `sortBy`, `exact` - Parameters: `query`, `sortBy`, `exact`
- Data embedded in HTML script tags with `require` structure - Data embedded in HTML script tags with `require` structure
- Authentication: Required (cookies) - Authentication: Required (cookies)
#### Listing Details Endpoint #### Listing Details Endpoint
- **URL Structure**: `https://www.facebook.com/marketplace/item/{listing_id}/` - **URL Structure**: `https://www.facebook.com/marketplace/item/{listing_id}/`
- **Data Source**: Server-side rendered HTML with embedded JSON data in script tags - **Data Source**: Server-side rendered HTML with embedded JSON data in script tags
- **Data Structure**: Relay/GraphQL style data structure under `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target` - **Data Structure**: Relay/GraphQL style data structure under
- **Extraction Method**: Parse JSON from script tags containing marketplace data, navigate to the target object `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Extraction Method**: Parse JSON from script tags containing marketplace data,
navigate to the target object
- **Authentication**: Required (cookies) - **Authentication**: Required (cookies)
### Listing Data Structure Discovered (Current - 2026) ### Listing Data Structure Discovered (Current - 2026)
The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem` object with the following key properties: The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem`
object with the following key properties:
```typescript ```typescript
interface FacebookMarketplaceItem { interface FacebookMarketplaceItem {
@@ -151,6 +163,7 @@ interface FacebookMarketplaceItem {
``` ```
### Example Data Extracted (Current Structure) ### Example Data Extracted (Current Structure)
```json ```json
{ {
"__typename": "GroupCommerceProductItem", "__typename": "GroupCommerceProductItem",
@@ -228,36 +241,47 @@ interface FacebookMarketplaceItem {
## Data Extraction Method ## Data Extraction Method
### Current Method (2026) ### Current Method (2026)
Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML response. The extraction process:
1. **Find the Correct Script**: Look for script tags containing marketplace listing data by searching for key fields like `marketplace_listing_title`, `redacted_description`, and `formatted_price`. Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML
response. The extraction process:
1. **Find the Correct Script**: Look for script tags containing marketplace listing data
by searching for key fields like `marketplace_listing_title`, `redacted_description`,
and `formatted_price`.
2. **Parse JSON Structure**: The data is nested within a `require` array structure: 2. **Parse JSON Structure**: The data is nested within a `require` array structure:
``` ```
require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target
``` ```
3. **Navigate to Target Object**: The actual listing data is a `GroupCommerceProductItem` object containing comprehensive information about the listing, seller, and vehicle details. 3. **Navigate to Target Object**: The actual listing data is a
`GroupCommerceProductItem` object containing comprehensive information about the
listing, seller, and vehicle details.
4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust extraction should search for the target object recursively within the parsed JSON. 4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust
extraction should search for the target object recursively within the parsed JSON.
### Authentication Requirements ### Authentication Requirements
- Valid Facebook session cookies are required - Valid Facebook session cookies are required
- User must be logged in to Facebook - User must be logged in to Facebook
- Marketplace access may be location-restricted - Marketplace access may be location-restricted
## Tools Used ## Tools Used
- Chrome DevTools Protocol - Chrome DevTools Protocol
- Network monitoring - Network monitoring
- HTML/script parsing - HTML/script parsing
- JSON structure analysis - JSON structure analysis
## Implementation Status ## Implementation Status
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details - ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Identified current data structure and extraction method (2026) - ✅ Identified current data structure and extraction method (2026)
- ✅ Documented comprehensive GroupCommerceProductItem interface - ✅ Documented comprehensive GroupCommerceProductItem interface
- ✅ Implemented `extractFacebookItemData()` function with script parsing logic - ✅ Implemented `extractFacebookItemData()` function with script parsing logic
- ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to ListingDetails - ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to
ListingDetails
- ✅ Implemented `fetchFacebookItem()` function with authentication and error handling - ✅ Implemented `fetchFacebookItem()` function with authentication and error handling
- ✅ Updated TypeScript interfaces to match current API structure - ✅ Updated TypeScript interfaces to match current API structure
- ✅ Added robust extraction with fallback methods for changing API paths - ✅ Added robust extraction with fallback methods for changing API paths
@@ -266,12 +290,15 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
### Core Functions Implemented ### Core Functions Implemented
1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from HTML-embedded JSON in script tags 1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from
HTML-embedded JSON in script tags
- Searches for scripts containing marketplace listing data - Searches for scripts containing marketplace listing data
- Uses primary path: `require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target` - Uses primary path:
`require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
- Falls back to recursive search for GroupCommerceProductItem objects - Falls back to recursive search for GroupCommerceProductItem objects
2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to unified ListingDetails format 2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to
unified ListingDetails format
- Handles pricing (FREE listings, CAD currency) - Handles pricing (FREE listings, CAD currency)
- Extracts seller information, location, and status - Extracts seller information, location, and status
- Supports vehicle-specific metadata - Supports vehicle-specific metadata
@@ -284,25 +311,31 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
- Returns parsed ListingDetails or null on failure - Returns parsed ListingDetails or null on failure
### Authentication Requirements ### Authentication Requirements
- Facebook session cookies required in `./cookies/facebook.json` or provided as parameter
- Facebook session cookies required in `./cookies/facebook.json` or provided as
parameter
- Cookies must include valid authentication tokens for marketplace access - Cookies must include valid authentication tokens for marketplace access
- Handles cookie expiration and domain validation - Handles cookie expiration and domain validation
## Current Implementation Status - 2026 Verification ## Current Implementation Status - 2026 Verification
### Step 3: API Verification and Current Structure Analysis (January 2026) ### Step 3: API Verification and Current Structure Analysis (January 2026)
- **Verification Date**: January 22, 2026 - **Verification Date**: January 22, 2026
- **Status**: Successfully verified current Facebook Marketplace API structure - **Status**: Successfully verified current Facebook Marketplace API structure
- **Data Source**: Embedded JSON in HTML script tags (server-side rendered) - **Data Source**: Embedded JSON in HTML script tags (server-side rendered)
- **Extraction Path**: `require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target` - **Extraction Path**:
`require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
#### Verified Listing Structure (Real Example - 2006 Hyundai Tiburon) #### Verified Listing Structure (Real Example - 2006 Hyundai Tiburon)
- **Listing ID**: 1226468515995685 - **Listing ID**: 1226468515995685
- **Title**: "2006 Hyundai Tiburon" - **Title**: "2006 Hyundai Tiburon"
- **Price**: CA$3,000 (formatted_price.text) - **Price**: CA$3,000 (formatted_price.text)
- **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount": "3000.00"} - **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount":
"3000.00"}
- **Location**: Hamilton, ON (with coordinates: 43.250427246094, -79.963989257812) - **Location**: Hamilton, ON (with coordinates: 43.250427246094, -79.963989257812)
- **Description**: "As is" (redacted_description.text) - **Description**: "As is" (redacted_description.text)
- **Vehicle Details**: - **Vehicle Details**:
- Make: Hyundai - Make: Hyundai
- Model: Tiburon - Model: Tiburon
@@ -323,41 +356,54 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
- **Messaging**: Enabled - **Messaging**: Enabled
#### Current API Characteristics #### Current API Characteristics
- **Authentication**: Still requires valid Facebook session cookies - **Authentication**: Still requires valid Facebook session cookies
- **Data Format**: Server-side rendered HTML with embedded GraphQL/Relay JSON - **Data Format**: Server-side rendered HTML with embedded GraphQL/Relay JSON
- **Structure Stability**: Primary extraction path remains functional - **Structure Stability**: Primary extraction path remains functional
- **Additional Features**: Includes marketplace ratings, seller verification badges, cross-posting info - **Additional Features**: Includes marketplace ratings, seller verification badges,
cross-posting info
### API Changes Observed Since 2024 Documentation ### API Changes Observed Since 2024 Documentation
- **Minimal Changes**: Core data structure largely unchanged - **Minimal Changes**: Core data structure largely unchanged
- **Enhanced Fields**: Added more detailed vehicle specifications and seller profile information - **Enhanced Fields**: Added more detailed vehicle specifications and seller profile
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure information
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure
- **Security Features**: Additional integrity checks and reporting mechanisms - **Security Features**: Additional integrity checks and reporting mechanisms
### Multi-Category Testing Results (January 2026) ### Multi-Category Testing Results (January 2026)
Successfully tested extraction across different listing categories: Successfully tested extraction across different listing categories:
#### 1. Vehicle Listings (Automotive) #### 1. Vehicle Listings (Automotive)
- **Example**: 2006 Hyundai Tiburon (ID: 1226468515995685) - **Example**: 2006 Hyundai Tiburon (ID: 1226468515995685)
- **Status**: ✅ Fully functional - **Status**: ✅ Fully functional
- **Data Extracted**: Complete vehicle specs, pricing, seller info, location coordinates - **Data Extracted**: Complete vehicle specs, pricing, seller info, location coordinates
- **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data, vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color, vehicle_fuel_type - **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data,
vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color,
vehicle_fuel_type
#### 2. Electronics Listings #### 2. Electronics Listings
- **Example**: Nintendo Switch (ID: 3903865769914262) - **Example**: Nintendo Switch (ID: 3903865769914262)
- **Status**: ✅ Fully functional - **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used - like new), seller (Yitao Hou) - **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used -
like new), seller (Yitao Hou)
- **Category**: Electronics (category_id: 479353692612078) - **Category**: Electronics (category_id: 479353692612078)
- **Notes**: Standard GroupCommerceProductItem structure applies - **Notes**: Standard GroupCommerceProductItem structure applies
#### 3. Home Goods/Furniture Listings #### 3. Home Goods/Furniture Listings
- **Example**: Tabletop Mirror (cat not included) (ID: 1082389057290709) - **Example**: Tabletop Mirror (cat not included) (ID: 1082389057290709)
- **Status**: ✅ Fully functional - **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used - like new), seller (Rohit Rehan) - **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used -
like new), seller (Rohit Rehan)
- **Category**: Home Goods (category_id: 1569171756675761) - **Category**: Home Goods (category_id: 1569171756675761)
- **Notes**: Includes detailed description and delivery options - **Notes**: Includes detailed description and delivery options
#### Testing Summary #### Testing Summary
- **Extraction Method**: Consistent across all categories - **Extraction Method**: Consistent across all categories
- **Data Structure**: GroupCommerceProductItem interface works for all listing types - **Data Structure**: GroupCommerceProductItem interface works for all listing types
- **Authentication**: Required for all categories - **Authentication**: Required for all categories
@@ -365,16 +411,20 @@ Successfully tested extraction across different listing categories:
- **Edge Cases**: All tested listings were active/in-person pickup - **Edge Cases**: All tested listings were active/in-person pickup
## Implementation Status - COMPLETED (January 2026) ## Implementation Status - COMPLETED (January 2026)
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details - ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Verified current API structure and extraction method (January 2026) - ✅ Verified current API structure and extraction method (January 2026)
- ✅ Tested extraction across multiple listing categories (vehicles, electronics, home goods) - ✅ Tested extraction across multiple listing categories (vehicles, electronics, home
- ✅ Implemented comprehensive error handling for sold/removed listings and authentication failures goods)
- ✅ Implemented comprehensive error handling for sold/removed listings and
authentication failures
- ✅ Enhanced rate limiting and retry logic (already robust) - ✅ Enhanced rate limiting and retry logic (already robust)
- ✅ Added monitoring and metrics for API stability detection - ✅ Added monitoring and metrics for API stability detection
- ✅ Updated all scraper functions to use verified extraction methods - ✅ Updated all scraper functions to use verified extraction methods
- ✅ Documented comprehensive GroupCommerceProductItem interface with real examples - ✅ Documented comprehensive GroupCommerceProductItem interface with real examples
## Next Steps (Future Maintenance) ## Next Steps (Future Maintenance)
1. Monitor extraction success rates for API change detection 1. Monitor extraction success rates for API change detection
2. Update extraction paths if Facebook changes their API structure 2. Update extraction paths if Facebook changes their API structure
3. Add support for additional marketplace features as they become available 3. Add support for additional marketplace features as they become available

145
KIJIJI.md
View File

@@ -1,9 +1,13 @@
# Kijiji API Findings # Kijiji API Findings
## Overview ## Overview
Kijiji is a Canadian classifieds marketplace that uses a modern web application built with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with client-side state management.
Kijiji is a Canadian classifieds marketplace that uses a modern web application built
with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with
client-side state management.
## Initial Page Load (Homepage) ## Initial Page Load (Homepage)
- **URL**: https://www.kijiji.ca/ - **URL**: https://www.kijiji.ca/
- **Architecture**: Server-side rendered React application with Next.js - **Architecture**: Server-side rendered React application with Next.js
- **Data Sources**: - **Data Sources**:
@@ -12,18 +16,27 @@ Kijiji is a Canadian classifieds marketplace that uses a modern web application
- No initial API calls for listings - data appears to be embedded in HTML - No initial API calls for listings - data appears to be embedded in HTML
## Search Results Page ## Search Results Page
- **URL Pattern**: `https://www.kijiji.ca/b-[location]/[keywords]/k0l0` - **URL Pattern**: `https://www.kijiji.ca/b-[location]/[keywords]/k0l0`
- **Example**: `https://www.kijiji.ca/b-canada/iphone/k0l0` - **Example**: `https://www.kijiji.ca/b-canada/iphone/k0l0`
- **Technology Stack**: Next.js with Apollo GraphQL client - **Technology Stack**: Next.js with Apollo GraphQL client
- **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized GraphQL cache - **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized
GraphQL cache
### GraphQL Data Structure ### GraphQL Data Structure
#### Data Location #### Data Location
Search results data is embedded in the Next.js page props under `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server and sent to the client. Each page (including pagination) has its own pre-rendered data.
Search results data is embedded in the Next.js page props under
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server
and sent to the client.
Each page (including pagination) has its own pre-rendered data.
#### Search Results Container #### Search Results Container
The search results are stored directly in the Apollo ROOT_QUERY with keys following the pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination parameters.
The search results are stored directly in the Apollo ROOT_QUERY with keys following the
pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination
parameters.
```json ```json
{ {
@@ -33,17 +46,20 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
``` ```
#### Pagination Handling #### Pagination Handling
- Each page is server-side rendered with its own embedded data - Each page is server-side rendered with its own embedded data
- No client-side GraphQL requests for pagination - No client-side GraphQL requests for pagination
- URL parameter `?page=N` controls which page data is embedded - URL parameter `?page=N` controls which page data is embedded
- Offset in searchString corresponds to `(page-1) * limit` - Offset in searchString corresponds to `(page-1) * limit`
#### Search Parameters in URL #### Search Parameters in URL
- `k0c{CATEGORY}l{LOCATION}` - Category and location IDs - `k0c{CATEGORY}l{LOCATION}` - Category and location IDs
- `?page=N` - Page number (1-based) - `?page=N` - Page number (1-based)
- Data contains `offset` and `limit` for API-style pagination - Data contains `offset` and `limit` for API-style pagination
#### Individual Listing Structure #### Individual Listing Structure
```json ```json
{ {
"id": "1732061412", "id": "1732061412",
@@ -90,6 +106,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
``` ```
### URL Parameters ### URL Parameters
- `sort=MATCH` - Sort by relevance - `sort=MATCH` - Sort by relevance
- `order=DESC` - Descending order - `order=DESC` - Descending order
- `type=OFFER` - Show offerings (not wanted ads) - `type=OFFER` - Show offerings (not wanted ads)
@@ -102,6 +119,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
- `eaTopAdPosition=1` - ? - `eaTopAdPosition=1` - ?
### Image API ### Image API
- **Endpoint**: `https://media.kijiji.ca/api/v1/` - **Endpoint**: `https://media.kijiji.ca/api/v1/`
- **Pattern**: `/ca-prod-fsbo-ads/images/{uuid}?rule=kijijica-{size}-jpg` - **Pattern**: `/ca-prod-fsbo-ads/images/{uuid}?rule=kijijica-{size}-jpg`
- **Sizes**: 200, 300, 400, 500 pixels - **Sizes**: 200, 300, 400, 500 pixels
@@ -109,10 +127,12 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
### Categories and Locations ### Categories and Locations
#### Category Structure #### Category Structure
Categories are hierarchical with parent-child relationships. The main categories under "Buy & Sell" include:
Categories are hierarchical with parent-child relationships.
The main categories under “Buy & Sell” include:
| ID | Name | Total Results (iPhone search) | | ID | Name | Total Results (iPhone search) |
|----|------|------------------------------| | --- | --- | --- |
| 10 | Buy & Sell | 19956 | | 10 | Buy & Sell | 19956 |
| 12 | Arts & Collectibles | 149 | | 12 | Arts & Collectibles | 149 |
| 767 | Audio | 481 | | 767 | Audio | 481 |
@@ -145,10 +165,11 @@ Categories are hierarchical with parent-child relationships. The main categories
| 26 | Other | 286 | | 26 | Other | 286 |
#### Location Structure #### Location Structure
Locations are also hierarchical, with provinces/states under the main "Canada" location:
Locations are also hierarchical, with provinces/states under the main “Canada” location:
| ID | Name | Total Results (iPhone search) | | ID | Name | Total Results (iPhone search) |
|----|------|------------------------------| | --- | --- | --- |
| 0 | Canada | - | | 0 | Canada | - |
| 9001 | Québec | 2516 | | 9001 | Québec | 2516 |
| 9002 | Nova Scotia | 875 | | 9002 | Nova Scotia | 875 |
@@ -163,16 +184,20 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
| 9011 | Prince Edward Island | 31 | | 9011 | Prince Edward Island | 31 |
#### URL Patterns #### URL Patterns
- Categories: `/b-{category-slug}/canada/{keywords}/k0c{CATEGORY_ID}l0` - Categories: `/b-{category-slug}/canada/{keywords}/k0c{CATEGORY_ID}l0`
- Locations: `/b-buy-sell/{location-slug}/iphone/k0c10l{LOCATION_ID}` - Locations: `/b-buy-sell/{location-slug}/iphone/k0c10l{LOCATION_ID}`
- Combined: `/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}` - Combined:
`/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}`
### Pagination ### Pagination
- Uses offset-based pagination - Uses offset-based pagination
- 40 results per page - 40 results per page
- Total count provided in pagination metadata - Total count provided in pagination metadata
## Authentication & User Management ## Authentication & User Management
- **Authentication System**: OAuth2-based using CIS (Customer Identity Service) - **Authentication System**: OAuth2-based using CIS (Customer Identity Service)
- **Identity Provider**: `id.kijiji.ca` - **Identity Provider**: `id.kijiji.ca`
- **OAuth2 Flow**: - **OAuth2 Flow**:
@@ -184,24 +209,30 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
- **User Features**: Saved searches, messaging, flagging require authentication - **User Features**: Saved searches, messaging, flagging require authentication
## Posting API ## Posting API
- **Posting Flow**: Requires authentication, redirects to login if not authenticated - **Posting Flow**: Requires authentication, redirects to login if not authenticated
- **Posting URL**: `https://www.kijiji.ca/p-post-ad.html` - **Posting URL**: `https://www.kijiji.ca/p-post-ad.html`
- **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated users - **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in anonymous browsing) users
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in
anonymous browsing)
## GraphQL API Endpoint ## GraphQL API Endpoint
- **URL**: `https://www.kijiji.ca/anvil/api` - **URL**: `https://www.kijiji.ca/anvil/api`
- **Method**: POST - **Method**: POST
- **Content-Type**: application/json - **Content-Type**: application/json
- **Headers**: - **Headers**:
- `apollo-require-preflight: true` - `apollo-require-preflight: true`
- Standard CORS headers - Standard CORS headers
- **Authentication**: No authentication required for basic queries (uses cookies for session tracking) - **Authentication**: No authentication required for basic queries (uses cookies for
session tracking)
- **Technology**: Apollo GraphQL server - **Technology**: Apollo GraphQL server
### Sample GraphQL Queries Discovered ### Sample GraphQL Queries Discovered
#### Get Search Categories #### Get Search Categories
```graphql ```graphql
query getSearchCategories($locale: String!) { query getSearchCategories($locale: String!) {
searchCategories { searchCategories {
@@ -218,6 +249,7 @@ Variables: `{"locale": "en-CA"}`
Response includes hierarchical category structure with IDs and localized names. Response includes hierarchical category structure with IDs and localized names.
#### Get Geocode from IP (fails for current IP) #### Get Geocode from IP (fails for current IP)
```graphql ```graphql
query GetGeocodeReverseFromIp { query GetGeocodeReverseFromIp {
geocodeReverseFromIp { geocodeReverseFromIp {
@@ -229,9 +261,11 @@ query GetGeocodeReverseFromIp {
} }
``` ```
This query fails for the current IP address, suggesting geolocation-based features may not work or require different IP ranges. This query fails for the current IP address, suggesting geolocation-based features may
not work or require different IP ranges.
#### Get Category Path #### Get Category Path
```graphql ```graphql
query GetCategoryPath($categoryId: Int!, $locale: String, $locationId: Int) { query GetCategoryPath($categoryId: Int!, $locale: String, $locationId: Int) {
category(id: $categoryId) { category(id: $categoryId) {
@@ -256,25 +290,33 @@ Variables: `{"categoryId": 10, "locationId": 0, "locale": "en-CA"}`
## Latest Findings (2026-01-21) ## Latest Findings (2026-01-21)
### Client-Side GraphQL Queries Observed ### Client-Side GraphQL Queries Observed
- **getSearchCategories**: Retrieves category hierarchy for search filters - **getSearchCategories**: Retrieves category hierarchy for search filters
- **GetGeocodeReverseFromIp**: Attempts to geolocate user (fails for current IP) - **GetGeocodeReverseFromIp**: Attempts to geolocate user (fails for current IP)
### GraphQL Schema Insights ### GraphQL Schema Insights
Testing direct GraphQL queries revealed:
- Field "searchResults" does not exist on Query type
- Suggested alternatives: "searchResultsPage" or "searchUrl"
- This suggests the search functionality may use different GraphQL operations than direct queries
The embedded Apollo state approach appears to be the primary method for accessing search data, with GraphQL used for auxiliary operations like categories and geolocation. Testing direct GraphQL queries revealed:
- Field “searchResults” does not exist on Query type
- Suggested alternatives: “searchResultsPage” or “searchUrl”
- This suggests the search functionality may use different GraphQL operations than
direct queries
The embedded Apollo state approach appears to be the primary method for accessing search
data, with GraphQL used for auxiliary operations like categories and geolocation.
### Server-Side Rendering Architecture ### Server-Side Rendering Architecture
Search results are fully server-side rendered with data embedded in HTML. Each page (including pagination) contains its own pre-rendered data. No client-side GraphQL requests are made for:
Search results are fully server-side rendered with data embedded in HTML. Each page
(including pagination) contains its own pre-rendered data.
No client-side GraphQL requests are made for:
- Initial search results - Initial search results
- Pagination navigation - Pagination navigation
- Search result data - Search result data
### Network Analysis Findings ### Network Analysis Findings
- GraphQL endpoint: `https://www.kijiji.ca/anvil/api` - GraphQL endpoint: `https://www.kijiji.ca/anvil/api`
- Method: POST - Method: POST
- Content-Type: application/json - Content-Type: application/json
@@ -282,7 +324,10 @@ Search results are fully server-side rendered with data embedded in HTML. Each p
- Cookies required for session tracking - Cookies required for session tracking
### Embedded Data Structure ### Embedded Data Structure
Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object. The data includes:
Search results data is embedded in the HTML within Next.js
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object.
The data includes:
- Individual ad listings with complete metadata - Individual ad listings with complete metadata
- Pagination information - Pagination information
@@ -290,20 +335,24 @@ Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.
- Category/location hierarchies - Category/location hierarchies
### Current Scraper Implementation ### Current Scraper Implementation
The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo state: The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo state:
- Uses `extractApolloState()` to parse `__NEXT_DATA__` from HTML - Uses `extractApolloState()` to parse `__NEXT_DATA__` from HTML
- Filters Apollo keys containing "Listing" to find ad data - Filters Apollo keys containing "Listing" to find ad data
- Extracts `url`, `title`, and other metadata from each listing - Extracts `url`, `title`, and other metadata from each listing
- Successfully scrapes listings without needing API authentication - Successfully scrapes listings without needing API authentication
### Authentication Status ### Authentication Status
- **Search functionality**: No authentication required - all search and listing data accessible anonymously
- **Search functionality**: No authentication required - all search and listing data
accessible anonymously
- **Posting functionality**: Requires authentication (redirects to login) - **Posting functionality**: Requires authentication (redirects to login)
- **User features**: Saved searches, messaging require authentication - **User features**: Saved searches, messaging require authentication
- **Rate limiting**: May apply but not observed in anonymous browsing - **Rate limiting**: May apply but not observed in anonymous browsing
### Pagination Implementation ### Pagination Implementation
- Each page is a separate server-rendered route - Each page is a separate server-rendered route
- URL pattern: `/b-{location}/{keywords}/page-{number}/k0{category}l{location_id}` - URL pattern: `/b-{location}/{keywords}/page-{number}/k0{category}l{location_id}`
- No client-side pagination API calls - No client-side pagination API calls
@@ -313,20 +362,24 @@ The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo
## URL Pattern Analysis ## URL Pattern Analysis
### Search URL Structure ### Search URL Structure
`https://www.kijiji.ca/b-{category_slug}/{location_slug}/{keywords}/k0c{category_id}l{location_id}` `https://www.kijiji.ca/b-{category_slug}/{location_slug}/{keywords}/k0c{category_id}l{location_id}`
#### Examples Observed: #### Examples Observed:
- All categories, Canada: `/b-canada/iphone/k0l0` (c0 = All Categories, l0 = Canada) - All categories, Canada: `/b-canada/iphone/k0l0` (c0 = All Categories, l0 = Canada)
- Cell phones category: `/b-cell-phones/canada/iphone/k0c132l0` (c132 = Cell Phones) - Cell phones category: `/b-cell-phones/canada/iphone/k0c132l0` (c132 = Cell Phones)
- With pagination: `/b-canada/iphone/page-2/k0l0` - With pagination: `/b-canada/iphone/page-2/k0l0`
#### URL Components: #### URL Components:
- `c{CATEGORY_ID}`: Category ID (0 = All Categories, 132 = Cell Phones, etc.) - `c{CATEGORY_ID}`: Category ID (0 = All Categories, 132 = Cell Phones, etc.)
- `l{LOCATION_ID}`: Location ID (0 = Canada, 1700272 = GTA, etc.) - `l{LOCATION_ID}`: Location ID (0 = Canada, 1700272 = GTA, etc.)
- `page-{N}`: Pagination (1-based, optional) - `page-{N}`: Pagination (1-based, optional)
- Keywords are slugified in URL path - Keywords are slugified in URL path
### Current Implementation Status ### Current Implementation Status
The existing scraper in `src/kijiji.ts` successfully implements the approach: The existing scraper in `src/kijiji.ts` successfully implements the approach:
- Parses embedded Apollo state from HTML responses - Parses embedded Apollo state from HTML responses
- Handles rate limiting and retries - Handles rate limiting and retries
@@ -336,14 +389,22 @@ The existing scraper in `src/kijiji.ts` successfully implements the approach:
## Listing Details Page ## Listing Details Page
### Overview ### Overview
Similar to search results, listing details pages use server-side rendering with embedded Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing data - all information is pre-rendered on the server.
Similar to search results, listing details pages use server-side rendering with embedded
Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing
data - all information is pre-rendered on the server.
### Data Architecture ### Data Architecture
- **Server-Side Rendering**: Each listing page is fully server-rendered with data embedded in HTML
- **Embedded Apollo State**: Listing data is stored in `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` - **Server-Side Rendering**: Each listing page is fully server-rendered with data
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings, user profiles) fetched via GraphQL API embedded in HTML
- **Embedded Apollo State**: Listing data is stored in
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings,
user profiles) fetched via GraphQL API
### Listing Data Structure ### Listing Data Structure
The main listing data follows the same pattern as search results: The main listing data follows the same pattern as search results:
```json ```json
@@ -385,40 +446,50 @@ The main listing data follows the same pattern as search results:
``` ```
### Client-Side GraphQL Queries ### Client-Side GraphQL Queries
When loading a listing details page, the following GraphQL queries are executed: When loading a listing details page, the following GraphQL queries are executed:
#### 1. getSearchCategories #### 1. getSearchCategories
- **Purpose**: Category hierarchy for navigation - **Purpose**: Category hierarchy for navigation
- **Variables**: `{"locale": "en-CA"}` - **Variables**: `{"locale": "en-CA"}`
- **Response**: Hierarchical category structure - **Response**: Hierarchical category structure
#### 2. getCampaignsForVip #### 2. getCampaignsForVip
- **Purpose**: Advertisement targeting data - **Purpose**: Advertisement targeting data
- **Variables**: `{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}` - **Variables**:
`{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}`
- **Response**: Campaign/ads data (usually null) - **Response**: Campaign/ads data (usually null)
#### 3. GetReviewSummary #### 3. GetReviewSummary
- **Purpose**: Seller review statistics - **Purpose**: Seller review statistics
- **Variables**: `{"userId": "1044934581"}` - **Variables**: `{"userId": "1044934581"}`
- **Response**: Review count and score (usually 0 for new sellers) - **Response**: Review count and score (usually 0 for new sellers)
#### 4. GetProfileMetrics #### 4. GetProfileMetrics
- **Purpose**: Seller profile information - **Purpose**: Seller profile information
- **Variables**: `{"profileId": "1044934581"}` - **Variables**: `{"profileId": "1044934581"}`
- **Response**: Member since date, account type - **Response**: Member since date, account type
#### 5. GetListingsSimilar #### 5. GetListingsSimilar
- **Purpose**: Similar listings for cross-selling - **Purpose**: Similar listings for cross-selling
- **Variables**: `{"listingId": "1705585530", "limit": 10, "isExternalId": false}` - **Variables**: `{"listingId": "1705585530", "limit": 10, "isExternalId": false}`
- **Response**: Array of similar listings with basic metadata - **Response**: Array of similar listings with basic metadata
#### 6. GetGeocodeReverseFromIp #### 6. GetGeocodeReverseFromIp
- **Purpose**: Geolocation-based features - **Purpose**: Geolocation-based features
- **Variables**: `{}` - **Variables**: `{}`
- **Response**: Fails with 404 for most IPs - **Response**: Fails with 404 for most IPs
### Implementation Status ### Implementation Status
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing details from embedded Apollo state:
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing
details from embedded Apollo state:
- ✅ Extracts title, description, price, location - ✅ Extracts title, description, price, location
- ✅ Handles contact-based pricing ("Please Contact") - ✅ Handles contact-based pricing ("Please Contact")
@@ -427,22 +498,30 @@ The existing `parseListing()` function in `src/kijiji.ts` successfully extracts
- ✅ Works without authentication or API keys - ✅ Works without authentication or API keys
### Key Findings ### Key Findings
1. **No Dedicated Listing API**: Unlike search results, there's no separate GraphQL query for individual listing data
2. **Complete Data Available**: All listing information is embedded in the initial HTML response 1. **No Dedicated Listing API**: Unlike search results, there's no separate GraphQL
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data (reviews, similar listings) query for individual listing data
2. **Complete Data Available**: All listing information is embedded in the initial HTML
response
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data
(reviews, similar listings)
4. **Consistent Architecture**: Same Apollo state embedding pattern as search pages 4. **Consistent Architecture**: Same Apollo state embedding pattern as search pages
### Current Scraper Implementation ### Current Scraper Implementation
The scraper successfully extracts listing details by: The scraper successfully extracts listing details by:
1. Fetching the listing URL HTML 1. Fetching the listing URL HTML
2. Parsing embedded `__NEXT_DATA__` Apollo state 2. Parsing embedded `__NEXT_DATA__` Apollo state
3. Extracting the `Listing:{id}` object from Apollo cache 3. Extracting the `Listing:{id}` object from Apollo cache
4. Mapping fields to typed `ListingDetails` interface 4. Mapping fields to typed `ListingDetails` interface
This approach works reliably without requiring authentication or dealing with rate limiting on individual listing fetches. This approach works reliably without requiring authentication or dealing with rate
limiting on individual listing fetches.
## Next Steps ## Next Steps
- Explore posting/authentication APIs (requires user login) - Explore posting/authentication APIs (requires user login)
- Investigate if GraphQL API can be used for programmatic access with proper authentication - Investigate if GraphQL API can be used for programmatic access with proper
authentication
- Test rate limiting patterns and optimal scraping strategies - Test rate limiting patterns and optimal scraping strategies
- Document additional category and location ID mappings - Document additional category and location ID mappings

View File

@@ -1 +1,2 @@
# ca-marketplace-scraper # ca-marketplace-scraper

View File

@@ -15,7 +15,10 @@
"linter": { "linter": {
"enabled": true, "enabled": true,
"rules": { "rules": {
"recommended": true "recommended": true,
"correctness": {
"noUnusedImports": "error"
}
} }
}, },
"javascript": { "javascript": {

View File

@@ -32,6 +32,7 @@
"version": "1.0.0", "version": "1.0.0",
"dependencies": { "dependencies": {
"@typescript/native-preview": "catalog:", "@typescript/native-preview": "catalog:",
"argon2-wasm-pro": "1.1.0",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"linkedom": "^0.18.12", "linkedom": "^0.18.12",
"unidecode": "^1.1.0", "unidecode": "^1.1.0",
@@ -120,6 +121,8 @@
"ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], "ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
"argon2-wasm-pro": ["argon2-wasm-pro@1.1.0", "", {}, "sha512-ApZAKEgbWQILckY+IdjrETB0oTC8L9YHT3JVQhdun77tilExkXNyM/T/qbkvX+Uv68+IQmVwewQwg6yJnSwVxQ=="],
"boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="], "boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="],
"bun-types": ["bun-types@1.3.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-QXKeHLlOLqQX9LgYaHJfzdBaV21T63HhFJnvuRCcjZiaUDpbs5ED1MgxbMra71CsryN/1dAoXuJJJwIv/2drVA=="], "bun-types": ["bun-types@1.3.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-QXKeHLlOLqQX9LgYaHJfzdBaV21T63HhFJnvuRCcjZiaUDpbs5ED1MgxbMra71CsryN/1dAoXuJJJwIv/2drVA=="],

View File

@@ -1,55 +1,18 @@
# Marketplace Cookies Setup # cookies
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings. ## Scope
## Cookie Configuration - This directory is for cookie setup docs and local examples only.
- Treat any real browser cookie export as a secret, even if already present locally.
Authenticated scrapers now read cookies only from environment variables: ## Runtime Sources
1. `FACEBOOK_COOKIE`
2. `EBAY_COOKIE`
--- - Authenticated scrapers read raw `Cookie` header strings from environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`.
- Some core entrypoints also accept explicit cookie strings from request/options; explicit input takes precedence over environment values.
## Facebook Marketplace ## Safety Rules
### Required Cookies - Never commit real cookie values, browser exports, or session files.
- `c_user`: Your Facebook user ID - Use placeholder values in docs: `c_user=123; xs=token; fr=request`.
- `xs`: Facebook session token - Do not paste cookie values into logs, tests, fixtures, or generated agent docs.
- `fr`: Facebook request token - If editing this directory, verify diffs do not contain real `c_user`, `xs`, `fr`, `datr`, `sb`, `s`, `ds2`, or `ebay` values.
- `datr`: Data attribution token
- `sb`: Session browser token
### Setup
```bash
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
```
Use the raw `Cookie` header string copied from an authenticated browser session.
---
## eBay
eBay has aggressive bot detection that blocks requests without valid session cookies.
### Setup
```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
```
Use the raw `Cookie` header string copied from an authenticated browser session.
---
## Important Notes
- Cookies must be from active browser sessions
- Cookies expire and need periodic refresh
- **NEVER** commit real cookies to version control
- Platforms may still block automated scraping despite valid cookies
## Security
Do not commit real cookie values or store them in tracked files.

View File

@@ -1,19 +1,26 @@
# opencode Monorepo Config Adoption Implementation Plan # opencode Monorepo Config Adoption Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. > **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Adopt opencode-style monorepo config: Turbo task orchestration, workspace dep catalog, shared root tsconfig, bunfig.toml, and `exports` field in all packages. **Goal:** Adopt opencode-style monorepo config: Turbo task orchestration, workspace dep
catalog, shared root tsconfig, bunfig.toml, and `exports` field in all packages.
**Architecture:** Pure config changes across 10 files — no source code touched. Root config files are added/updated first, then per-package files updated to reference them. Changes are independent within each task and safe to commit atomically. **Architecture:** Pure config changes across 10 files — no source code touched.
Root config files are added/updated first, then per-package files updated to reference
them. Changes are independent within each task and safe to commit atomically.
**Tech Stack:** Bun workspaces, Turbo 2.x, @tsconfig/bun, TypeScript (tsgo / @typescript/native-preview) **Tech Stack:** Bun workspaces, Turbo 2.x, @tsconfig/bun, TypeScript (tsgo /
@typescript/native-preview)
--- * * *
## File Map ## File Map
| File | Action | Responsible for | | File | Action | Responsible for |
|---|---|---| | --- | --- | --- |
| `package.json` | Modify | Workspace catalog, turbo devDep, @tsconfig/bun devDep, updated scripts | | `package.json` | Modify | Workspace catalog, turbo devDep, @tsconfig/bun devDep, updated scripts |
| `turbo.json` | Create | Task graph: typecheck, build, test | | `turbo.json` | Create | Task graph: typecheck, build, test |
| `tsconfig.json` | Create | Shared TS compiler options for all packages | | `tsconfig.json` | Create | Shared TS compiler options for all packages |
@@ -25,14 +32,16 @@
| `packages/api-server/tsconfig.json` | Modify | Slim — extends root, paths only | | `packages/api-server/tsconfig.json` | Modify | Slim — extends root, paths only |
| `packages/mcp-server/tsconfig.json` | Modify | Slim — extends root, paths only | | `packages/mcp-server/tsconfig.json` | Modify | Slim — extends root, paths only |
--- * * *
### Task 1: Add `bunfig.toml` and `turbo.json` ### Task 1: Add `bunfig.toml` and `turbo.json`
Two new root config files with no dependencies on other tasks. Two new root config files with no dependencies on other tasks.
**Files:** **Files:**
- Create: `bunfig.toml` - Create: `bunfig.toml`
- Create: `turbo.json` - Create: `turbo.json`
- [ ] **Step 1: Create `bunfig.toml`** - [ ] **Step 1: Create `bunfig.toml`**
@@ -83,13 +92,15 @@ git add bunfig.toml turbo.json
git commit -m "chore: add bunfig.toml and turbo.json" git commit -m "chore: add bunfig.toml and turbo.json"
``` ```
--- * * *
### Task 2: Create root `tsconfig.json` ### Task 2: Create root `tsconfig.json`
Shared base tsconfig all packages will extend. Extracts the common options currently duplicated in all 3 per-package tsconfigs. Shared base tsconfig all packages will extend.
Extracts the common options currently duplicated in all 3 per-package tsconfigs.
**Files:** **Files:**
- Create: `tsconfig.json` - Create: `tsconfig.json`
- [ ] **Step 1: Create root `tsconfig.json`** - [ ] **Step 1: Create root `tsconfig.json`**
@@ -130,13 +141,15 @@ git add tsconfig.json
git commit -m "chore: add shared root tsconfig.json" git commit -m "chore: add shared root tsconfig.json"
``` ```
--- * * *
### Task 3: Update root `package.json` ### Task 3: Update root `package.json`
Add workspace catalog, `turbo` + `@tsconfig/bun` devDependencies, and update scripts to use `turbo run`. Add workspace catalog, `turbo` + `@tsconfig/bun` devDependencies, and update scripts to
use `turbo run`.
**Files:** **Files:**
- Modify: `package.json` - Modify: `package.json`
- [ ] **Step 1: Replace root `package.json`** - [ ] **Step 1: Replace root `package.json`**
@@ -180,7 +193,11 @@ Write this complete file:
} }
``` ```
> **Note on catalog versions:** The catalog pins exact versions. The values above are taken from the current package installs. If `@types/bun` was `latest`, check `node_modules/@types/bun/package.json` for the actual installed version and use that. Same for `@typescript/native-preview`. > **Note on catalog versions:** The catalog pins exact versions.
> The values above are taken from the current package installs.
> If `@types/bun` was `latest`, check `node_modules/@types/bun/package.json` for the
> actual installed version and use that.
> Same for `@typescript/native-preview`.
- [ ] **Step 2: Check actual installed versions** - [ ] **Step 2: Check actual installed versions**
@@ -208,7 +225,8 @@ Expected: lock file updated, `turbo` and `@tsconfig/bun` appear in `node_modules
bunx turbo run typecheck --dry bunx turbo run typecheck --dry
``` ```
Expected: output lists the `typecheck` task for each package (even if no `typecheck` script exists yet — turbo will note them as skipped/missing). Expected: output lists the `typecheck` task for each package (even if no `typecheck`
script exists yet — turbo will note them as skipped/missing).
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -217,15 +235,19 @@ git add package.json bun.lock
git commit -m "chore: add workspace catalog and turbo to root package.json" git commit -m "chore: add workspace catalog and turbo to root package.json"
``` ```
--- * * *
### Task 4: Update per-package `package.json` files ### Task 4: Update per-package `package.json` files
Rename `type:check` → `typecheck`, replace `main`/`module` with `exports`, swap pinned dep versions for `catalog:` references. Rename `type:check` → `typecheck`, replace `main`/`module` with `exports`, swap pinned
dep versions for `catalog:` references.
**Files:** **Files:**
- Modify: `packages/core/package.json` - Modify: `packages/core/package.json`
- Modify: `packages/api-server/package.json` - Modify: `packages/api-server/package.json`
- Modify: `packages/mcp-server/package.json` - Modify: `packages/mcp-server/package.json`
- [ ] **Step 1: Replace `packages/core/package.json`** - [ ] **Step 1: Replace `packages/core/package.json`**
@@ -325,7 +347,9 @@ Rename `type:check` → `typecheck`, replace `main`/`module` with `exports`, swa
bun install bun install
``` ```
Expected: no errors. Catalog refs resolved. `bun.lock` updated. Expected: no errors.
Catalog refs resolved.
`bun.lock` updated.
- [ ] **Step 5: Verify typecheck still works per-package** - [ ] **Step 5: Verify typecheck still works per-package**
@@ -345,15 +369,19 @@ git add packages/core/package.json packages/api-server/package.json packages/mcp
git commit -m "chore: use exports field and catalog refs in all packages" git commit -m "chore: use exports field and catalog refs in all packages"
``` ```
--- * * *
### Task 5: Slim per-package `tsconfig.json` files ### Task 5: Slim per-package `tsconfig.json` files
Replace the duplicated full tsconfig in each package with a slim `extends`-based one pointing to root. Replace the duplicated full tsconfig in each package with a slim `extends`-based one
pointing to root.
**Files:** **Files:**
- Modify: `packages/core/tsconfig.json` - Modify: `packages/core/tsconfig.json`
- Modify: `packages/api-server/tsconfig.json` - Modify: `packages/api-server/tsconfig.json`
- Modify: `packages/mcp-server/tsconfig.json` - Modify: `packages/mcp-server/tsconfig.json`
- [ ] **Step 1: Replace `packages/core/tsconfig.json`** - [ ] **Step 1: Replace `packages/core/tsconfig.json`**
@@ -400,7 +428,8 @@ Replace the duplicated full tsconfig in each package with a slim `extends`-based
- [ ] **Step 4: Verify `@tsconfig/bun` is resolvable** - [ ] **Step 4: Verify `@tsconfig/bun` is resolvable**
The root tsconfig extends `@tsconfig/bun/tsconfig.json`. Confirm the package is installed: The root tsconfig extends `@tsconfig/bun/tsconfig.json`. Confirm the package is
installed:
```bash ```bash
ls node_modules/@tsconfig/bun/tsconfig.json ls node_modules/@tsconfig/bun/tsconfig.json
@@ -414,7 +443,8 @@ Expected: file exists.
bun run typecheck bun run typecheck
``` ```
Expected: Turbo runs `typecheck` for all 3 packages in parallel, all pass (or same pre-existing errors — no new ones). Expected: Turbo runs `typecheck` for all 3 packages in parallel, all pass (or same
pre-existing errors — no new ones).
- [ ] **Step 6: Commit** - [ ] **Step 6: Commit**
@@ -423,7 +453,7 @@ git add packages/core/tsconfig.json packages/api-server/tsconfig.json packages/m
git commit -m "chore: slim per-package tsconfigs to extend root" git commit -m "chore: slim per-package tsconfigs to extend root"
``` ```
--- * * *
### Task 6: Smoke test full build pipeline ### Task 6: Smoke test full build pipeline
@@ -437,7 +467,8 @@ Verify everything works end-to-end.
bun run typecheck bun run typecheck
``` ```
Expected: Turbo runs `typecheck` across all packages. Exit 0. Expected: Turbo runs `typecheck` across all packages.
Exit 0.
- [ ] **Step 2: Run full build** - [ ] **Step 2: Run full build**
@@ -445,7 +476,8 @@ Expected: Turbo runs `typecheck` across all packages. Exit 0.
bun run build bun run build
``` ```
Expected: `dist/` cleaned, Turbo runs `build` (core first, then api-server and mcp-server in parallel), build artifacts appear in `dist/api/` and `dist/mcp/`. Expected: `dist/` cleaned, Turbo runs `build` (core first, then api-server and
mcp-server in parallel), build artifacts appear in `dist/api/` and `dist/mcp/`.
- [ ] **Step 3: Verify dist artifacts** - [ ] **Step 3: Verify dist artifacts**
@@ -461,7 +493,9 @@ Expected: compiled output files in both directories.
grep -c '\^' bun.lock | head -5 grep -c '\^' bun.lock | head -5
``` ```
With `exact = true` in bunfig.toml, new installs won't add `^` ranges. Existing `^` ranges in `bun.lock` from before are fine — they'll be resolved to exact on next fresh install. With `exact = true` in bunfig.toml, new installs won't add `^` ranges.
Existing `^` ranges in `bun.lock` from before are fine — they'll be resolved to exact on
next fresh install.
- [ ] **Step 5: Final commit if any loose files** - [ ] **Step 5: Final commit if any loose files**

View File

@@ -1,53 +1,64 @@
# Cookie Env-Only Implementation Plan # Cookie Env-Only Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. > **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove cookie files and request-provided cookie overrides so all authenticated marketplace scraping reads raw `Cookie` header strings only from environment variables. **Goal:** Remove cookie files and request-provided cookie overrides so all authenticated
marketplace scraping reads raw `Cookie` header strings only from environment variables.
**Architecture:** Collapse shared cookie loading to a single env-var reader in `packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to stop accepting request/file cookie inputs. Update the API and MCP adapters so they no longer advertise or forward cookie parameters, and rewrite docs/tests to match the env-only contract. **Architecture:** Collapse shared cookie loading to a single env-var reader in
`packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to
stop accepting request/file cookie inputs.
Update the API and MCP adapters so they no longer advertise or forward cookie
parameters, and rewrite docs/tests to match the env-only contract.
**Tech Stack:** Bun, TypeScript, Bun test, Biome, workspace package exports **Tech Stack:** Bun, TypeScript, Bun test, Biome, workspace package exports
--- * * *
## File Map ## File Map
- Modify: `packages/core/src/utils/cookies.ts` - Modify: `packages/core/src/utils/cookies.ts` Purpose: remove JSON/file/request-source
Purpose: remove JSON/file/request-source loading and keep env-only cookie parsing/formatting. loading and keep env-only cookie parsing/formatting.
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts` Purpose: drop `cookiesSource` /
Purpose: drop `cookiesSource` / `cookiePath` arguments and switch to env-only error text. `cookiePath` arguments and switch to env-only error text.
- Modify: `packages/core/src/scrapers/ebay.ts` - Modify: `packages/core/src/scrapers/ebay.ts` Purpose: remove `opts.cookies` request
Purpose: remove `opts.cookies` request override and use env-only cookie loading. override and use env-only cookie loading.
- Modify: `packages/core/src/index.ts` - Modify: `packages/core/src/index.ts` Purpose: keep exports aligned with tightened core
Purpose: keep exports aligned with tightened core signatures. signatures.
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts` Purpose: replace missing-file
Purpose: replace missing-file coverage with env-only auth tests. coverage with env-only auth tests.
- Create: `packages/core/test/ebay-core.test.ts` - Create: `packages/core/test/ebay-core.test.ts` Purpose: add dedicated eBay auth
Purpose: add dedicated eBay auth regression coverage instead of mixing it into Facebook tests. regression coverage instead of mixing it into Facebook tests.
- Modify: `packages/api-server/src/routes/facebook.ts` - Modify: `packages/api-server/src/routes/facebook.ts` Purpose: stop parsing/forwarding
Purpose: stop parsing/forwarding `cookies` query params. `cookies` query params.
- Modify: `packages/api-server/src/routes/ebay.ts` - Modify: `packages/api-server/src/routes/ebay.ts` Purpose: stop parsing/forwarding
Purpose: stop parsing/forwarding `cookies` query params. `cookies` query params.
- Create: `packages/api-server/test/routes.test.ts` - Create: `packages/api-server/test/routes.test.ts` Purpose: verify Facebook/eBay routes
Purpose: verify Facebook/eBay routes ignore cookie query params and still call core correctly. ignore cookie query params and still call core correctly.
- Modify: `packages/mcp-server/src/protocol/tools.ts` - Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: remove Facebook/eBay
Purpose: remove Facebook/eBay cookie tool inputs and descriptions. cookie tool inputs and descriptions.
- Modify: `packages/mcp-server/src/protocol/handler.ts` - Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: stop mapping removed
Purpose: stop mapping removed cookie tool inputs into API URLs. cookie tool inputs into API URLs.
- Create: `packages/mcp-server/test/protocol.test.ts` - Create: `packages/mcp-server/test/protocol.test.ts` Purpose: verify tool schemas and
Purpose: verify tool schemas and handler URL building no longer include Facebook/eBay cookie fields. handler URL building no longer include Facebook/eBay cookie fields.
- Modify: `cookies/AGENTS.md` - Modify: `cookies/AGENTS.md` Purpose: document env vars as the only supported cookie
Purpose: document env vars as the only supported cookie input. input.
### Task 1: Lock core cookie utilities to env-only loading ### Task 1: Lock core cookie utilities to env-only loading
**Files:** **Files:**
- Modify: `packages/core/src/utils/cookies.ts:19-227` - Modify: `packages/core/src/utils/cookies.ts:19-227`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts` with env-only expectations: Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts`
with env-only expectations:
```ts ```ts
test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => { test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => {
@@ -85,12 +96,14 @@ test("should reject missing Facebook auth env var", async () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
Expected: FAIL because the current implementation still allows missing env values to fall through to file/request-based behavior and does not emit the new env-only error. current implementation still allows missing env values to fall through to
file/request-based behavior and does not emit the new env-only error.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only loader. The target shape is: Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only
loader. The target shape is:
```ts ```ts
export interface CookieConfig { export interface CookieConfig {
@@ -129,8 +142,8 @@ Delete the now-dead helpers and types that exist only for JSON/file/request load
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS for the new
Expected: PASS for the new env-only tests. env-only tests.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -142,10 +155,15 @@ git commit -m "refactor: make cookie loading env-only"
### Task 2: Tighten Facebook core APIs to the new contract ### Task 2: Tighten Facebook core APIs to the new contract
**Files:** **Files:**
- Modify: `packages/core/src/scrapers/facebook.ts:23-29` - Modify: `packages/core/src/scrapers/facebook.ts:23-29`
- Modify: `packages/core/src/scrapers/facebook.ts:214-228` - Modify: `packages/core/src/scrapers/facebook.ts:214-228`
- Modify: `packages/core/src/scrapers/facebook.ts:823-929` - Modify: `packages/core/src/scrapers/facebook.ts:823-929`
- Modify: `packages/core/src/index.ts:5-15` - Modify: `packages/core/src/index.ts:5-15`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
@@ -171,8 +189,9 @@ test("should fail Facebook item fetch when FACEBOOK_COOKIE is unset", async () =
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
Expected: FAIL because the current function signatures and error text still mention parameter/file-based auth paths. current function signatures and error text still mention parameter/file-based auth
paths.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -206,12 +225,14 @@ console.warn(
); );
``` ```
Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep `packages/core/src/index.ts` exporting the tightened functions without the old parameter contract. Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep
`packages/core/src/index.ts` exporting the tightened functions without the old parameter
contract.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS with the new
Expected: PASS with the new env-only Facebook API surface. env-only Facebook API surface.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -223,8 +244,11 @@ git commit -m "refactor: remove facebook cookie overrides"
### Task 3: Tighten eBay core APIs to env-only auth ### Task 3: Tighten eBay core APIs to env-only auth
**Files:** **Files:**
- Modify: `packages/core/src/scrapers/ebay.ts:9-15` - Modify: `packages/core/src/scrapers/ebay.ts:9-15`
- Modify: `packages/core/src/scrapers/ebay.ts:337-389` - Modify: `packages/core/src/scrapers/ebay.ts:337-389`
- Create: `packages/core/test/ebay-core.test.ts` - Create: `packages/core/test/ebay-core.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
@@ -249,8 +273,8 @@ test("should warn and continue without eBay cookies when EBAY_COOKIE is unset",
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/ebay-core.test.ts` Run: `bun test packages/core/test/ebay-core.test.ts` Expected: FAIL because
Expected: FAIL because `loadEbayCookies` still accepts request overrides and mentions file/json sources. `loadEbayCookies` still accepts request overrides and mentions file/json sources.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -276,12 +300,13 @@ async function loadEbayCookies(): Promise<string | undefined> {
} }
``` ```
Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds it into `loadEbayCookies()`. Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds
it into `loadEbayCookies()`.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/ebay-core.test.ts` Run: `bun test packages/core/test/ebay-core.test.ts` Expected: PASS for the eBay
Expected: PASS for the eBay env-only regression coverage. env-only regression coverage.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -293,13 +318,17 @@ git commit -m "refactor: make ebay auth env-only"
### Task 4: Remove cookie query parameters from the API adapter ### Task 4: Remove cookie query parameters from the API adapter
**Files:** **Files:**
- Modify: `packages/api-server/src/routes/facebook.ts:3-33` - Modify: `packages/api-server/src/routes/facebook.ts:3-33`
- Modify: `packages/api-server/src/routes/ebay.ts:3-52` - Modify: `packages/api-server/src/routes/ebay.ts:3-52`
- Create: `packages/api-server/test/routes.test.ts` - Create: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core` so the route contract is explicit: Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core`
so the route contract is explicit:
```ts ```ts
import { afterEach, describe, expect, mock, test } from "bun:test"; import { afterEach, describe, expect, mock, test } from "bun:test";
@@ -347,8 +376,9 @@ test("ebayRoute ignores cookies query parameter", async () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/api-server/test/routes.test.ts` Run: `bun test packages/api-server/test/routes.test.ts` Expected: FAIL because the
Expected: FAIL because the current routes still parse `reqUrl.searchParams.get("cookies")` and forward it downstream. current routes still parse `reqUrl.searchParams.get("cookies")` and forward it
downstream.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -383,8 +413,8 @@ const items = await fetchEbayItems(SEARCH_QUERY, 1, {
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/api-server/test/routes.test.ts` Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS for route
Expected: PASS for route coverage and no remaining adapter references to `cookies` for Facebook/eBay. coverage and no remaining adapter references to `cookies` for Facebook/eBay.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -396,13 +426,17 @@ git commit -m "refactor: remove api cookie query overrides"
### Task 5: Remove cookie inputs from MCP tool schemas and request mapping ### Task 5: Remove cookie inputs from MCP tool schemas and request mapping
**Files:** **Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts:65-148` - Modify: `packages/mcp-server/src/protocol/tools.ts:65-148`
- Modify: `packages/mcp-server/src/protocol/handler.ts:154-211` - Modify: `packages/mcp-server/src/protocol/handler.ts:154-211`
- Create: `packages/mcp-server/test/protocol.test.ts` - Create: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building assertions: Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building
assertions:
```ts ```ts
import { expect, mock, test } from "bun:test"; import { expect, mock, test } from "bun:test";
@@ -445,8 +479,8 @@ expect(calledUrl).not.toContain("cookies=");
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
Expected: FAIL because the current MCP schema and handler still expose and forward those inputs. current MCP schema and handler still expose and forward those inputs.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -465,12 +499,13 @@ Delete the Facebook/eBay cookie tool properties and handler mapping:
// if (args.cookies) params.append("cookies", args.cookies); // if (args.cookies) params.append("cookies", args.cookies);
``` ```
Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by the approved spec. Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by
the approved spec.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS with MCP
Expected: PASS with MCP definitions and handler mapping in sync. definitions and handler mapping in sync.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -482,12 +517,16 @@ git commit -m "refactor: remove mcp cookie parameters"
### Task 6: Rewrite cookie documentation and run full verification ### Task 6: Rewrite cookie documentation and run full verification
**Files:** **Files:**
- Modify: `cookies/AGENTS.md:9-85` - Modify: `cookies/AGENTS.md:9-85`
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if implementation reveals a spec mismatch
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if
implementation reveals a spec mismatch
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Treat docs drift as a contract failure. Capture the required state before editing: Treat docs drift as a contract failure.
Capture the required state before editing:
```md ```md
- Cookie setup docs mention env vars only for Facebook and eBay - Cookie setup docs mention env vars only for Facebook and eBay
@@ -497,14 +536,14 @@ Treat docs drift as a contract failure. Capture the required state before editin
- [ ] **Step 2: Run verification to prove current docs are stale** - [ ] **Step 2: Run verification to prove current docs are stale**
Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md` Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md` Expected: matches
Expected: matches found found
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Rewrite the cookie setup doc so Facebook and eBay each show only env-var setup: Rewrite the cookie setup doc so Facebook and eBay each show only env-var setup:
```md ````md
## Cookie Configuration ## Cookie Configuration
All supported authenticated scrapers read cookies only from environment variables. All supported authenticated scrapers read cookies only from environment variables.
@@ -513,14 +552,14 @@ All supported authenticated scrapers read cookies only from environment variable
```bash ```bash
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request' export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
``` ```
### eBay ### eBay
```bash ```bash
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE' export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
``` ```
``` ````
Remove the file-based and request-parameter sections entirely. Remove the file-based and request-parameter sections entirely.
@@ -534,10 +573,14 @@ Expected: all commands pass
```bash ```bash
git add cookies/AGENTS.md docs/superpowers/specs/2026-04-21-cookie-env-only-design.md git add cookies/AGENTS.md docs/superpowers/specs/2026-04-21-cookie-env-only-design.md
git commit -m "docs: align cookie setup with env-only auth" git commit -m "docs: align cookie setup with env-only auth"
``` ````
## Self-Review ## Self-Review
- Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter, tests, and docs each have explicit tasks. - Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter,
- Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP protocol coverage. tests, and docs each have explicit tasks.
- Type consistency check: `ensureCookies(config)` is the single shared loader name used across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core changes. - Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP
protocol coverage.
- Type consistency check: `ensureCookies(config)` is the single shared loader name used
across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core
changes.

View File

@@ -1,34 +1,49 @@
# Facebook Comet Rewrite Implementation Plan # Facebook Comet Rewrite Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. > **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid Comet-bootstrap parser for both search and item routes. **Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid
Comet-bootstrap parser for both search and item routes.
**Architecture:** Keep authenticated direct HTTP fetches as the transport. Classify each Facebook response first, then parse route-specific Comet bootstrap/state candidates, and fall back to rendered-HTML extraction only when bootstrap decoding cannot produce the expected search or item shape. **Architecture:** Keep authenticated direct HTTP fetches as the transport.
Classify each Facebook response first, then parse route-specific Comet bootstrap/state
candidates, and fall back to rendered-HTML extraction only when bootstrap decoding
cannot produce the expected search or item shape.
**Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http helpers **Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http
helpers
--- * * *
## File Structure ## File Structure
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Owns Facebook fetch flow, response classification, bootstrap candidate extraction, search parsing, item parsing, and HTML fallbacks. - Owns Facebook fetch flow, response classification, bootstrap candidate extraction,
search parsing, item parsing, and HTML fallbacks.
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Owns unit coverage for response classification, bootstrap parsing, fallback parsing, and route-aware item/search extraction behavior. - Owns unit coverage for response classification, bootstrap parsing, fallback parsing,
and route-aware item/search extraction behavior.
- Modify: `packages/core/test/facebook-integration.test.ts` - Modify: `packages/core/test/facebook-integration.test.ts`
- Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping for search/item entrypoints. - Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping
for search/item entrypoints.
### Task 1: Add Route Classification Coverage ### Task 1: Add Route Classification Coverage
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
Add these tests near the Facebook parser tests in `packages/core/test/facebook-core.test.ts`: Add these tests near the Facebook parser tests in
`packages/core/test/facebook-core.test.ts`:
```ts ```ts
test("classifies Comet search responses", () => { test("classifies Comet search responses", () => {
@@ -89,12 +104,14 @@ test("classifies unavailable item responses", () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Expected: FAIL because `classifyFacebookResponse` does not exist yet. Expected: FAIL because `classifyFacebookResponse` does not exist yet.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Add this type and function near the parsing section in `packages/core/src/scrapers/facebook.ts`: Add this type and function near the parsing section in
`packages/core/src/scrapers/facebook.ts`:
```ts ```ts
type FacebookResponseKind = "search" | "item" | "auth_gated" | "unavailable" | "unknown"; type FacebookResponseKind = "search" | "item" | "auth_gated" | "unavailable" | "unknown";
@@ -128,7 +145,8 @@ export function classifyFacebookResponse(htmlString: HTMLString, responseUrl: st
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
Expected: PASS Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -141,8 +159,11 @@ git commit -m "refactor: add facebook response classification"
### Task 2: Add Bootstrap Candidate Extraction ### Task 2: Add Bootstrap Candidate Extraction
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -185,7 +206,8 @@ test("keeps candidate order stable for later scoring", () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Expected: FAIL because `extractFacebookBootstrapCandidates` does not exist. Expected: FAIL because `extractFacebookBootstrapCandidates` does not exist.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -218,7 +240,8 @@ export function extractFacebookBootstrapCandidates(htmlString: HTMLString): Reco
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
Expected: PASS Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -231,10 +254,15 @@ git commit -m "refactor: add facebook bootstrap candidate extraction"
### Task 3: Replace Search Parsing With Candidate Scoring ### Task 3: Replace Search Parsing With Candidate Scoring
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/test/facebook-integration.test.ts` - Modify: `packages/core/test/facebook-integration.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- Test: `packages/core/test/facebook-integration.test.ts` - Test: `packages/core/test/facebook-integration.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -323,12 +351,15 @@ const mockSearchHtml = `
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"` Run:
Expected: FAIL because the current search extractor only understands legacy `marketplace_search` shapes. `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"`
Expected: FAIL because the current search extractor only understands legacy
`marketplace_search` shapes.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
Replace the search extraction internals in `extractFacebookMarketplaceData()` with candidate scoring like this: Replace the search extraction internals in `extractFacebookMarketplaceData()` with
candidate scoring like this:
```ts ```ts
function findSearchEdges(candidate: unknown): FacebookEdge[] | null { function findSearchEdges(candidate: unknown): FacebookEdge[] | null {
@@ -383,7 +414,8 @@ export function extractFacebookMarketplaceData(htmlString: HTMLString): Facebook
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts` Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Expected: PASS for the rewritten search fixtures and existing unaffected tests. Expected: PASS for the rewritten search fixtures and existing unaffected tests.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -396,8 +428,11 @@ git commit -m "refactor: rewrite facebook search parser for comet bootstrap"
### Task 4: Replace Item Parsing With Candidate Scoring ### Task 4: Replace Item Parsing With Candidate Scoring
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -438,7 +473,8 @@ test("extracts item details from Comet permalink bootstrap candidates", () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"`
Expected: FAIL because the current item extractor depends on legacy permalink markers. Expected: FAIL because the current item extractor depends on legacy permalink markers.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -491,8 +527,8 @@ export function extractFacebookItemData(htmlString: HTMLString): FacebookMarketp
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts` Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS for
Expected: PASS for current-shape item tests and remaining parser tests. current-shape item tests and remaining parser tests.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -504,8 +540,11 @@ git commit -m "refactor: rewrite facebook item parser for comet bootstrap"
### Task 5: Add HTML Fallback Extraction ### Task 5: Add HTML Fallback Extraction
**Files:** **Files:**
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-core.test.ts` - Test: `packages/core/test/facebook-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -549,8 +588,10 @@ test("falls back to rendered item HTML when bootstrap payloads are undecodable",
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"` Run:
Expected: FAIL because the extractor currently returns `null` without a structured candidate. `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Expected: FAIL because the extractor currently returns `null` without a structured
candidate.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -607,11 +648,13 @@ function extractItemFallback(htmlString: HTMLString): FacebookMarketplaceItem |
} }
``` ```
Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()` and `extractFacebookItemData()`. Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()`
and `extractFacebookItemData()`.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"` Run:
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
Expected: PASS Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -624,8 +667,11 @@ git commit -m "refactor: add facebook html fallbacks"
### Task 6: Wire Route-Aware Failures Into Entry Points ### Task 6: Wire Route-Aware Failures Into Entry Points
**Files:** **Files:**
- Modify: `packages/core/test/facebook-integration.test.ts` - Modify: `packages/core/test/facebook-integration.test.ts`
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Test: `packages/core/test/facebook-integration.test.ts` - Test: `packages/core/test/facebook-integration.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
@@ -664,8 +710,10 @@ test("returns null for unavailable item responses", async () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"` Run:
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by route/auth state. `bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"`
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by
route/auth state.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -690,12 +738,13 @@ if (itemResponseClass.kind === "unavailable") {
} }
``` ```
Use the actual response URL from `fetchHtml` plumbing if that helper is extended to return both HTML and final URL; otherwise start by threading final URL support through the fetch helper in the same task. Use the actual response URL from `fetchHtml` plumbing if that helper is extended to
return both HTML and final URL; otherwise start by threading final URL support through
the fetch helper in the same task.
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/facebook-integration.test.ts` Run: `bun test packages/core/test/facebook-integration.test.ts` Expected: PASS
Expected: PASS
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -707,19 +756,22 @@ git commit -m "refactor: handle facebook route-aware failure states"
### Task 7: Run Full Verification And Live Probe ### Task 7: Run Full Verification And Live Probe
**Files:** **Files:**
- Modify: `packages/core/src/scrapers/facebook.ts` if small cleanup is required - Modify: `packages/core/src/scrapers/facebook.ts` if small cleanup is required
- Modify: `packages/core/test/facebook-core.test.ts` if small cleanup is required - Modify: `packages/core/test/facebook-core.test.ts` if small cleanup is required
- Modify: `packages/core/test/facebook-integration.test.ts` if small cleanup is required - Modify: `packages/core/test/facebook-integration.test.ts` if small cleanup is required
- [ ] **Step 1: Run focused Facebook tests** - [ ] **Step 1: Run focused Facebook tests**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts` Run:
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
Expected: PASS Expected: PASS
- [ ] **Step 2: Run broader core tests** - [ ] **Step 2: Run broader core tests**
Run: `bun test packages/core/test` Run: `bun test packages/core/test` Expected: PASS
Expected: PASS
- [ ] **Step 3: Run live authenticated Facebook probe** - [ ] **Step 3: Run live authenticated Facebook probe**
@@ -742,11 +794,14 @@ if (results[0]?.url) {
Expected: Expected:
- search returns at least one result - search returns at least one result
- item fetch returns non-null for the first live result when the route is not stale/unavailable
- item fetch returns non-null for the first live result when the route is not
stale/unavailable
- [ ] **Step 4: Make any minimal cleanup needed to keep tests and live probe green** - [ ] **Step 4: Make any minimal cleanup needed to keep tests and live probe green**
If cleanup is needed, keep it limited to naming, dead-code removal caused by the rewrite, or small parser corrections directly exposed by the verification commands. If cleanup is needed, keep it limited to naming, dead-code removal caused by the
rewrite, or small parser corrections directly exposed by the verification commands.
- [ ] **Step 5: Re-run verification** - [ ] **Step 5: Re-run verification**
@@ -767,6 +822,11 @@ git commit -m "refactor: complete facebook comet scraper rewrite"
## Self-Review ## Self-Review
- Spec coverage: the plan covers classification, route-aware search parsing, route-aware item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and live verification. - Spec coverage: the plan covers classification, route-aware search parsing, route-aware
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps remain. item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and
- Type consistency: all planned functions and types use the same names across tasks: `classifyFacebookResponse`, `extractFacebookBootstrapCandidates`, `extractFacebookMarketplaceData`, and `extractFacebookItemData`. live verification.
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps
remain.
- Type consistency: all planned functions and types use the same names across tasks:
`classifyFacebookResponse`, `extractFacebookBootstrapCandidates`,
`extractFacebookMarketplaceData`, and `extractFacebookItemData`.

View File

@@ -1,63 +1,75 @@
# Unstable Listing Mode Implementation Plan # Unstable Listing Mode Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. > **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add an optional shared mode across Facebook, eBay, and Kijiji that moves listings priced below 80% of the median into `unstableResults`, while preserving current default response shapes. **Goal:** Add an optional shared mode across Facebook, eBay, and Kijiji that moves
listings priced below 80% of the median into `unstableResults`, while preserving current
default response shapes.
**Architecture:** Introduce a shared generic classifier in `packages/core` that splits any listing array into `results` and `unstableResults` using the same median-based rule. Then thread one opt-in flag through the scraper entrypoints, API routes, and MCP tool definitions so all surfaces expose the same behavior without changing existing defaults. **Architecture:** Introduce a shared generic classifier in `packages/core` that splits
any listing array into `results` and `unstableResults` using the same median-based rule.
Then thread one opt-in flag through the scraper entrypoints, API routes, and MCP tool
definitions so all surfaces expose the same behavior without changing existing defaults.
**Tech Stack:** Bun, TypeScript, Bun test, workspace packages, JSON-RPC MCP server **Tech Stack:** Bun, TypeScript, Bun test, workspace packages, JSON-RPC MCP server
--- * * *
## File Map ## File Map
- Create: `packages/core/src/utils/unstable.ts` - Create: `packages/core/src/utils/unstable.ts` Purpose: shared generic median/cutoff
Purpose: shared generic median/cutoff classifier for listing arrays. classifier for listing arrays.
- Modify: `packages/core/src/types/common.ts` - Modify: `packages/core/src/types/common.ts` Purpose: add shared mode types used by
Purpose: add shared mode types used by scrapers and adapters. scrapers and adapters.
- Modify: `packages/core/src/index.ts` - Modify: `packages/core/src/index.ts` Purpose: export the new shared classifier/types.
Purpose: export the new shared classifier/types. - Modify: `packages/core/src/scrapers/facebook.ts` Purpose: add the optional mode flag
- Modify: `packages/core/src/scrapers/facebook.ts` and return bucketed results when enabled.
Purpose: add the optional mode flag and return bucketed results when enabled. - Modify: `packages/core/src/scrapers/ebay.ts` Purpose: add the optional mode flag and
- Modify: `packages/core/src/scrapers/ebay.ts` return bucketed results when enabled.
Purpose: add the optional mode flag and return bucketed results when enabled. - Modify: `packages/core/src/scrapers/kijiji.ts` Purpose: add the optional mode flag and
- Modify: `packages/core/src/scrapers/kijiji.ts` return bucketed results when enabled.
Purpose: add the optional mode flag and return bucketed results when enabled. - Create: `packages/core/test/unstable-listing-mode.test.ts` Purpose: lock the shared
- Create: `packages/core/test/unstable-listing-mode.test.ts` classifier behavior with direct unit tests.
Purpose: lock the shared classifier behavior with direct unit tests. - Modify: `packages/core/test/facebook-core.test.ts` Purpose: prove Facebook preserves
- Modify: `packages/core/test/facebook-core.test.ts` default arrays and returns buckets when enabled.
Purpose: prove Facebook preserves default arrays and returns buckets when enabled. - Modify: `packages/core/test/ebay-core.test.ts` Purpose: prove eBay preserves default
- Modify: `packages/core/test/ebay-core.test.ts` arrays and returns buckets when enabled.
Purpose: prove eBay preserves default arrays and returns buckets when enabled. - Modify: `packages/core/test/kijiji-core.test.ts` Purpose: prove Kijiji preserves
- Modify: `packages/core/test/kijiji-core.test.ts` default arrays and returns buckets when enabled.
Purpose: prove Kijiji preserves default arrays and returns buckets when enabled. - Modify: `packages/api-server/src/routes/facebook.ts` Purpose: expose a shared opt-in
- Modify: `packages/api-server/src/routes/facebook.ts` query parameter and preserve default response shape.
Purpose: expose a shared opt-in query parameter and preserve default response shape. - Modify: `packages/api-server/src/routes/ebay.ts` Purpose: expose the same query
- Modify: `packages/api-server/src/routes/ebay.ts` parameter and preserve default response shape.
Purpose: expose the same query parameter and preserve default response shape. - Modify: `packages/api-server/src/routes/kijiji.ts` Purpose: expose the same query
- Modify: `packages/api-server/src/routes/kijiji.ts` parameter and preserve default response shape.
Purpose: expose the same query parameter and preserve default response shape. - Modify: `packages/api-server/test/routes.test.ts` Purpose: verify route forwarding and
- Modify: `packages/api-server/test/routes.test.ts` route response-shape switching.
Purpose: verify route forwarding and route response-shape switching. - Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: document the optional
- Modify: `packages/mcp-server/src/protocol/tools.ts` unstable mode in all search tools.
Purpose: document the optional unstable mode in all search tools. - Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: forward the optional
- Modify: `packages/mcp-server/src/protocol/handler.ts` mode to API routes for all search tools.
Purpose: forward the optional mode to API routes for all search tools. - Modify: `packages/mcp-server/test/protocol.test.ts` Purpose: verify MCP tool metadata
- Modify: `packages/mcp-server/test/protocol.test.ts` and forwarded URLs include the new option.
Purpose: verify MCP tool metadata and forwarded URLs include the new option.
### Task 1: Add the shared unstable-listing classifier ### Task 1: Add the shared unstable-listing classifier
**Files:** **Files:**
- Create: `packages/core/src/utils/unstable.ts` - Create: `packages/core/src/utils/unstable.ts`
- Modify: `packages/core/src/types/common.ts` - Modify: `packages/core/src/types/common.ts`
- Modify: `packages/core/src/index.ts` - Modify: `packages/core/src/index.ts`
- Test: `packages/core/test/unstable-listing-mode.test.ts` - Test: `packages/core/test/unstable-listing-mode.test.ts`
- [ ] **Step 1: Write the failing test** - [ ] **Step 1: Write the failing test**
Create `packages/core/test/unstable-listing-mode.test.ts` with focused shared-behavior coverage: Create `packages/core/test/unstable-listing-mode.test.ts` with focused shared-behavior
coverage:
```ts ```ts
import { describe, expect, test } from "bun:test"; import { describe, expect, test } from "bun:test";
@@ -127,8 +139,8 @@ describe("classifyUnstableListings", () => {
- [ ] **Step 2: Run test to verify it fails** - [ ] **Step 2: Run test to verify it fails**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Expected: FAIL because
Expected: FAIL because `classifyUnstableListings` and the shared mode types do not exist yet. `classifyUnstableListings` and the shared mode types do not exist yet.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -202,8 +214,8 @@ export { classifyUnstableListings } from "./utils/unstable";
- [ ] **Step 4: Run test to verify it passes** - [ ] **Step 4: Run test to verify it passes**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Run: `bun test packages/core/test/unstable-listing-mode.test.ts` Expected: PASS with 4
Expected: PASS with 4 passing tests. passing tests.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -215,16 +227,24 @@ git commit -m "feat: add shared unstable listing classifier"
### Task 2: Thread the optional mode through all core scrapers ### Task 2: Thread the optional mode through all core scrapers
**Files:** **Files:**
- Modify: `packages/core/src/scrapers/facebook.ts` - Modify: `packages/core/src/scrapers/facebook.ts`
- Modify: `packages/core/src/scrapers/ebay.ts` - Modify: `packages/core/src/scrapers/ebay.ts`
- Modify: `packages/core/src/scrapers/kijiji.ts` - Modify: `packages/core/src/scrapers/kijiji.ts`
- Modify: `packages/core/test/facebook-core.test.ts` - Modify: `packages/core/test/facebook-core.test.ts`
- Modify: `packages/core/test/ebay-core.test.ts` - Modify: `packages/core/test/ebay-core.test.ts`
- Modify: `packages/core/test/kijiji-core.test.ts` - Modify: `packages/core/test/kijiji-core.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
Add one focused opt-in test per scraper. Use the new shared classifier through the public scraper entrypoints instead of testing internal helpers. Add one focused opt-in test per scraper.
Use the new shared classifier through the public scraper entrypoints instead of testing
internal helpers.
In `packages/core/test/facebook-core.test.ts`, add: In `packages/core/test/facebook-core.test.ts`, add:
@@ -286,7 +306,8 @@ test("fetchKijijiItems returns stable and unstable buckets when unstable mode is
}); });
``` ```
Also add one default-mode assertion in one existing scraper test file, for example in `packages/core/test/facebook-core.test.ts`: Also add one default-mode assertion in one existing scraper test file, for example in
`packages/core/test/facebook-core.test.ts`:
```ts ```ts
test("fetchFacebookItems keeps returning an array by default", async () => { test("fetchFacebookItems keeps returning an array by default", async () => {
@@ -307,8 +328,10 @@ test("fetchFacebookItems keeps returning an array by default", async () => {
- [ ] **Step 2: Run tests to verify they fail** - [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts` Run:
Expected: FAIL because the scraper signatures do not yet accept the new option and still always return arrays. `bun test packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: FAIL because the scraper signatures do not yet accept the new option and still
always return arrays.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -322,7 +345,8 @@ import {
} from "../index"; } from "../index";
``` ```
In `packages/core/src/scrapers/facebook.ts`, extend the default export signature and branch at the end: In `packages/core/src/scrapers/facebook.ts`, extend the default export signature and
branch at the end:
```ts ```ts
export default async function fetchFacebookItems( export default async function fetchFacebookItems(
@@ -371,7 +395,8 @@ export default async function fetchEbayItems(
} }
``` ```
In `packages/core/src/scrapers/kijiji.ts`, add the same final argument after `listingOptions`: In `packages/core/src/scrapers/kijiji.ts`, add the same final argument after
`listingOptions`:
```ts ```ts
export default async function fetchKijijiItems( export default async function fetchKijijiItems(
@@ -392,12 +417,15 @@ export default async function fetchKijijiItems(
} }
``` ```
Keep the default branch untouched in all three files so existing callers still receive arrays. Keep the default branch untouched in all three files so existing callers still receive
arrays.
- [ ] **Step 4: Run tests to verify they pass** - [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts` Run:
Expected: PASS, including the new opt-in bucket assertions and the default-array regression assertion. `bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
Expected: PASS, including the new opt-in bucket assertions and the default-array
regression assertion.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -409,14 +437,19 @@ git commit -m "feat: add unstable mode to scraper results"
### Task 3: Expose unstable mode in API routes ### Task 3: Expose unstable mode in API routes
**Files:** **Files:**
- Modify: `packages/api-server/src/routes/facebook.ts` - Modify: `packages/api-server/src/routes/facebook.ts`
- Modify: `packages/api-server/src/routes/ebay.ts` - Modify: `packages/api-server/src/routes/ebay.ts`
- Modify: `packages/api-server/src/routes/kijiji.ts` - Modify: `packages/api-server/src/routes/kijiji.ts`
- Modify: `packages/api-server/test/routes.test.ts` - Modify: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
Extend `packages/api-server/test/routes.test.ts` with route-forwarding coverage for the new query parameter: Extend `packages/api-server/test/routes.test.ts` with route-forwarding coverage for the
new query parameter:
```ts ```ts
test("facebookRoute forwards unstableFilter=true to core", async () => { test("facebookRoute forwards unstableFilter=true to core", async () => {
@@ -480,8 +513,8 @@ test("kijijiRoute forwards unstableFilter=true to core", async () => {
- [ ] **Step 2: Run tests to verify they fail** - [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/api-server/test/routes.test.ts` Run: `bun test packages/api-server/test/routes.test.ts` Expected: FAIL because the
Expected: FAIL because the routes do not yet parse or forward `unstableFilter`. routes do not yet parse or forward `unstableFilter`.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
@@ -533,12 +566,14 @@ const items = await fetchKijijiItems(
); );
``` ```
Do not add any response wrapper logic in the routes; simply return whatever the core scraper returns so the default array path remains unchanged. Do not add any response wrapper logic in the routes; simply return whatever the core
scraper returns so the default array path remains unchanged.
- [ ] **Step 4: Run tests to verify they pass** - [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/api-server/test/routes.test.ts` Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS, including
Expected: PASS, including existing cookie-parameter regression tests and the new unstable-mode forwarding assertions. existing cookie-parameter regression tests and the new unstable-mode forwarding
assertions.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -550,13 +585,17 @@ git commit -m "feat: expose unstable mode in api routes"
### Task 4: Document and forward unstable mode in MCP tools ### Task 4: Document and forward unstable mode in MCP tools
**Files:** **Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts` - Modify: `packages/mcp-server/src/protocol/tools.ts`
- Modify: `packages/mcp-server/src/protocol/handler.ts` - Modify: `packages/mcp-server/src/protocol/handler.ts`
- Modify: `packages/mcp-server/test/protocol.test.ts` - Modify: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Write the failing tests** - [ ] **Step 1: Write the failing tests**
Extend `packages/mcp-server/test/protocol.test.ts` with metadata and forwarding coverage: Extend `packages/mcp-server/test/protocol.test.ts` with metadata and forwarding
coverage:
```ts ```ts
test("search tools document unstable listing mode", () => { test("search tools document unstable listing mode", () => {
@@ -601,12 +640,14 @@ Mirror the forwarding assertion for `search_kijiji` and `search_ebay` in the sam
- [ ] **Step 2: Run tests to verify they fail** - [ ] **Step 2: Run tests to verify they fail**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
Expected: FAIL because the tools do not yet describe `unstableFilter` and the handler does not append it to API URLs. tools do not yet describe `unstableFilter` and the handler does not append it to API
URLs.
- [ ] **Step 3: Write minimal implementation** - [ ] **Step 3: Write minimal implementation**
In `packages/mcp-server/src/protocol/tools.ts`, add the same optional property to all three tools: In `packages/mcp-server/src/protocol/tools.ts`, add the same optional property to all
three tools:
```ts ```ts
unstableFilter: { unstableFilter: {
@@ -617,7 +658,8 @@ unstableFilter: {
}, },
``` ```
In `packages/mcp-server/src/protocol/handler.ts`, append the shared flag in each search branch: In `packages/mcp-server/src/protocol/handler.ts`, append the shared flag in each search
branch:
```ts ```ts
if (args.unstableFilter !== undefined) { if (args.unstableFilter !== undefined) {
@@ -629,8 +671,8 @@ Add that snippet to the `search_kijiji`, `search_facebook`, and `search_ebay` br
- [ ] **Step 4: Run tests to verify they pass** - [ ] **Step 4: Run tests to verify they pass**
Run: `bun test packages/mcp-server/test/protocol.test.ts` Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS, including the
Expected: PASS, including the new tool-schema assertions and URL-forwarding assertions. new tool-schema assertions and URL-forwarding assertions.
- [ ] **Step 5: Commit** - [ ] **Step 5: Commit**
@@ -642,21 +684,23 @@ git commit -m "docs: expose unstable mode in mcp tools"
### Task 5: Verify the full cross-package feature end to end ### Task 5: Verify the full cross-package feature end to end
**Files:** **Files:**
- No code changes expected. - No code changes expected.
- [ ] **Step 1: Run the focused package tests** - [ ] **Step 1: Run the focused package tests**
Run: `bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts packages/api-server/test/routes.test.ts packages/mcp-server/test/protocol.test.ts` Run:
`bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts packages/api-server/test/routes.test.ts packages/mcp-server/test/protocol.test.ts`
Expected: PASS with zero failing tests. Expected: PASS with zero failing tests.
- [ ] **Step 2: Run the broader workspace verification** - [ ] **Step 2: Run the broader workspace verification**
Run: `bun run ci` Run: `bun run ci` Expected: PASS with clean workspace validation.
Expected: PASS with clean workspace validation.
- [ ] **Step 3: Commit verification-only follow-ups if needed** - [ ] **Step 3: Commit verification-only follow-ups if needed**
If verification forced any tiny fixes, commit them immediately after the fix with a focused message, for example: If verification forced any tiny fixes, commit them immediately after the fix with a
focused message, for example:
```bash ```bash
git add <exact files changed> git add <exact files changed>
@@ -667,6 +711,8 @@ If no files changed during verification, skip this commit step.
## Self-Review ## Self-Review
- Spec coverage: shared classifier, all three scrapers, API exposure, MCP documentation, and tests are each mapped to a task. - Spec coverage: shared classifier, all three scrapers, API exposure, MCP documentation,
- Placeholder scan: no `TODO`, `TBD`, or "write tests later" placeholders remain. and tests are each mapped to a task.
- Type consistency: the plan uses one shared flag name, `unstableFilter`, and one shared core option, `hideUnstableResults`, across all tasks. - Placeholder scan: no `TODO`, `TBD`, or “write tests later” placeholders remain.
- Type consistency: the plan uses one shared flag name, `unstableFilter`, and one shared
core option, `hideUnstableResults`, across all tasks.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,110 @@
# Marketplace Dollar Price Inputs Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to
> implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make public marketplace price inputs use dollars while preserving core scraper
cent-based filtering.
**Architecture:** API server owns HTTP query parsing and converts dollar amounts to
cents before calling core.
MCP server keeps forwarding numeric dollar values as query params.
Core scraper internals remain unchanged because parsed listing prices already use cents.
This applies to eBay `minPrice`/`maxPrice` and Kijiji `priceMin`/`priceMax`; Facebook
exposes no price filter inputs.
**Tech Stack:** Bun, TypeScript, `bun:test`, MCP JSON-RPC adapter, framework-free Bun
HTTP routes.
* * *
### Task 1: API Dollar Parsing
**Files:**
- Modify: `packages/api-server/src/routes/helpers.ts`
- Modify: `packages/api-server/src/routes/ebay.ts`
- Modify: `packages/api-server/src/routes/kijiji.ts`
- Test: `packages/api-server/test/routes.test.ts`
- [ ] **Step 1: Add failing API route tests**
Add tests proving eBay `minPrice=999.99` / `maxPrice=1000` and Kijiji `priceMin=999.99`
/ `priceMax=1000` are forwarded to core as `99999` and `100000` cents.
Add validation tests for empty, whitespace, negative, hex, mixed text, and malformed
decimal price values.
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: new forwarding tests fail because route currently rejects decimals and
forwards integer dollars unchanged.
- [ ] **Step 2: Implement dollar parser helper**
Add `parseDollarPriceParam(searchParams, name)` in
`packages/api-server/src/routes/helpers.ts`. Accept `0`, `1000`, `999.99`, and `0.99`.
Reject values that do not match `^\d+(?:\.\d{1,2})?$`. Convert to cents with
`Math.round(Number(rawValue) * 100)`.
- [ ] **Step 3: Use dollar parser in eBay and Kijiji routes**
Replace `parseNonNegativeIntegerParam` calls for eBay `minPrice`/`maxPrice` and Kijiji
`priceMin`/`priceMax` with `parseDollarPriceParam`. Keep pagination/count params on
integer parsing.
- [ ] **Step 4: Verify API tests**
Run: `bun test packages/api-server/test/routes.test.ts`
Expected: all API route tests pass.
### Task 2: MCP Schema Contract
**Files:**
- Modify: `packages/mcp-server/src/protocol/tools.ts`
- Test: `packages/mcp-server/test/protocol.test.ts`
- [ ] **Step 1: Add MCP schema/forwarding tests**
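Add tests that `search_ebay` describes `minPrice` and `maxPrice` as dollar filters and
forwards numeric dollar values unchanged in API query params.
Add equivalent assertions for the Kijiji tool's `priceMin` and `priceMax`, since Step 2
also changes those descriptions.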
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: description test fails until schema text changes; forwarding behavior should
already pass or reveal mapping gaps.
- [ ] **Step 2: Update tool descriptions**
Change eBay `minPrice` and Kijiji `priceMin` descriptions to `Minimum price in dollars`.
Change eBay `maxPrice` and Kijiji `priceMax` descriptions to `Maximum price in dollars`.
- [ ] **Step 3: Verify MCP tests**
Run: `bun test packages/mcp-server/test/protocol.test.ts`
Expected: all MCP protocol tests pass.
### Task 3: Cross-Package Verification
**Files:**
- No additional edits expected.
- [ ] **Step 1: Run relevant package tests**
Run: `bun test packages/api-server/test packages/mcp-server/test`
Expected: all tests pass.
- [ ] **Step 2: Run CI**
Run: `bun run ci`
Expected: typecheck and Biome pass without changing lint config.

View File

@@ -0,0 +1,187 @@
# Live Parser Tests Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
> to implement this plan task-by-task.
> Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add explicit live endpoint test suites for each core marketplace scraper,
excluded from default tests and runnable through one script.
**Architecture:** Live tests live under `packages/core/test/live/` and import public
scraper entry points directly.
Normal package tests remain offline because the new files are outside current explicit
test commands and run only through `bun run test:live`.
**Tech Stack:** Bun `1.3.13`, `bun:test`, TypeScript, existing core scraper APIs.
* * *
## File Structure
- Create `packages/core/test/live/ebay.live.test.ts`: live eBay search smoke test
against `fetchEbayItems`.
- Create `packages/core/test/live/kijiji.live.test.ts`: live Kijiji search smoke test
against `fetchKijijiItems`.
- Create `packages/core/test/live/facebook.live.test.ts`: strict live Facebook search
smoke test against `fetchFacebookItems`; requires `FACEBOOK_COOKIE` to be set.
- Modify `package.json`: add root script `test:live` running all files under
`packages/core/test/live`.
### Task 1: Add eBay Live Suite
**Files:**
- Create: `packages/core/test/live/ebay.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchEbayItems from "../../src/scrapers/ebay";
describe("eBay live parser", () => {
test("scrapes live search results into listing details", async () => {
const results = await fetchEbayItems("iphone", 1, { maxItems: 3 });
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run eBay live test**
Run: `bun test packages/core/test/live/ebay.live.test.ts` Expected: PASS when eBay
returns parseable search results; FAIL on endpoint/rate-limit/parser breakage.
### Task 2: Add Kijiji Live Suite
**Files:**
- Create: `packages/core/test/live/kijiji.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchKijijiItems from "../../src/scrapers/kijiji";
describe("Kijiji live parser", () => {
test("scrapes live search results into detailed listings", async () => {
const results = await fetchKijijiItems(
"iphone",
1,
"https://www.kijiji.ca",
{ maxPages: 1 },
{ includeImages: false, sellerDataDepth: "basic" },
);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://www.kijiji.ca/");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run Kijiji live test**
Run: `bun test packages/core/test/live/kijiji.live.test.ts` Expected: PASS when Kijiji
returns parseable search and detail pages; FAIL on endpoint/parser breakage.
### Task 3: Add Facebook Live Suite
**Files:**
- Create: `packages/core/test/live/facebook.live.test.ts`
- [ ] **Step 1: Write the live test file**
```ts
import { describe, expect, test } from "bun:test";
import fetchFacebookItems from "../../src/scrapers/facebook";
describe("Facebook live parser", () => {
test("requires FACEBOOK_COOKIE for strict live testing", () => {
expect(process.env.FACEBOOK_COOKIE?.trim().length ?? 0).toBeGreaterThan(0);
});
test("scrapes live marketplace search results into listing details", async () => {
const results = await fetchFacebookItems("iphone", 1, "toronto", 3);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
expect(listing.url).toStartWith("https://www.facebook.com/marketplace/item/");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
});
});
```
- [ ] **Step 2: Run Facebook live test**
Run: `bun test packages/core/test/live/facebook.live.test.ts` Expected: PASS with valid
`FACEBOOK_COOKIE`; FAIL when `FACEBOOK_COOKIE` is missing, expired, or parser output is
empty.
### Task 4: Add Root Live Test Script
**Files:**
- Modify: `package.json`
- [ ] **Step 1: Add script**
Change root `scripts` to include:
```json
{
"test:live": "bun test packages/core/test/live"
}
```
- [ ] **Step 2: Run all live tests through script**
Run: `bun run test:live` Expected: runs eBay, Kijiji, and Facebook live suites.
Facebook fails if `FACEBOOK_COOKIE` is unset.
### Task 5: Verify Default Suite Exclusion
**Files:**
- No code files modified.
- [ ] **Step 1: Run existing core tests**
Run: `bun test packages/core/test` Expected: existing mocked tests run.
If Bun discovers `packages/core/test/live`, change normal verification command to
explicit glob `bun test packages/core/test/*.test.ts` and document that in final notes.
- [ ] **Step 2: Run static checks**
Run: `bun run ci` Expected: typecheck and Biome pass.
Fix code issues without changing lint or TypeScript rules.
## Commit Note
Do not commit during execution unless user explicitly requests a commit.
This repo session policy overrides generic plan commit steps.
## Self-Review
- Spec coverage: eBay, Kijiji, Facebook live suites; explicit script; strict Facebook
auth; excluded from default flow.
- Placeholder scan: no `TBD`, `TODO`, or underspecified implementation steps.
- Type consistency: tests use current exported scraper signatures and shared listing
fields from `ListingDetails`.

View File

@@ -1,12 +1,13 @@
# Design: Adopt opencode Monorepo Config # Design: Adopt opencode Monorepo Config
**Date:** 2025-07-14 **Date:** 2025-07-14\
**Status:** Approved **Status:** Approved\
**Approach:** Full adoption (A) **Approach:** Full adoption (A)
## Context ## Context
Current repo (`marketplace-scrapers-monorepo`) has basic bun workspaces with 3 packages (`core`, `api-server`, `mcp-server`). Reference: `anomalyco/opencode` monorepo patterns. Current repo (`marketplace-scrapers-monorepo`) has basic bun workspaces with 3 packages
(`core`, `api-server`, `mcp-server`). Reference: `anomalyco/opencode` monorepo patterns.
**Gaps vs opencode:** **Gaps vs opencode:**
- No Turbo (task orchestration, caching, dep graph) - No Turbo (task orchestration, caching, dep graph)
@@ -20,7 +21,8 @@ Current repo (`marketplace-scrapers-monorepo`) has basic bun workspaces with 3 p
### 1. Root `package.json` ### 1. Root `package.json`
- Add `workspaces.catalog` block with shared deps: - Add `workspaces.catalog` block with shared deps:
- `@typescript/native-preview`, `@types/bun`, `@types/unidecode`, `@types/cli-progress` - `@typescript/native-preview`, `@types/bun`, `@types/unidecode`,
`@types/cli-progress`
- Add `turbo` to `devDependencies` - Add `turbo` to `devDependencies`
- Add `@tsconfig/bun` to `devDependencies` + catalog - Add `@tsconfig/bun` to `devDependencies` + catalog
- Update root scripts: `typecheck` and `build` delegate to `turbo run` - Update root scripts: `typecheck` and `build` delegate to `turbo run`
@@ -93,7 +95,8 @@ exact = true
root = "./do-not-run-tests-from-root" root = "./do-not-run-tests-from-root"
``` ```
Exact installs = reproducible. Root test guard prevents accidental root-level test runs. Exact installs = reproducible.
Root test guard prevents accidental root-level test runs.
### 6. Package `exports` field ### 6. Package `exports` field
@@ -102,7 +105,8 @@ Replace `main`/`module` with `exports` in all 3 packages:
"exports": { ".": "./src/index.ts" } "exports": { ".": "./src/index.ts" }
``` ```
Remove `main` and `module` fields. Bun resolves `.ts` directly. Remove `main` and `module` fields.
Bun resolves `.ts` directly.
### 7. Catalog references in per-package `package.json` ### 7. Catalog references in per-package `package.json`
@@ -115,7 +119,7 @@ Replace pinned versions with `"catalog:"` for shared deps:
## Files Changed ## Files Changed
| File | Action | | File | Action |
|---|---| | --- | --- |
| `package.json` | Update (catalog, turbo dep, scripts) | | `package.json` | Update (catalog, turbo dep, scripts) |
| `turbo.json` | Create | | `turbo.json` | Create |
| `tsconfig.json` | Create | | `tsconfig.json` | Create |

View File

@@ -3,7 +3,9 @@
## Summary ## Summary
Remove all file-based and request-provided cookie inputs across the repo. Remove all file-based and request-provided cookie inputs across the repo.
The only supported authentication input becomes a raw `Cookie` header string supplied through scraper-specific environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`. The only supported authentication input becomes a raw `Cookie` header string supplied
through scraper-specific environment variables such as `FACEBOOK_COOKIE` and
`EBAY_COOKIE`.
## Goals ## Goals
@@ -17,7 +19,8 @@ The only supported authentication input becomes a raw `Cookie` header string sup
- Changing scraper behavior unrelated to authentication input. - Changing scraper behavior unrelated to authentication input.
- Adding new cookie formats or migration helpers. - Adding new cookie formats or migration helpers.
- Preserving backward compatibility for cookie files, JSON cookie arrays, or request overrides. - Preserving backward compatibility for cookie files, JSON cookie arrays, or request
overrides.
## Current State ## Current State
@@ -27,27 +30,33 @@ The current shared cookie utilities support three sources in priority order:
2. Environment variable 2. Environment variable
3. Cookie file 3. Cookie file
`packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and auto-detection between JSON and header-string formats. `packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and
Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading logic. auto-detection between JSON and header-string formats.
Docs in `cookies/AGENTS.md` still describe file-based setup and request-level overrides. Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading
logic. Docs in `cookies/AGENTS.md` still describe file-based setup and request-level
overrides.
## Chosen Approach ## Chosen Approach
Use the hard-reset approach. Use the hard-reset approach.
Delete the shared multi-source cookie-loading model and reduce the cookie surface to env-header parsing only. Delete the shared multi-source cookie-loading model and reduce the cookie surface to
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions that imply unsupported inputs still exist. env-header parsing only.
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions
that imply unsupported inputs still exist.
## Design ## Design
### Shared Cookie Utilities ### Shared Cookie Utilities
`packages/core/src/utils/cookies.ts` will keep only the pieces needed for env-header-based auth: `packages/core/src/utils/cookies.ts` will keep only the pieces needed for
env-header-based auth:
- `Cookie` type - `Cookie` type
- A reduced cookie config shape containing only `name`, `domain`, and `envVar` - A reduced cookie config shape containing only `name`, `domain`, and `envVar`
- `parseCookieString()` for raw `Cookie` header strings - `parseCookieString()` for raw `Cookie` header strings
- `formatCookiesForHeader()` for domain filtering and request formatting - `formatCookiesForHeader()` for domain filtering and request formatting
- An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a targeted error when missing or invalid - An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a
targeted error when missing or invalid
The following shared utilities will be removed: The following shared utilities will be removed:
@@ -68,15 +77,18 @@ For Facebook this means:
For eBay this means: For eBay this means:
- Remove any remaining fallback/file-oriented behavior from shared calls and error strings - Remove any remaining fallback/file-oriented behavior from shared calls and error
strings
- Keep the existing env-var auth path, but make it the only path - Keep the existing env-var auth path, but make it the only path
### Public API Surface ### Public API Surface
Exports from `packages/core/src/index.ts` should reflect the new contract. Exports from `packages/core/src/index.ts` should reflect the new contract.
If exported functions currently advertise cookie-source or cookie-path arguments, their signatures will be tightened so callers cannot pass unsupported inputs. If exported functions currently advertise cookie-source or cookie-path arguments, their
signatures will be tightened so callers cannot pass unsupported inputs.
Downstream adapter packages should continue calling core through the simplified signatures without adding their own cookie-loading behavior. Downstream adapter packages should continue calling core through the simplified
signatures without adding their own cookie-loading behavior.
### Error Handling ### Error Handling
@@ -93,8 +105,8 @@ Errors should be blunt and specific:
### Testing Strategy ### Testing Strategy
Follow TDD. Follow TDD. Start by changing or adding core tests so the old file/request behavior is
Start by changing or adding core tests so the old file/request behavior is no longer accepted. no longer accepted.
Coverage targets: Coverage targets:
@@ -102,7 +114,8 @@ Coverage targets:
2. Missing env vars fail with the new env-only error. 2. Missing env vars fail with the new env-only error.
3. Invalid env strings fail without falling back to files or request data. 3. Invalid env strings fail without falling back to files or request data.
4. Facebook APIs no longer expose or honor cookie-path/request-cookie behavior. 4. Facebook APIs no longer expose or honor cookie-path/request-cookie behavior.
5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to the env-only contract. 5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to
the env-only contract.
Verification target after implementation: Verification target after implementation:
@@ -121,11 +134,15 @@ Update cookie-related docs to match the new contract:
## Risks ## Risks
- External callers using request cookie overrides will break at compile time or runtime, depending on how they consume the package. - External callers using request cookie overrides will break at compile time or runtime,
- Recent work added support for custom Facebook cookie paths, so removing that path intentionally reverses a newly introduced behavior. depending on how they consume the package.
- Tests that currently model missing-file behavior must be rewritten rather than preserved. - Recent work added support for custom Facebook cookie paths, so removing that path
intentionally reverses a newly introduced behavior.
- Tests that currently model missing-file behavior must be rewritten rather than
preserved.
## Rollout Notes ## Rollout Notes
This is an intentional contract break. This is an intentional contract break.
The code, tests, and docs should all land together so there is no mixed messaging about supported cookie sources. The code, tests, and docs should all land together so there is no mixed messaging about
supported cookie sources.

View File

@@ -2,35 +2,46 @@
## Summary ## Summary
Replace the legacy Facebook Marketplace scraper with a route-aware implementation built around current Comet bootstrap markers and route-specific extraction. Replace the legacy Facebook Marketplace scraper with a route-aware implementation built
The new scraper will keep authenticated direct HTTP fetches as the primary transport, but it will stop treating legacy `require`, `__bbox`, and `marketplace_product_details_page` structures as the main parsing contract. around current Comet bootstrap markers and route-specific extraction.
The new scraper will keep authenticated direct HTTP fetches as the primary transport,
but it will stop treating legacy `require`, `__bbox`, and
`marketplace_product_details_page` structures as the main parsing contract.
## Goals ## Goals
- Replace both Facebook search and item-detail extraction with a current-shape parser. - Replace both Facebook search and item-detail extraction with a current-shape parser.
- Keep authenticated direct HTTP requests as the primary fetch strategy. - Keep authenticated direct HTTP requests as the primary fetch strategy.
- Parse route-specific Comet bootstrap/state payloads before falling back to rendered-HTML extraction. - Parse route-specific Comet bootstrap/state payloads before falling back to
rendered-HTML extraction.
- Detect auth-gated, unavailable, and unknown responses explicitly. - Detect auth-gated, unavailable, and unknown responses explicitly.
- Update tests so they model current route markers and failure modes instead of legacy page objects. - Update tests so they model current route markers and failure modes instead of legacy
page objects.
## Non-Goals ## Non-Goals
- Reworking non-Facebook scrapers. - Reworking non-Facebook scrapers.
- Converting the scraper to browser-only automation. - Converting the scraper to browser-only automation.
- Preserving old parser behavior for `marketplace_product_details_page` or `__bbox`-driven item extraction. - Preserving old parser behavior for `marketplace_product_details_page` or
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively before implementation. `__bbox`-driven item extraction.
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively
before implementation.
## Current State ## Current State
The current implementation in `packages/core/src/scrapers/facebook.ts` still uses authenticated HTTP requests, which remains correct. The current implementation in `packages/core/src/scrapers/facebook.ts` still uses
The search path parses embedded script JSON and looks for `marketplace_search.feed_units.edges`. authenticated HTTP requests, which remains correct.
The item-detail path is centered on legacy extraction paths such as: The search path parses embedded script JSON and looks for
`marketplace_search.feed_units.edges`. The item-detail path is centered on legacy
extraction paths such as:
- `parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target` - `parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
- nested `__bbox.require[...]` variations - nested `__bbox.require[...]` variations
- recursive search through `parsed.require` - recursive search through `parsed.require`
Live evidence gathered earlier in this session and by the isolated research subagent shows that current Facebook Marketplace pages are Comet route-driven and expose markers such as: Live evidence gathered earlier in this session and by the isolated research subagent
shows that current Facebook Marketplace pages are Comet route-driven and expose markers
such as:
- `XCometMarketplaceSearchController` - `XCometMarketplaceSearchController`
- `XCometMarketplacePermalinkController` - `XCometMarketplacePermalinkController`
@@ -41,7 +52,9 @@ Live evidence gathered earlier in this session and by the isolated research suba
- `data-sjs` - `data-sjs`
- `data-btmanifest` - `data-btmanifest`
The same live investigation also showed that authenticated item pages no longer expose the old `marketplace_product_details_page` marker reliably, while live search still returns usable results. The same live investigation also showed that authenticated item pages no longer expose
the old `marketplace_product_details_page` marker reliably, while live search still
returns usable results.
## Chosen Approach ## Chosen Approach
@@ -52,9 +65,11 @@ The scraper will:
1. Fetch authenticated HTML directly. 1. Fetch authenticated HTML directly.
2. Classify the response using current route and auth markers. 2. Classify the response using current route and auth markers.
3. Parse inline bootstrap/state payloads using route-specific probes. 3. Parse inline bootstrap/state payloads using route-specific probes.
4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the payload cannot be decoded into the expected search or item shape. 4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the
payload cannot be decoded into the expected search or item shape.
This keeps the cheaper direct-HTTP transport while shifting the parser contract from legacy page-object names to current Comet route structure. This keeps the cheaper direct-HTTP transport while shifting the parser contract from
legacy page-object names to current Comet route structure.
## Design ## Design
@@ -88,7 +103,8 @@ Primary behavior:
- fetch the Marketplace search HTML with auth cookies - fetch the Marketplace search HTML with auth cookies
- confirm the response class is `search` - confirm the response class is `search`
- extract inline bootstrap/state blobs from script tags and page attributes - extract inline bootstrap/state blobs from script tags and page attributes
- probe for route-specific search payloads associated with `XCometMarketplaceSearchController` - probe for route-specific search payloads associated with
`XCometMarketplaceSearchController`
- map decoded search results into summary listing records - map decoded search results into summary listing records
Search summary fields should remain aligned with the current public output shape: Search summary fields should remain aligned with the current public output shape:
@@ -102,7 +118,8 @@ Search summary fields should remain aligned with the current public output shape
Fallback behavior: Fallback behavior:
- if search route markers are present but structured payload decoding fails, extract listing summaries from rendered HTML anchors and text patterns - if search route markers are present but structured payload decoding fails, extract
listing summaries from rendered HTML anchors and text patterns
- use item links matching `/marketplace/item/<id>` as the anchor for fallback extraction - use item links matching `/marketplace/item/<id>` as the anchor for fallback extraction
- treat fallback results as summary-only data, not rich detail data - treat fallback results as summary-only data, not rich detail data
@@ -132,9 +149,12 @@ Priority item fields:
Fallback behavior: Fallback behavior:
- if permalink route markers are present but no stable payload object is decodable, extract data from rendered HTML text structure - if permalink route markers are present but no stable payload object is decodable,
- prioritize title, price, condition, description, location text, and seller module content extract data from rendered HTML text structure
- return partial item data when core user-facing fields are present rather than failing solely because deeper commerce metadata is missing - prioritize title, price, condition, description, location text, and seller module
content
- return partial item data when core user-facing fields are present rather than failing
solely because deeper commerce metadata is missing
### Bootstrap Parsing Strategy ### Bootstrap Parsing Strategy
@@ -151,11 +171,14 @@ Candidate discovery inputs:
- `ServerJS` / `Bootloader` inline blobs - `ServerJS` / `Bootloader` inline blobs
- route controller names - route controller names
Candidate scoring for search should favor objects that contain repeated result-card semantics, item IDs, listing links, titles, prices, or location summaries. Candidate scoring for search should favor objects that contain repeated result-card
Candidate scoring for item pages should favor objects that contain singular listing semantics, title, price, condition, description, location, seller, or permalink context. semantics, item IDs, listing links, titles, prices, or location summaries.
Candidate scoring for item pages should favor objects that contain singular listing
semantics, title, price, condition, description, location, seller, or permalink context.
The parser should not depend on one hard-coded object name surviving forever. The parser should not depend on one hard-coded object name surviving forever.
Instead, it should look for route-specific semantic clusters and choose the strongest candidate. Instead, it should look for route-specific semantic clusters and choose the strongest
candidate.
### Legacy Removal ### Legacy Removal
@@ -166,7 +189,9 @@ Specifically:
- delete legacy-first `require` / `__bbox` navigation tables - delete legacy-first `require` / `__bbox` navigation tables
- delete tests whose only purpose is to preserve those legacy paths - delete tests whose only purpose is to preserve those legacy paths
If a minimal legacy compatibility branch remains, it must be a last-resort fallback behind the new route-aware parser and should not shape test fixtures or design decisions. If a minimal legacy compatibility branch remains, it must be a last-resort fallback
behind the new route-aware parser and should not shape test fixtures or design
decisions.
### Error Handling ### Error Handling
@@ -178,7 +203,8 @@ Facebook responses should now fail with explicit route-aware outcomes:
4. Search or item route detected, but no decodable data found. 4. Search or item route detected, but no decodable data found.
5. Unknown response shape. 5. Unknown response shape.
Error messages should name the actual class of failure instead of implying that every parse miss is caused by expired cookies. Error messages should name the actual class of failure instead of implying that every
parse miss is caused by expired cookies.
### Testing Strategy ### Testing Strategy
@@ -190,11 +216,15 @@ Coverage targets:
1. Search responses classify correctly from current Comet controller markers. 1. Search responses classify correctly from current Comet controller markers.
2. Item responses classify correctly from current Comet controller markers. 2. Item responses classify correctly from current Comet controller markers.
3. Login-gated and unavailable responses are detected before parsing. 3. Login-gated and unavailable responses are detected before parsing.
4. Search bootstrap parsing produces summary listing results from current-shape fixtures. 4. Search bootstrap parsing produces summary listing results from current-shape
fixtures.
5. Item bootstrap parsing produces rich listing details from current-shape fixtures. 5. Item bootstrap parsing produces rich listing details from current-shape fixtures.
6. Search fallback extraction works when route markers exist but structured payload decoding fails. 6. Search fallback extraction works when route markers exist but structured payload
7. Item fallback extraction works when route markers exist but structured payload decoding fails. decoding fails.
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the contract. 7. Item fallback extraction works when route markers exist but structured payload
decoding fails.
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the
contract.
Verification target after implementation: Verification target after implementation:
@@ -204,23 +234,30 @@ Verification target after implementation:
## Public API Surface ## Public API Surface
Keep the current public function names unless the rewrite proves that a signature change is required: Keep the current public function names unless the rewrite proves that a signature change
is required:
- `fetchFacebookItems(...)` - `fetchFacebookItems(...)`
- `fetchFacebookItem(...)` - `fetchFacebookItem(...)`
- `extractFacebookMarketplaceData(...)` - `extractFacebookMarketplaceData(...)`
- `extractFacebookItemData(...)` - `extractFacebookItemData(...)`
The internals should change substantially, but callers should not need a new integration surface for this rewrite. The internals should change substantially, but callers should not need a new integration
surface for this rewrite.
## Risks ## Risks
- Facebook may change bootstrap payload naming again, so route/controller markers are more stable than exact nested object paths but still not guaranteed. - Facebook may change bootstrap payload naming again, so route/controller markers are
- Search and item pages may each contain multiple partial payloads, making candidate ranking important. more stable than exact nested object paths but still not guaranteed.
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs clear precedence rules. - Search and item pages may each contain multiple partial payloads, making candidate
- Live fixtures can drift from production quickly, so tests must model route semantics rather than exact one-off payloads where possible. ranking important.
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs
clear precedence rules.
- Live fixtures can drift from production quickly, so tests must model route semantics
rather than exact one-off payloads where possible.
## Rollout Notes ## Rollout Notes
The code, fixtures, and tests should change together. The code, fixtures, and tests should change together.
There should be no mixed state where the implementation is Comet-aware but the tests still encode `marketplace_product_details_page` as the primary contract. There should be no mixed state where the implementation is Comet-aware but the tests
still encode `marketplace_product_details_page` as the primary contract.

View File

@@ -2,15 +2,18 @@
## Summary ## Summary
Add an optional shared result mode across Facebook, eBay, and Kijiji that moves suspiciously cheap listings out of the main results into a separate `unstableResults` bucket. Add an optional shared result mode across Facebook, eBay, and Kijiji that moves
Listings are considered unstable when their price is more than 20% below the median price of the scraper's priced search results. suspiciously cheap listings out of the main results into a separate `unstableResults`
bucket. Listings are considered unstable when their price is more than 20% below the
median price of the scraper's priced search results.
## Goals ## Goals
- Support the same optional unstable-listing mode across all scrapers. - Support the same optional unstable-listing mode across all scrapers.
- Keep current default scraper and route behavior unchanged unless the mode is enabled. - Keep current default scraper and route behavior unchanged unless the mode is enabled.
- Hide unstable listings from the main results while still returning them separately. - Hide unstable listings from the main results while still returning them separately.
- Implement the rule once in shared core code instead of duplicating marketplace-specific logic. - Implement the rule once in shared core code instead of duplicating
marketplace-specific logic.
- Document the option in MCP tool descriptions so callers can discover it. - Document the option in MCP tool descriptions so callers can discover it.
## Non-Goals ## Non-Goals
@@ -24,7 +27,8 @@ Listings are considered unstable when their price is more than 20% below the med
`packages/core` currently returns plain arrays from scraper search functions. `packages/core` currently returns plain arrays from scraper search functions.
`packages/api-server` forwards those scraper results directly from marketplace routes. `packages/api-server` forwards those scraper results directly from marketplace routes.
`packages/mcp-server` documents search tools per marketplace, but does not expose or describe any result-stability mode. `packages/mcp-server` documents search tools per marketplace, but does not expose or
describe any result-stability mode.
There is no shared result-classification utility today. There is no shared result-classification utility today.
Price filtering exists in some scrapers, but not a cross-marketplace median-based split. Price filtering exists in some scrapers, but not a cross-marketplace median-based split.
@@ -33,11 +37,14 @@ Price filtering exists in some scrapers, but not a cross-marketplace median-base
Use a shared core utility plus per-route and per-tool opt-in. Use a shared core utility plus per-route and per-tool opt-in.
The shared utility will accept parsed listings, compute the median from valid positive prices, and split the data into `results` and `unstableResults`. The shared utility will accept parsed listings, compute the median from valid positive
Each scraper will opt into that utility when the caller enables unstable-listing mode. prices, and split the data into `results` and `unstableResults`. Each scraper will opt
API routes and MCP tools will expose the same optional mode so the feature is consistently available everywhere scraper search is surfaced. into that utility when the caller enables unstable-listing mode.
API routes and MCP tools will expose the same optional mode so the feature is
consistently available everywhere scraper search is surfaced.
This keeps the heuristic centralized, minimizes duplicated logic, and preserves existing consumers by leaving the default path unchanged. This keeps the heuristic centralized, minimizes duplicated logic, and preserves existing
consumers by leaving the default path unchanged.
## Design ## Design
@@ -48,14 +55,16 @@ Add a shared utility in `packages/core` for listing stability classification.
Responsibilities: Responsibilities:
- accept parsed listing arrays with `listingPrice.cents` - accept parsed listing arrays with `listingPrice.cents`
- ignore listings whose price is missing, non-numeric, or non-positive when computing the median - ignore listings whose price is missing, non-numeric, or non-positive when computing
the median
- compute the median price from valid priced listings - compute the median price from valid priced listings
- classify listings as unstable when `listingPrice.cents < median * 0.8` - classify listings as unstable when `listingPrice.cents < median * 0.8`
- return an object with: - return an object with:
- `results`: listings that remain in the main bucket - `results`: listings that remain in the main bucket
- `unstableResults`: listings moved out of the main bucket - `unstableResults`: listings moved out of the main bucket
Listings excluded from median computation because their price is missing or non-positive remain in `results` unchanged. Listings excluded from median computation because their price is missing or non-positive
remain in `results` unchanged.
### Scraper Integration ### Scraper Integration
@@ -68,7 +77,8 @@ Default behavior:
Opt-in behavior: Opt-in behavior:
- run the shared classification utility after parsing search results - run the shared classification utility after parsing search results
- classify before final result limiting so unstable items do not consume main-result slots - classify before final result limiting so unstable items do not consume main-result
slots
- return an object shaped like: - return an object shaped like:
```ts ```ts
@@ -82,7 +92,8 @@ Each scraper will use its existing concrete listing subtype for these arrays.
### API Surface ### API Surface
Marketplace API routes will expose an optional query parameter for unstable-listing mode. Marketplace API routes will expose an optional query parameter for unstable-listing
mode.
Requirements: Requirements:
@@ -90,7 +101,8 @@ Requirements:
- when enabled, return the object payload with `results` and `unstableResults` - when enabled, return the object payload with `results` and `unstableResults`
- use the same semantics across Facebook, eBay, and Kijiji routes - use the same semantics across Facebook, eBay, and Kijiji routes
The exact parameter name should be consistent across routes and intentionally describe the behavior, for example `unstableFilter=true`. The exact parameter name should be consistent across routes and intentionally describe
the behavior, for example `unstableFilter=true`.
### MCP Surface ### MCP Surface
@@ -100,34 +112,43 @@ Tool descriptions should explicitly document:
- that the option is optional - that the option is optional
- that it moves listings priced more than 20% below the median into `unstableResults` - that it moves listings priced more than 20% below the median into `unstableResults`
- that enabling it changes the response shape from a plain list to an object with `results` and `unstableResults` - that enabling it changes the response shape from a plain list to an object with
`results` and `unstableResults`
- that the behavior is available for Facebook, eBay, and Kijiji search tools - that the behavior is available for Facebook, eBay, and Kijiji search tools
The wording should be aligned across all three tools so the feature reads as one shared capability. The wording should be aligned across all three tools so the feature reads as one shared
capability.
### Error Handling ### Error Handling
The unstable-listing mode should be best-effort and non-failing. The unstable-listing mode should be best-effort and non-failing.
- If there are no valid positive prices, return all listings in `results` and an empty `unstableResults` array. - If there are no valid positive prices, return all listings in `results` and an empty
`unstableResults` array.
- If there is only one valid priced listing, do not classify it as unstable. - If there is only one valid priced listing, do not classify it as unstable.
- Parsing failures remain governed by existing scraper behavior; the classification layer should not introduce new scraper-specific errors. - Parsing failures remain governed by existing scraper behavior; the classification
layer should not introduce new scraper-specific errors.
### Testing Strategy ### Testing Strategy
Follow TDD. Follow TDD. Start with shared utility tests, then wire the option through scraper and
Start with shared utility tests, then wire the option through scraper and route tests. route tests.
Coverage targets: Coverage targets:
1. Median calculation for odd-sized valid price sets. 1. Median calculation for odd-sized valid price sets.
2. Median calculation for even-sized valid price sets. 2. Median calculation for even-sized valid price sets.
3. Strict cutoff behavior where only listings with `price < median * 0.8` move to `unstableResults`. 3. Strict cutoff behavior where only listings with `price < median * 0.8` move to
4. Missing, invalid, zero, or negative prices are excluded from median computation and remain in `results`. `unstableResults`.
4. Missing, invalid, zero, or negative prices are excluded from median computation and
remain in `results`.
5. Default scraper behavior still returns plain arrays when the option is disabled. 5. Default scraper behavior still returns plain arrays when the option is disabled.
6. Enabled scraper behavior returns `{ results, unstableResults }` for Facebook, eBay, and Kijiji. 6. Enabled scraper behavior returns `{ results, unstableResults }` for Facebook, eBay,
7. API routes preserve existing response shapes by default and switch to the object payload only when enabled. and Kijiji.
8. MCP tool metadata documents the new optional mode for all three marketplace search tools. 7. API routes preserve existing response shapes by default and switch to the object
payload only when enabled.
8. MCP tool metadata documents the new optional mode for all three marketplace search
tools.
Verification target after implementation: Verification target after implementation:
@@ -138,11 +159,15 @@ Verification target after implementation:
## Risks ## Risks
- The optional mode introduces a union return shape for scraper callers, which can ripple into downstream TypeScript signatures. - The optional mode introduces a union return shape for scraper callers, which can
- Applying classification before final limiting changes which items appear in the main bucket compared with a naive post-limit split. ripple into downstream TypeScript signatures.
- Kijiji and eBay may have different mixes of priced and unpriced results, so excluding non-positive prices from the median must remain explicit and tested. - Applying classification before final limiting changes which items appear in the main
bucket compared with a naive post-limit split.
- Kijiji and eBay may have different mixes of priced and unpriced results, so excluding
non-positive prices from the median must remain explicit and tested.
## Rollout Notes ## Rollout Notes
Land the shared classifier, scraper wiring, route wiring, tests, and MCP description updates together. Land the shared classifier, scraper wiring, route wiring, tests, and MCP description
That avoids a partial rollout where the feature exists in one surface but is undocumented or inconsistent elsewhere. updates together. That avoids a partial rollout where the feature exists in one surface
but is undocumented or inconsistent elsewhere.

View File

@@ -0,0 +1,44 @@
# Live Parser Tests Design
## Summary
Add explicit live endpoint tests for each core scraper parser path.
These tests are excluded from normal deterministic test commands and run only through a
dedicated package script.
## Scope
- Add one live suite per parser: eBay, Kijiji, Facebook.
- Place suites under `packages/core/test/live/` so normal
`bun test packages/core/test/*.test.ts` patterns do not include them accidentally.
- Add a root `test:live` script that runs all live suites together.
- Keep existing mocked tests unchanged.
## Behavior
- Each suite calls the public scraper entry point for that marketplace with a narrow
query and low max item count.
- Assertions verify scrape output shape and parser viability, not exact listing
identity.
- eBay and Kijiji require live network access and fail on endpoint/parser breakage.
- Facebook is strict: missing or expired `FACEBOOK_COOKIE` fails the live suite instead
of skipping.
## Test Data
- Use stable broad Canadian queries such as `iphone` or `laptop` to reduce empty-result
risk.
- Use low limits to avoid unnecessary load and rate-limit pressure.
- Avoid exact prices, titles, listing IDs, or ordering assumptions.
## Failure Meaning
- Empty result arrays fail because live parser logic did not produce usable listings.
- Missing required fields fail because adapter contracts depend on those fields.
- Authentication failures fail the Facebook suite because its selected scope is strict.
## Verification
- Normal suite remains offline: `bun test packages/core/test`.
- Live suite runs by explicit script: `bun run test:live`.
- Full static checks remain via `bun run ci`.

View File

@@ -0,0 +1,173 @@
# Facebook Marketplace Anti-Bot Challenge Solver Design
## Summary
Add a challenge-detection and challenge-solving layer to the Facebook Marketplace
scraper so it can handle anti-bot gates (checkpoint pages, token rotation, cookie
requirements) programmatically.
Build the solver in pure Bun — no browser automation in production.
Use `agent-browser` only for one-time debug reconnaissance.
## Goals
- Identify which anti-bot challenge(s) Facebook Marketplace triggers against
programmatic HTTP requests.
- Implement detection + solving for each discovered challenge type.
- Wire the solver into `fetchFacebookItems` and `fetchFacebookItem` so challenges are
handled transparently.
- Follow the same pattern as the existing `ebay-challenge.ts` (detect → solve → retry
with clearance).
- Zero browser automation at runtime.
Pure `fetch` + `Bun` APIs + npm packages only.
## Non-Goals
- Solving login/auth-wall challenges (those require fresh cookies — not solvable
programmatically).
- Full account login automation (cookies must be provided by the user).
- Browser-based scraping or Puppeteer/Playwright integration.
- Solving challenges for non-Marketplace Facebook endpoints.
## Current State
The Facebook scraper (`packages/core/src/scrapers/facebook.ts`) fetches Marketplace
search and item pages via authenticated `fetch` with cookies from `FACEBOOK_COOKIE` env
var. It:
- Sends a browser-like header set (`sec-ch-ua`, `user-agent`, etc.)
- Parses SSR HTML for embedded JSON in script tags
- Has no challenge detection — if Facebook returns a challenge page, the scraper
silently fails (no listings parsed, classifies as “unknown”)
- Depends entirely on cookie freshness
The eBay scraper already follows the challenge-solver pattern in this codebase:
`ebay.ts` uses `warmEbaySession()`, `isChallengeRedirect()`, `isChallengeHtml()`, and
`solveEbayChallenge()` from `ebay-challenge.ts`.
## Chosen Approach
**Reconnaissance-first development:**
1. Use `agent-browser` (debug only) to capture a real Facebook Marketplace browsing
session via HAR.
2. Probe programmatic `fetch` to see what Facebook returns without a browser.
3. Diff the two to identify the gap (missing headers?
missing cookies? missing JS execution?).
4. Build a modular solver in `packages/core/src/utils/facebook-challenge.ts` that
detects each challenge type and applies the appropriate fix.
5. Wire it into `facebook.ts` following the eBay pattern.
## Design
### File Plan
| File | Purpose |
| --- | --- |
| `packages/core/src/utils/facebook-challenge.ts` | Challenge detection, solving, and cookie/session utilities |
| `packages/core/src/scrapers/facebook.ts` | Modified: warmup, challenge detection before parsing, retry loop |
| `packages/core/test/facebook-challenge.test.ts` | Unit tests with mock challenge HTML fixtures |
### Flow
```
fetchFacebookItems(searchUrl)
├── warmFacebookSession() → GET facebook.com/ (collect datr + Akamai cookies)
├── fetchHtml(searchUrl) → receives response
├── detectFacebookChallenge(response)
│ ├── checkpoint/challenge HTML → solveCheckpointChallenge()
│ ├── redirect to /login → fail (cookies expired)
│ ├── missing required cookies → regenerate session
│ ├── 429 rate limit → backoff + retry (existing http.ts handles this)
│ └── no challenge → proceed to parsing
├── if solveCheckpointChallenge succeeds → retry fetchHtml with clearance cookie
└── parse results
```
### Challenge Types (to be confirmed by reconnaissance)
| Type | Expected Signal | Solving Strategy |
| --- | --- | --- |
| Login wall | Redirect to `/login` or HTML `"You must log in"` | Fail — user must provide fresh cookies |
| Checkpoint page | HTML contains `checkpoint` or `challenge` path | Parse hidden form fields, compute proof-of-work if present, submit answer endpoint |
| `datr` cookie missing | No `datr` in cookie jar → request fails | Fetch homepage first to obtain `datr` (session warmup) |
| DTSG token needed | Form submissions fail with CSRF error | Extract `fb_dtsg` from page HTML, include in request body |
| GraphQL header check | Request blocked without internal headers | Extract `x-fb-friendly-name` from browser HAR, replicate |
| Akamai/bot-manager | Redirect loops or blank pages without Akamai cookies | Homepage warmup to collect `bm_sv`, `bm_mi`, etc. |
### Key Modules
**`facebook-challenge.ts`:**
```
// Session warmup — fetch homepage to prime cookies
warmFacebookSession(): Promise<Record<string, string>>
// Challenge detection
detectFacebookChallenge(html, status, url, headers): ChallengeType | null
// Checkpoint solver
solveCheckpointChallenge(html, cookies): Promise<ChallengeResult>
// DTSG token extraction
extractDtsg(html): string | null
// Cookie jar management (shared with ebay.ts pattern)
mergeCookies(...): Record<string, string>
```
**`ChallengeResult` type:**
```ts
interface ChallengeResult {
solved: boolean;
cookies?: Record<string, string>; // clearance cookies to replay
token?: string; // challenge response token
error?: string; // why it failed
}
```
### Error Handling
- Solver failure → return `ChallengeResult { solved: false, error: "..." }`, scraper
logs warning and returns empty results (never throws).
- Unrecognized challenge → log the response URL and HTML snippet for future analysis.
- Rate limits → handled by existing `http.ts` exponential backoff (no change needed).
- Solver timeout → 30s cap on any challenge computation, fall back to `solved: false`.
### Testing
| Test | What It Verifies |
| --- | --- |
| `detectFacebookChallenge` with sample checkpoint HTML | Correctly identifies checkpoint challenge |
| `detectFacebookChallenge` with normal search HTML | Returns null (no false positives) |
| `detectFacebookChallenge` with login redirect | Identifies auth-gated |
| `solveCheckpointChallenge` with known PoW params | Produces correct answer |
| `warmFacebookSession` with mocked fetch | Collects expected cookies |
| `extractDtsg` with sample page HTML | Extracts the DTSG token |
| Integration: fetch → challenge → solve → retry → results | End-to-end mock flow |
| Solver throws → scraper returns empty, no crash | Graceful fallback |
| Solver unknown challenge → logs warning, returns empty | No unhandled challenge crashes |
Test data will use anonymized HTML fixtures (no real user data).
## Reconnaissance Steps (debug-only, one-time)
1. **Probe programmatically:** `fetch` Marketplace search with/without cookies, record
status code and HTML.
2. **Browser session:** `agent-browser` → log into Facebook → navigate Marketplace →
record HAR.
3. **Diff analysis:** Compare browser request headers vs.
our programmatic headers.
4. **Cookie inventory:** List all cookies from browser session, identify which are
essential.
5. **Challenge trigger:** Identify what change in request signature triggers a
challenge.
6. **Replay test:** Replay the browser's exact request via `fetch` to confirm
headers/cookies are the differentiator.
All reconnaissance artifacts saved under `docs/facebook-challenge/`.
## Decisions Deferred to Post-Reconnaissance
- Exact challenge types and solving strategies (depends on what Facebook actually uses).
- Whether a PoW solver, CAPTCHA solver, or token-extraction approach is needed.
- npm package dependencies (only add what the reconnaissance proves necessary).

View File

@@ -11,7 +11,8 @@
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify", "build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify", "build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
"build:all": "bun run build:api && bun run build:mcp", "build:all": "bun run build:api && bun run build:mcp",
"ci": "bun run typecheck && biome ci", "ci": "bun run typecheck && biome check --write",
"test:live": "bun test --cwd packages/core test/live",
"clean": "rm -rf dist", "clean": "rm -rf dist",
"start": "./scripts/start.sh" "start": "./scripts/start.sh"
}, },

View File

@@ -19,5 +19,6 @@
## Verify ## Verify
- `bun test packages/api-server/test`
- `bun run --cwd packages/api-server build` - `bun run --cwd packages/api-server build`
- `bun run ci` - `bun run ci`

View File

@@ -1,82 +1,71 @@
import { fetchEbayItems } from "@marketplace-scrapers/core"; import { fetchEbayItems } from "@marketplace-scrapers/core";
import { logger } from "../logger"; import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseDollarPriceParam,
parseNonNegativeIntegerParam,
} from "./helpers";
/** /**
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly} * GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}
* Search eBay for listings (default: Buy It Now only, Canada only) * Search eBay for listings (default: Buy It Now only, Canada only)
*/ */
export async function ebayRoute(req: Request): Promise<Response> { export async function ebayRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url);
const SEARCH_QUERY = getRequiredSearchQuery(req);
if (SEARCH_QUERY instanceof Response) {
return SEARCH_QUERY;
}
const minPrice = parseDollarPriceParam(reqUrl.searchParams, "minPrice");
if (minPrice instanceof Response) {
return minPrice;
}
const maxPrice = parseDollarPriceParam(reqUrl.searchParams, "maxPrice");
if (maxPrice instanceof Response) {
return maxPrice;
}
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
const maxItems = parseNonNegativeIntegerParam(
reqUrl.searchParams,
"maxItems",
);
if (maxItems instanceof Response) {
return maxItems;
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
const opts = {
minPrice,
maxPrice,
strictMode,
exclusions,
keywords,
buyItNowOnly,
canadaOnly,
maxItems,
};
try { try {
const reqUrl = new URL(req.url);
const SEARCH_QUERY =
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
if (!SEARCH_QUERY)
return Response.json(
{
message:
"Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const minPriceParam = reqUrl.searchParams.get("minPrice");
const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined;
if (minPriceParam && (Number.isNaN(minPrice) || (minPrice ?? 0) < 0)) {
return Response.json(
{ message: "Invalid minPrice parameter" },
{ status: 400 },
);
}
const maxPriceParam = reqUrl.searchParams.get("maxPrice");
const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined;
if (maxPriceParam && (Number.isNaN(maxPrice) || (maxPrice ?? 0) < 0)) {
return Response.json(
{ message: "Invalid maxPrice parameter" },
{ status: 400 },
);
}
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
const exclusionsParam = reqUrl.searchParams.get("exclusions");
const exclusions = exclusionsParam
? exclusionsParam.split(",").map((s) => s.trim())
: [];
const keywordsParam = reqUrl.searchParams.get("keywords");
const keywords = keywordsParam
? keywordsParam.split(",").map((s) => s.trim())
: [SEARCH_QUERY];
const maxItemsParam = reqUrl.searchParams.get("maxItems");
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
if (maxItemsParam && (Number.isNaN(maxItems) || (maxItems ?? 0) < 0)) {
return Response.json(
{ message: "Invalid maxItems parameter" },
{ status: 400 },
);
}
const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true";
const opts = {
minPrice,
maxPrice,
strictMode,
exclusions,
keywords,
buyItNowOnly,
canadaOnly,
maxItems,
};
if (hideUnstableResults) { if (hideUnstableResults) {
const items = await fetchEbayItems(SEARCH_QUERY, 1, opts, { const items = await fetchEbayItems(SEARCH_QUERY, 1, opts, {
hideUnstableResults: true, hideUnstableResults: true,
}); });
if (items.results.length === 0 && items.unstableResults.length === 0) { if (items.results.length === 0 && items.unstableResults.length === 0) {
return Response.json( return emptySearchResponse();
{ message: "Search didn't return any results!" },
{ status: 404 },
);
} }
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} }
@@ -84,11 +73,9 @@ export async function ebayRoute(req: Request): Promise<Response> {
const items = await fetchEbayItems(SEARCH_QUERY, 1, opts); const items = await fetchEbayItems(SEARCH_QUERY, 1, opts);
const isEmpty = !items || items.length === 0; const isEmpty = !items || items.length === 0;
if (isEmpty) if (isEmpty) {
return Response.json( return emptySearchResponse();
{ message: "Search didn't return any results!" }, }
{ status: 404 },
);
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
logger.error("eBay scraping error:", error); logger.error("eBay scraping error:", error);

View File

@@ -1,5 +1,10 @@
import { fetchFacebookItems } from "@marketplace-scrapers/core"; import { fetchFacebookItems } from "@marketplace-scrapers/core";
import { logger } from "../logger"; import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseNonNegativeIntegerParam,
} from "./helpers";
/** /**
* GET /api/facebook?q={query}&location={location} * GET /api/facebook?q={query}&location={location}
@@ -8,24 +13,19 @@ import { logger } from "../logger";
export async function facebookRoute(req: Request): Promise<Response> { export async function facebookRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url); const reqUrl = new URL(req.url);
const SEARCH_QUERY = const SEARCH_QUERY = getRequiredSearchQuery(req);
req.headers.get("query") || reqUrl.searchParams.get("q") || null; if (SEARCH_QUERY instanceof Response) {
if (!SEARCH_QUERY) return SEARCH_QUERY;
return Response.json( }
{
message: "Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const LOCATION = reqUrl.searchParams.get("location") || "toronto"; const LOCATION = reqUrl.searchParams.get("location") || "toronto";
const maxItemsParam = reqUrl.searchParams.get("maxItems"); const maxItems = parseNonNegativeIntegerParam(
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25; reqUrl.searchParams,
if (maxItemsParam && (Number.isNaN(maxItems) || maxItems < 0)) { "maxItems",
return Response.json( 25,
{ message: "Invalid maxItems parameter" }, );
{ status: 400 }, if (maxItems instanceof Response) {
); return maxItems;
} }
const hideUnstableResults = const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true"; reqUrl.searchParams.get("unstableFilter") === "true";
@@ -42,20 +42,15 @@ export async function facebookRoute(req: Request): Promise<Response> {
}, },
); );
if (items.results.length === 0 && items.unstableResults.length === 0) { if (items.results.length === 0 && items.unstableResults.length === 0) {
return Response.json( return emptySearchResponse();
{ message: "Search didn't return any results!" },
{ status: 404 },
);
} }
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} }
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems); const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems);
if (!items || items.length === 0) if (!items || items.length === 0) {
return Response.json( return emptySearchResponse();
{ message: "Search didn't return any results!" }, }
{ status: 404 },
);
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
logger.error("Facebook scraping error:", error); logger.error("Facebook scraping error:", error);

View File

@@ -0,0 +1,64 @@
/**
 * Resolves the mandatory search term for a marketplace route.
 *
 * The `query` request header wins when it holds a non-empty value; otherwise
 * the `q` URL search parameter is used.
 *
 * @param req - Incoming HTTP request.
 * @returns The search term, or a 400 JSON `Response` when neither source
 *   supplies a non-empty value. Callers distinguish the two with
 *   `instanceof Response`.
 */
export function getRequiredSearchQuery(req: Request): string | Response {
  const params = new URL(req.url).searchParams;

  const headerQuery = req.headers.get("query");
  if (headerQuery) {
    return headerQuery;
  }

  const paramQuery = params.get("q");
  if (paramQuery) {
    return paramQuery;
  }

  return Response.json(
    {
      message: "Request didn't have 'query' header or 'q' search parameter!",
    },
    { status: 400 },
  );
}
/**
 * Parses a search parameter that must be a non-negative base-10 integer.
 *
 * Only plain digit strings are accepted: signs, decimals, whitespace, hex
 * notation and the empty string are all rejected. Digit strings too large to
 * be represented exactly (beyond `Number.MAX_SAFE_INTEGER`) are rejected as
 * well, so callers never receive a rounded value or `Infinity`.
 *
 * @param searchParams - Query parameters to read from.
 * @param name - Parameter name; also used in the error message.
 * @param defaultValue - Value returned when the parameter is absent.
 * @returns The parsed integer, `defaultValue` when the parameter is absent,
 * or a 400 JSON `Response` when the value is present but invalid.
 */
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue: number,
): number | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue?: number,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return defaultValue;
  }

  // The regex guards the syntax; the safe-integer check guards the magnitude
  // (Number("9".repeat(400)) is Infinity, which must not leak to callers).
  const parsed = /^\d+$/.test(rawValue) ? Number(rawValue) : Number.NaN;
  if (!Number.isSafeInteger(parsed)) {
    return Response.json(
      { message: `Invalid ${name} parameter` },
      { status: 400 },
    );
  }

  return parsed;
}
/**
 * Parses a dollar-denominated price parameter into integer cents.
 *
 * Accepted syntax is one or more digits optionally followed by a decimal
 * point and one or two digits (for example `1000`, `1.5`, `999.99`).
 * The cents value is assembled from the digit groups rather than by
 * floating-point multiplication, and amounts whose cents value is not a safe
 * integer are rejected so callers never receive `Infinity` or a rounded value.
 *
 * @param searchParams - Query parameters to read from.
 * @param name - Parameter name; also used in the error message.
 * @returns The price in cents, `undefined` when the parameter is absent, or a
 * 400 JSON `Response` when the value is present but invalid.
 */
export function parseDollarPriceParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return undefined;
  }

  const invalid = (): Response =>
    Response.json({ message: `Invalid ${name} parameter` }, { status: 400 });

  if (!/^\d+(?:\.\d{1,2})?$/.test(rawValue)) {
    return invalid();
  }

  // "1.5" -> dollars "1", fraction "5" -> padded to "50" -> 150 cents.
  const [dollars = "0", fraction = ""] = rawValue.split(".");
  const cents = Number(dollars) * 100 + Number(fraction.padEnd(2, "0"));
  if (!Number.isSafeInteger(cents)) {
    return invalid();
  }

  return cents;
}
/**
 * Builds the 404 JSON response used when a search yields nothing.
 *
 * @param hint - Optional guidance appended after the base message, separated
 * by a single space. An empty hint is treated the same as no hint.
 * @returns A `Response` with status 404 and a `{ message }` JSON body.
 */
export function emptySearchResponse(hint?: string): Response {
  let message = "Search didn't return any results!";
  if (hint) {
    message = `Search didn't return any results! ${hint}`;
  }
  return Response.json({ message }, { status: 404 });
}

View File

@@ -1,5 +1,11 @@
import { fetchKijijiItems } from "@marketplace-scrapers/core"; import { fetchKijijiItems } from "@marketplace-scrapers/core";
import { logger } from "../logger"; import { logger } from "../logger";
import {
emptySearchResponse,
getRequiredSearchQuery,
parseDollarPriceParam,
parseNonNegativeIntegerParam,
} from "./helpers";
/** /**
* GET /api/kijiji?q={query} * GET /api/kijiji?q={query}
@@ -8,39 +14,26 @@ import { logger } from "../logger";
export async function kijijiRoute(req: Request): Promise<Response> { export async function kijijiRoute(req: Request): Promise<Response> {
const reqUrl = new URL(req.url); const reqUrl = new URL(req.url);
const SEARCH_QUERY = const SEARCH_QUERY = getRequiredSearchQuery(req);
req.headers.get("query") || reqUrl.searchParams.get("q") || null; if (SEARCH_QUERY instanceof Response) {
if (!SEARCH_QUERY) return SEARCH_QUERY;
return Response.json( }
{
message: "Request didn't have 'query' header or 'q' search parameter!",
},
{ status: 400 },
);
const maxPagesParam = reqUrl.searchParams.get("maxPages"); const maxPages = parseNonNegativeIntegerParam(
const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5; reqUrl.searchParams,
if (maxPagesParam && (Number.isNaN(maxPages) || maxPages < 0)) { "maxPages",
return Response.json( 5,
{ message: "Invalid maxPages parameter" }, );
{ status: 400 }, if (maxPages instanceof Response) {
); return maxPages;
} }
const priceMinParam = reqUrl.searchParams.get("priceMin"); const priceMin = parseDollarPriceParam(reqUrl.searchParams, "priceMin");
const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined; if (priceMin instanceof Response) {
if (priceMinParam && (Number.isNaN(priceMin) || (priceMin ?? 0) < 0)) { return priceMin;
return Response.json(
{ message: "Invalid priceMin parameter" },
{ status: 400 },
);
} }
const priceMaxParam = reqUrl.searchParams.get("priceMax"); const priceMax = parseDollarPriceParam(reqUrl.searchParams, "priceMax");
const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined; if (priceMax instanceof Response) {
if (priceMaxParam && (Number.isNaN(priceMax) || (priceMax ?? 0) < 0)) { return priceMax;
return Response.json(
{ message: "Invalid priceMax parameter" },
{ status: 400 },
);
} }
const hideUnstableResults = const hideUnstableResults =
reqUrl.searchParams.get("unstableFilter") === "true"; reqUrl.searchParams.get("unstableFilter") === "true";
@@ -62,7 +55,6 @@ export async function kijijiRoute(req: Request): Promise<Response> {
maxPages, maxPages,
priceMin, priceMin,
priceMax, priceMax,
cookies: reqUrl.searchParams.get("cookies") || undefined,
}; };
try { try {
@@ -76,9 +68,9 @@ export async function kijijiRoute(req: Request): Promise<Response> {
{ hideUnstableResults: true }, { hideUnstableResults: true },
); );
if (items.results.length === 0 && items.unstableResults.length === 0) { if (items.results.length === 0 && items.unstableResults.length === 0) {
return Response.json( return emptySearchResponse(
{ message: "Search didn't return any results!" }, `Kijiji matches ALL words in the query against listing titles. ` +
{ status: 404 }, `Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
); );
} }
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
@@ -91,11 +83,12 @@ export async function kijijiRoute(req: Request): Promise<Response> {
searchOptions, searchOptions,
{}, {},
); );
if (!items || items.length === 0) if (!items || items.length === 0) {
return Response.json( return emptySearchResponse(
{ message: "Search didn't return any results!" }, `Kijiji matches ALL words in the query against listing titles. ` +
{ status: 404 }, `Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
); );
}
return Response.json(items, { status: 200 }); return Response.json(items, { status: 200 });
} catch (error) { } catch (error) {
logger.error("Kijiji scraping error:", error); logger.error("Kijiji scraping error:", error);

View File

@@ -76,7 +76,7 @@ describe("API routes", () => {
}); });
}); });
test("kijijiRoute passes cookies query parameter", async () => { test("kijijiRoute ignores cookies query parameter", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji"); const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute( await kijijiRoute(
@@ -98,7 +98,6 @@ describe("API routes", () => {
maxPages: 3, maxPages: 3,
priceMin: undefined, priceMin: undefined,
priceMax: undefined, priceMax: undefined,
cookies: "s=1",
}, },
{}, {},
); );
@@ -188,7 +187,6 @@ describe("API routes", () => {
maxPages: 5, maxPages: 5,
priceMin: undefined, priceMin: undefined,
priceMax: undefined, priceMax: undefined,
cookies: undefined,
}, },
{}, {},
{ {
@@ -279,12 +277,29 @@ describe("API routes", () => {
maxPages: 5, maxPages: 5,
priceMin: undefined, priceMin: undefined,
priceMax: undefined, priceMax: undefined,
cookies: undefined,
}, },
{}, {},
); );
}); });
test("kijijiRoute forwards dollar price filters to core as cents", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request(
"http://localhost/api/kijiji?q=laptop&priceMin=999.99&priceMax=1000",
),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
"laptop",
4,
"https://www.kijiji.ca",
expect.objectContaining({ priceMin: 99_999, priceMax: 100_000 }),
{},
);
});
test("kijijiRoute does not forward unstableFilter when false", async () => { test("kijijiRoute does not forward unstableFilter when false", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji"); const { kijijiRoute } = await import("../src/routes/kijiji");
@@ -307,7 +322,6 @@ describe("API routes", () => {
maxPages: 5, maxPages: 5,
priceMin: undefined, priceMin: undefined,
priceMax: undefined, priceMax: undefined,
cookies: undefined,
}, },
{}, {},
); );
@@ -398,7 +412,8 @@ describe("API routes", () => {
expect(response.status).toBe(404); expect(response.status).toBe(404);
const body = await response.json(); const body = await response.json();
expect(body.message).toBe("Search didn't return any results!"); expect(body.message).toStartWith("Search didn't return any results!");
expect(body.message).toContain("Kijiji matches ALL words");
}); });
test("ebayRoute forwards maxItems to core in default mode", async () => { test("ebayRoute forwards maxItems to core in default mode", async () => {
@@ -417,6 +432,24 @@ describe("API routes", () => {
); );
}); });
test("ebayRoute forwards dollar price filters to core as cents", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
fetchEbayItems.mockImplementation(() => Promise.resolve([{ title: "a" }]));
await ebayRoute(
new Request(
"http://localhost/api/ebay?q=macbook&minPrice=999.99&maxPrice=1000",
),
);
expect(fetchEbayItems).toHaveBeenCalledWith(
"macbook",
1,
expect.objectContaining({ minPrice: 99_999, maxPrice: 100_000 }),
);
});
test("ebayRoute passes through scraper payload unchanged in unstable mode", async () => { test("ebayRoute passes through scraper payload unchanged in unstable mode", async () => {
const { ebayRoute } = await import("../src/routes/ebay"); const { ebayRoute } = await import("../src/routes/ebay");
@@ -505,6 +538,66 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid maxItems parameter"); expect(body.message).toBe("Invalid maxItems parameter");
}); });
test("ebayRoute returns 400 for non-integer maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for decimal maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=1.5"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for empty maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for whitespace maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for hex maxItems", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxItems=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for invalid maxItems", async () => { test("facebookRoute returns 400 for invalid maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook"); const { facebookRoute } = await import("../src/routes/facebook");
@@ -517,6 +610,150 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid maxItems parameter"); expect(body.message).toBe("Invalid maxItems parameter");
}); });
test("facebookRoute returns 400 for non-integer maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for decimal maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=1.5"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for empty maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for whitespace maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("facebookRoute returns 400 for hex maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook");
const response = await facebookRoute(
new Request("http://localhost/api/facebook?q=laptop&maxItems=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxItems parameter");
});
test("ebayRoute returns 400 for empty minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for whitespace minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for hex minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for empty maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for whitespace maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for hex maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for non-integer minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid minPrice parameter");
});
test("ebayRoute returns 400 for invalid minPrice", async () => { test("ebayRoute returns 400 for invalid minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay"); const { ebayRoute } = await import("../src/routes/ebay");
@@ -529,6 +766,32 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid minPrice parameter"); expect(body.message).toBe("Invalid minPrice parameter");
}); });
test("ebayRoute accepts decimal minPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&minPrice=1.5"),
);
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
expect.objectContaining({ minPrice: 150 }),
);
});
test("ebayRoute returns 400 for non-integer maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
const response = await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPrice parameter");
});
test("ebayRoute returns 400 for invalid maxPrice", async () => { test("ebayRoute returns 400 for invalid maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay"); const { ebayRoute } = await import("../src/routes/ebay");
@@ -541,6 +804,32 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid maxPrice parameter"); expect(body.message).toBe("Invalid maxPrice parameter");
}); });
test("ebayRoute accepts decimal maxPrice", async () => {
const { ebayRoute } = await import("../src/routes/ebay");
await ebayRoute(
new Request("http://localhost/api/ebay?q=laptop&maxPrice=1.5"),
);
expect(fetchEbayItems).toHaveBeenCalledWith(
"laptop",
1,
expect.objectContaining({ maxPrice: 150 }),
);
});
test("kijijiRoute returns 400 for decimal maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=1.5"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for invalid maxPages", async () => { test("kijijiRoute returns 400 for invalid maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji"); const { kijijiRoute } = await import("../src/routes/kijiji");
@@ -553,6 +842,54 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid maxPages parameter"); expect(body.message).toBe("Invalid maxPages parameter");
}); });
test("kijijiRoute returns 400 for non-integer maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for empty maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for whitespace maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for hex maxPages", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&maxPages=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid maxPages parameter");
});
test("kijijiRoute returns 400 for invalid priceMin", async () => { test("kijijiRoute returns 400 for invalid priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji"); const { kijijiRoute } = await import("../src/routes/kijiji");
@@ -565,6 +902,70 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid priceMin parameter"); expect(body.message).toBe("Invalid priceMin parameter");
}); });
test("kijijiRoute accepts decimal priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=1.5"),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
"laptop",
4,
"https://www.kijiji.ca",
expect.objectContaining({ priceMin: 150 }),
{},
);
});
test("kijijiRoute returns 400 for non-integer priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute returns 400 for empty priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute returns 400 for whitespace priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute returns 400 for hex priceMin", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMin=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMin parameter");
});
test("kijijiRoute returns 400 for invalid priceMax", async () => { test("kijijiRoute returns 400 for invalid priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji"); const { kijijiRoute } = await import("../src/routes/kijiji");
@@ -577,6 +978,70 @@ describe("API routes", () => {
expect(body.message).toBe("Invalid priceMax parameter"); expect(body.message).toBe("Invalid priceMax parameter");
}); });
test("kijijiRoute accepts decimal priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=1.5"),
);
expect(fetchKijijiItems).toHaveBeenCalledWith(
"laptop",
4,
"https://www.kijiji.ca",
expect.objectContaining({ priceMax: 150 }),
{},
);
});
test("kijijiRoute returns 400 for non-integer priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=10abc"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("kijijiRoute returns 400 for empty priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax="),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("kijijiRoute returns 400 for whitespace priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=%20%20"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("kijijiRoute returns 400 for hex priceMax", async () => {
const { kijijiRoute } = await import("../src/routes/kijiji");
const response = await kijijiRoute(
new Request("http://localhost/api/kijiji?q=laptop&priceMax=0x10"),
);
expect(response.status).toBe(400);
const body = await response.json();
expect(body.message).toBe("Invalid priceMax parameter");
});
test("facebookRoute returns 400 for negative maxItems", async () => { test("facebookRoute returns 400 for negative maxItems", async () => {
const { facebookRoute } = await import("../src/routes/facebook"); const { facebookRoute } = await import("../src/routes/facebook");

View File

@@ -5,5 +5,5 @@
"@/*": ["./src/*"] "@/*": ["./src/*"]
} }
}, },
"include": ["./src", "./test"] "include": ["./src", "./test", "../../types/**/*.d.ts"]
} }

View File

@@ -18,6 +18,7 @@
- Isolate marketplace-specific hacks/selectors inside the owning scraper file unless they are genuinely shared. - Isolate marketplace-specific hacks/selectors inside the owning scraper file unless they are genuinely shared.
- If a new helper is scraper-local, keep it local. Do not promote it into `utils` early. - If a new helper is scraper-local, keep it local. Do not promote it into `utils` early.
- If you change shared types or exports, check downstream imports in both adapter packages. - If you change shared types or exports, check downstream imports in both adapter packages.
- eBay SplashUI challenge handling needs raw `fetch` for manual redirects and `getSetCookie()`; use `fetchHtml` only at the point where the flow needs nothing but the final HTML.
## Tests ## Tests

View File

@@ -11,6 +11,7 @@
}, },
"dependencies": { "dependencies": {
"@typescript/native-preview": "catalog:", "@typescript/native-preview": "catalog:",
"argon2-wasm-pro": "1.1.0",
"cli-progress": "^3.12.0", "cli-progress": "^3.12.0",
"linkedom": "^0.18.12", "linkedom": "^0.18.12",
"unidecode": "^1.1.0" "unidecode": "^1.1.0"

View File

@@ -39,6 +39,7 @@ export * from "./types/common";
// Export shared utilities // Export shared utilities
export * from "./utils/cookies"; export * from "./utils/cookies";
export * from "./utils/delay"; export * from "./utils/delay";
export * from "./utils/ebay-challenge";
export * from "./utils/format"; export * from "./utils/format";
export * from "./utils/http"; export * from "./utils/http";
export * from "./utils/unstable"; export * from "./utils/unstable";

View File

@@ -10,6 +10,8 @@ import {
formatCookiesForHeader, formatCookiesForHeader,
} from "../utils/cookies"; } from "../utils/cookies";
import { delay } from "../utils/delay"; import { delay } from "../utils/delay";
import { solveEbayChallenge } from "../utils/ebay-challenge";
import { fetchHtml, HttpError, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger"; import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable"; import { classifyUnstableListings } from "../utils/unstable";
@@ -40,6 +42,229 @@ export interface EbayListingDetails {
} }
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C|US)\s*\$|\s*[$£¥])/u; const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C|US)\s*\$|\s*[$£¥])/u;
// Matches absolute http(s) URLs on ebay.ca or ebay.com (with or without the
// "www." prefix) whose path starts with "/itm/".
const EBAY_ITEM_URL_RE = /^https?:\/\/(?:www\.)?ebay\.(?:ca|com)\/itm\//u;
/**
 * Decodes the HTML character references that appear in scraped markup and
 * trims surrounding whitespace.
 *
 * Handles `&amp;`, `&quot;`, `&lt;`, `&gt;` and decimal / hexadecimal numeric
 * references such as `&#39;` or `&#x27;`. Decoding happens in a single pass so
 * already-escaped text is unescaped exactly one level (`&amp;lt;` becomes
 * `&lt;`, not `<`). References that are unknown or outside the Unicode range
 * are left untouched.
 *
 * @param value - Raw text that may contain character references.
 * @returns The decoded, trimmed text.
 */
function decodeHtmlEntities(value: string): string {
  const namedEntities: Record<string, string> = {
    amp: "&",
    quot: '"',
    lt: "<",
    gt: ">",
  };

  return value
    .replace(
      /&(?:(amp|quot|lt|gt)|#(\d+)|#[xX]([0-9a-fA-F]+));/g,
      (
        entity: string,
        name?: string,
        decimal?: string,
        hex?: string,
      ): string => {
        if (name !== undefined) {
          return namedEntities[name] ?? entity;
        }
        const codePoint =
          decimal !== undefined
            ? Number.parseInt(decimal, 10)
            : Number.parseInt(hex ?? "", 16);
        // String.fromCodePoint throws above U+10FFFF; keep such input verbatim.
        return codePoint > 0 && codePoint <= 0x10ffff
          ? String.fromCodePoint(codePoint)
          : entity;
      },
    )
    .trim();
}
/**
 * Reduces an HTML fragment to plain text: every tag becomes a space, runs of
 * whitespace collapse to a single space, and the result is passed through
 * `decodeHtmlEntities`.
 */
function stripHtml(value: string): string {
  const withoutTags = value.replace(/<[^>]*>/g, " ");
  const singleSpaced = withoutTags.replace(/\s+/g, " ");
  return decodeHtmlEntities(singleSpaced);
}
/**
 * Extracts the raw value of an attribute from the text of a single HTML tag.
 *
 * The attribute name is matched case-insensitively and must be preceded by
 * whitespace. Double-quoted, single-quoted and unquoted values are supported.
 * The name is escaped before being embedded in the pattern, so characters
 * such as `.` or `(` are matched literally instead of acting as regex syntax.
 *
 * @param tag - Source text of the opening tag, e.g. `<a href="/itm/1">`.
 * @param attrName - Attribute name to look up.
 * @returns The attribute value exactly as written (entities are not decoded),
 * or `null` when the attribute is not present.
 */
function getHtmlAttr(tag: string, attrName: string): string | null {
  const escapedName = attrName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const attrMatch = tag.match(
    new RegExp(`\\s${escapedName}=(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`, "iu"),
  );
  return attrMatch?.[1] ?? attrMatch?.[2] ?? attrMatch?.[3] ?? null;
}
/**
 * Turns a possibly relative, entity-encoded link into an absolute eBay item
 * URL.
 *
 * The link is entity-decoded, resolved against `https://www.ebay.ca`, and
 * accepted only when the resulting href matches `EBAY_ITEM_URL_RE`.
 *
 * @returns The absolute href, or `null` when the link cannot be parsed or is
 * not an item URL.
 */
function normalizeEbayUrl(url: string): string | null {
  const candidate = decodeHtmlEntities(url);

  let absoluteHref: string;
  try {
    absoluteHref = new URL(candidate, "https://www.ebay.ca").href;
  } catch {
    return null;
  }

  if (!EBAY_ITEM_URL_RE.test(absoluteHref)) {
    return null;
  }
  return absoluteHref;
}
/**
 * Assembles an `EbayListingDetails` record from raw url / title / price text.
 *
 * The url is normalized with `normalizeEbayUrl`, title and price are reduced
 * to plain text with `stripHtml`, and the price text is parsed with
 * `parseEbayPrice`.
 *
 * @returns The listing, or `null` when the url is not an item url, the title
 * is empty or the "Shop on eBay" placeholder, or the price cannot be parsed.
 */
function toEbayListing(
  url: string,
  title: string,
  priceText: string,
): EbayListingDetails | null {
  const itemHref = normalizeEbayUrl(url);
  const heading = stripHtml(title);
  const priceLabel = stripHtml(priceText);
  const parsedPrice = parseEbayPrice(priceLabel);

  const isPlaceholderTitle = heading === "Shop on eBay";
  if (!itemHref || !heading || isPlaceholderTitle || !parsedPrice) {
    return null;
  }

  const { cents, currency } = parsedPrice;
  return {
    url: itemHref,
    title: heading,
    listingPrice: { amountFormatted: priceLabel, cents, currency },
    listingType: "OFFER",
    listingStatus: "ACTIVE",
    address: null,
  };
}
/**
 * Returns the first non-blank string found under any of `keys`, trimmed.
 *
 * Keys are tried in the order given; values that are not strings, or that
 * contain only whitespace, are skipped.
 *
 * @returns The trimmed string, or `null` when no key yields one.
 */
function readObjectString(
  value: Record<string, unknown>,
  keys: string[],
): string | null {
  for (const key of keys) {
    const raw = value[key];
    if (typeof raw !== "string") continue;

    const trimmed = raw.trim();
    if (trimmed.length > 0) {
      return trimmed;
    }
  }
  return null;
}
/**
 * Extracts a price string from a payload object.
 *
 * Lookup order:
 * 1. A string stored directly under `price`, `currentPrice` or `displayPrice`.
 * 2. For each of `price`, `currentPrice`, `displayPrice`, `priceInfo` holding
 *    a non-array object: a string under `amount`, `formatted` or `text`;
 *    otherwise its `value` field (string or number), prefixed with the
 *    `currency` / `currencyCode` string when one is present.
 *
 * @returns The price text, or `null` when nothing usable is found.
 */
function readPayloadPrice(value: Record<string, unknown>): string | null {
  const flatPrice = readObjectString(value, [
    "price",
    "currentPrice",
    "displayPrice",
  ]);
  if (flatPrice) return flatPrice;

  const nestedKeys = ["price", "currentPrice", "displayPrice", "priceInfo"];
  for (const nestedKey of nestedKeys) {
    const nested = value[nestedKey];
    const isPlainObject =
      Boolean(nested) && typeof nested === "object" && !Array.isArray(nested);
    if (!isPlainObject) continue;

    const priceFields = nested as Record<string, unknown>;

    const label = readObjectString(priceFields, ["amount", "formatted", "text"]);
    if (label) return label;

    const rawAmount = priceFields.value;
    const currencyCode = readObjectString(priceFields, [
      "currency",
      "currencyCode",
    ]);

    // A string amount is returned as written (untrimmed) once it is non-blank.
    if (typeof rawAmount === "string" && rawAmount.trim() !== "") {
      return currencyCode ? `${currencyCode} ${rawAmount}` : rawAmount;
    }
    if (typeof rawAmount === "number") {
      return currencyCode ? `${currencyCode} ${rawAmount}` : String(rawAmount);
    }
  }

  return null;
}
/**
 * Walks an arbitrary parsed payload depth-first and appends every listing it
 * can build to `results`.
 *
 * An object contributes a listing when it exposes a url (`itemWebUrl`,
 * `itemUrl`, `url`, `webUrl`), a title (`title`, `itemTitle`, `name`) and a
 * price (via `readPayloadPrice`) that `toEbayListing` accepts. Such an object
 * is not descended into further; every other object has all of its property
 * values visited. Arrays are visited element by element; primitives and
 * `null` are ignored.
 *
 * @param value - Any node of the parsed payload.
 * @param results - Output array, mutated in place.
 */
function collectPayloadListings(
  value: unknown,
  results: EbayListingDetails[],
): void {
  if (typeof value !== "object" || value === null) return;

  if (Array.isArray(value)) {
    value.forEach((entry) => {
      collectPayloadListings(entry, results);
    });
    return;
  }

  const node = value as Record<string, unknown>;
  const link = readObjectString(node, [
    "itemWebUrl",
    "itemUrl",
    "url",
    "webUrl",
  ]);
  const heading = readObjectString(node, ["title", "itemTitle", "name"]);
  const price = readPayloadPrice(node);

  const listing =
    link && heading && price ? toEbayListing(link, heading, price) : null;
  if (listing) {
    results.push(listing);
    return;
  }

  Object.values(node).forEach((child) => {
    collectPayloadListings(child, results);
  });
}
/**
 * Collects listings from every `data-inlinepayload` attribute in the page.
 *
 * Each attribute value (double-quoted, single-quoted or bare) is
 * entity-decoded, URI-decoded, parsed as JSON and handed to
 * `collectPayloadListings`. A payload that fails any of those steps is
 * skipped without affecting the others.
 *
 * @returns All listings found, in document order; not de-duplicated.
 */
function parseEmbeddedEbayListings(
  htmlString: HTMLString,
): EbayListingDetails[] {
  const listings: EbayListingDetails[] = [];
  const payloadAttrRe =
    /data-inlinepayload=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/giu;

  for (const [, doubleQuoted, singleQuoted, bare] of htmlString.matchAll(
    payloadAttrRe,
  )) {
    const encoded = doubleQuoted ?? singleQuoted ?? bare;
    if (!encoded) continue;

    try {
      const json = decodeURIComponent(decodeHtmlEntities(encoded));
      collectPayloadListings(JSON.parse(json), listings);
    } catch {
      // eBay inline payloads vary by module; non-JSON payloads are ignored.
    }
  }

  return listings;
}
/**
 * Regex-based extraction of listings from `s-card` markup, working on the raw
 * HTML string rather than a parsed DOM.
 *
 * Each card yields a listing when it contains an anchor whose href includes
 * `/itm/`, an element with class `s-card__title`, and an element with class
 * `s-card__price`, and `toEbayListing` accepts the three values.
 *
 * @returns The listings found, in document order; not de-duplicated.
 */
function parseSCardHtmlListings(htmlString: HTMLString): EbayListingDetails[] {
const results: EbayListingDetails[] = [];
// A "card" is the text from a <div> whose class contains `s-card` up to (but
// not including) the next such <div>, `</body>` or `</html>`. The lazy body
// plus lookahead means a trailing card with none of those terminators after
// it is not matched.
const cardMatches = htmlString.matchAll(
/<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)[\s\S]*?(?=<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)|<\/body>|<\/html>)/giu,
);
for (const cardMatch of cardMatches) {
const cardHtml = cardMatch[0];
// First <a> opening tag in the card whose href contains "/itm/".
const linkTag = cardHtml.match(
/<a\b[^>]*\bhref=(?:"[^"]*\/itm\/[^"]*"|'[^']*\/itm\/[^']*'|[^\s>]*\/itm\/[^\s>]*)[^>]*>/iu,
)?.[0];
// Group 1 is the content of the title element, captured lazily up to the
// first closing tag of any kind that follows it.
const titleMatch = cardHtml.match(
/<[^>]*\bclass=(?:"[^"]*\bs-card__title\b[^"]*"|'[^']*\bs-card__title\b[^']*'|[^\s>]*\bs-card__title\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu,
);
// Same capture rule as the title, applied to the price element.
const priceMatch = cardHtml.match(
/<[^>]*\bclass=(?:"[^"]*\bs-card__price\b[^"]*"|'[^']*\bs-card__price\b[^']*'|[^\s>]*\bs-card__price\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu,
);
// Skip cards missing any of the three pieces (or with empty captures).
if (!linkTag || !titleMatch?.[1] || !priceMatch?.[1]) continue;
const href = getHtmlAttr(linkTag, "href");
if (!href) continue;
// toEbayListing returns null for non-item urls, placeholder titles and
// unparseable prices; those cards are dropped.
const listing = toEbayListing(href, titleMatch[1], priceMatch[1]);
if (listing) results.push(listing);
}
return results;
}
/**
 * Drop listings whose canonical item URL has already been seen.
 *
 * @param listings Listings in priority order; the first occurrence of each
 *                 canonical URL is the one that is kept.
 * @returns A new array with duplicates removed, original order preserved.
 */
function dedupeEbayListings(
  listings: EbayListingDetails[],
): EbayListingDetails[] {
  const seenCanonicalUrls = new Set<string>();
  return listings.filter((listing) => {
    const key = canonicalizeEbayItemUrl(listing.url);
    if (seenCanonicalUrls.has(key)) return false;
    seenCanonicalUrls.add(key);
    return true;
  });
}
function canonicalizeEbayItemUrl(url: string): string { function canonicalizeEbayItemUrl(url: string): string {
try { try {
@@ -102,17 +327,6 @@ function parseEbayPrice(
return { cents, currency }; return { cents, currency };
} }
/**
 * Error raised for a non-successful HTTP response.
 * Carries the response status code and the URL that was requested.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
// ----------------------------- Parsing ----------------------------- // ----------------------------- Parsing -----------------------------
/** /**
@@ -124,6 +338,11 @@ function parseEbayListings(
exclusions: string[], exclusions: string[],
strictMode: boolean, strictMode: boolean,
): EbayListingDetails[] { ): EbayListingDetails[] {
const embeddedListings = parseEmbeddedEbayListings(htmlString);
if (embeddedListings.length > 0) {
return dedupeEbayListings(embeddedListings);
}
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const results: EbayListingDetails[] = []; const results: EbayListingDetails[] = [];
const seenUrls = new Set<string>(); const seenUrls = new Set<string>();
@@ -359,13 +578,34 @@ function parseEbayListings(
} }
} }
return results; if (results.length > 0) {
return results;
}
return dedupeEbayListings(
parseSCardHtmlListings(htmlString).filter((listing) => {
if (
exclusions.some((exclusion) =>
listing.title.toLowerCase().includes(exclusion.toLowerCase()),
)
) {
return false;
}
return (
!strictMode ||
keywords.some((keyword) =>
listing.title.toLowerCase().includes(keyword.toLowerCase()),
)
);
}),
);
} }
// ----------------------------- Cookie Loading ----------------------------- // ----------------------------- Session & Challenge -----------------------------
/** /**
* Load eBay cookies from EBAY_COOKIE * Load eBay cookies from EBAY_COOKIE env var
*/ */
async function loadEbayCookies(): Promise<string | undefined> { async function loadEbayCookies(): Promise<string | undefined> {
try { try {
@@ -379,6 +619,92 @@ async function loadEbayCookies(): Promise<string | undefined> {
} }
} }
const EBAY_UA =
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

/**
 * Request the eBay homepage and turn its `Set-Cookie` headers into a single
 * `Cookie` header string. The original author's note says these are Akamai
 * fingerprinting cookies needed before any search request.
 *
 * Redirects are not followed. Any non-OK response, an empty cookie set, or a
 * thrown error yields `undefined` — this warm-up is best-effort.
 *
 * @returns `"name=value; name2=value2"` or `undefined` when nothing was collected.
 */
async function warmEbaySession(): Promise<string | undefined> {
  try {
    const homepage = await fetch("https://www.ebay.ca", {
      redirect: "manual",
      headers: {
        "User-Agent": EBAY_UA,
        Accept:
          "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
      },
    });
    if (!homepage.ok) return undefined;

    // Keep only the leading name=value of each Set-Cookie header; later
    // headers with the same name overwrite earlier ones.
    const collected: Record<string, string> = {};
    (homepage.headers.getSetCookie?.() ?? []).forEach((header) => {
      const parsed = /^([^=]+)=([^;]+)/.exec(header);
      if (parsed?.[1] && parsed[2]) collected[parsed[1]] = parsed[2];
    });

    const pairs = Object.entries(collected).map(
      ([name, value]) => `${name}=${value}`,
    );
    return pairs.length > 0 ? pairs.join("; ") : undefined;
  } catch {
    return undefined;
  }
}
/**
 * Merge several `Cookie` header strings into one.
 *
 * Strings are processed left to right, so a cookie name that appears in a
 * later string overrides the value from an earlier one. Segments without an
 * `=` (or starting with `=`) are ignored; names and values are trimmed.
 *
 * @param base      First cookie string (may be empty).
 * @param additions Further cookie strings; `undefined`/empty entries are skipped.
 * @returns The merged cookies as `"name=value; name2=value2"`.
 */
function mergeCookies(
  base: string,
  ...additions: (string | undefined)[]
): string {
  const merged: Record<string, string> = {};
  const sources = [base, ...additions.filter(Boolean)] as string[];

  sources
    .flatMap((source) => source.split(";"))
    .forEach((segment) => {
      const separatorIndex = segment.indexOf("=");
      if (separatorIndex <= 0) return;
      const name = segment.substring(0, separatorIndex).trim();
      merged[name] = segment.substring(separatorIndex + 1).trim();
    });

  const pairs: string[] = [];
  for (const [name, value] of Object.entries(merged)) {
    pairs.push(`${name}=${value}`);
  }
  return pairs.join("; ");
}
/**
 * Copy the cookies set by a response into an existing cookie jar.
 *
 * Only the leading `name=value` of each `Set-Cookie` header is used;
 * attributes after the first `;` are dropped and headers with an empty name
 * or value are skipped. Existing entries with the same name are overwritten.
 *
 * @param res Response whose `Set-Cookie` headers are read.
 * @param jar Cookie map that is mutated in place.
 */
function collectResponseCookies(res: Response, jar: Record<string, string>) {
  const setCookieHeaders = res.headers.getSetCookie?.() ?? [];
  setCookieHeaders.forEach((header) => {
    const [, name, value] = header.match(/^([^=]+)=([^;]+)/) ?? [];
    if (name && value) jar[name] = value;
  });
}
/**
 * Serialize a cookie jar into a `Cookie` header value.
 *
 * @param jar Map of cookie name to value.
 * @returns `"name=value; name2=value2"`, or an empty string for an empty jar.
 */
function cookiesToString(jar: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [name, value] of Object.entries(jar)) {
    pairs.push(`${name}=${value}`);
  }
  return pairs.join("; ");
}
const CHALLENGE_REDIRECT = 307;
const CHALLENGE_MARKER = "splashui/challenge";

/**
 * Tell whether a response is a redirect to the eBay challenge page.
 *
 * @param res Response obtained without following redirects.
 * @returns `true` only for a 307 whose `Location` header contains
 *          `splashui/challenge`.
 */
function isChallengeRedirect(res: Response): boolean {
  if (res.status !== CHALLENGE_REDIRECT) return false;
  const location = res.headers.get("location") ?? "";
  return location.includes(CHALLENGE_MARKER);
}
/**
 * Heuristically detect an eBay challenge page from its HTML.
 *
 * @param html Response body.
 * @returns `true` when the document is shorter than 50000 characters and
 *          mentions `_crefId` or `_cdetail`.
 */
function isChallengeHtml(html: string): boolean {
  if (html.length >= 50000) return false;
  return ["_crefId", "_cdetail"].some((marker) => html.includes(marker));
}
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchEbayItems( export default async function fetchEbayItems(
@@ -454,7 +780,10 @@ export default async function fetchEbayItems(
return classifyUnstableListings(limitedListings); return classifyUnstableListings(limitedListings);
}; };
const cookies = await loadEbayCookies(); // Collect cookies from env var + warm-up session
const envCookies = await loadEbayCookies();
const warmCookies = await warmEbaySession();
const baseCookies = mergeCookies(envCookies ?? "", warmCookies);
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference // Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
const urlParams = new URLSearchParams({ const urlParams = new URLSearchParams({
@@ -478,33 +807,104 @@ export default async function fetchEbayItems(
logger.log(`Fetching eBay search: ${searchUrl}`); logger.log(`Fetching eBay search: ${searchUrl}`);
try { try {
// Use custom headers modeled after real browser requests to bypass bot detection const searchHeaders: Record<string, string> = {
const headers: Record<string, string> = { "User-Agent": EBAY_UA,
"User-Agent": Accept:
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br, zstd",
Referer: "https://www.ebay.ca/", Referer: "https://www.ebay.ca/",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
Priority: "u=0, i",
}; };
// Add cookies if available (helps bypass bot detection) if (baseCookies) {
if (cookies) { searchHeaders.Cookie = baseCookies;
headers.Cookie = cookies;
} }
const res = await fetch(searchUrl, { // Step 1: Make search request (follow redirects for challenge flow)
let res = await fetch(searchUrl, {
method: "GET", method: "GET",
headers, headers: searchHeaders,
redirect: "manual",
}); });
const cookieJar: Record<string, string> = {};
// Collect cookies from homepage warm-up
if (baseCookies) {
for (const pair of baseCookies.split(";")) {
const eq = pair.indexOf("=");
if (eq > 0) {
cookieJar[pair.substring(0, eq).trim()] = pair
.substring(eq + 1)
.trim();
}
}
}
// Step 2: Follow challenge redirect if present
if (isChallengeRedirect(res)) {
const chalUrl = res.headers.get("location") ?? "";
collectResponseCookies(res, cookieJar);
logger.log("Challenge detected, fetching challenge page...");
res = await fetch(chalUrl, {
headers: { ...searchHeaders, Cookie: cookiesToString(cookieJar) },
redirect: "manual",
});
collectResponseCookies(res, cookieJar);
}
// Step 3: If response is challenge HTML, solve and submit
const responseHtml = await res.text();
if (isChallengeHtml(responseHtml)) {
logger.log("Solving challenge...");
const result = await solveEbayChallenge(
responseHtml,
cookiesToString(cookieJar),
);
if (result) {
// Merge answer cookies into jar
if (baseCookies) {
searchHeaders.Cookie = mergeCookies(baseCookies, result.cookies);
} else {
searchHeaders.Cookie = result.cookies;
}
logger.log("Challenge solved, retrying search...");
// Delay briefly before retry
await delay(DELAY_MS);
const retryHtml = await fetchHtml(searchUrl, DELAY_MS, {
headers: searchHeaders,
});
const listings = parseEbayListings(
retryHtml,
keywords,
exclusions,
strictMode,
);
const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents;
return (
typeof cents === "number" && cents >= minPrice && cents <= maxPrice
);
});
logger.log(
`Parsed ${filteredListings.length} eBay listings (after challenge).`,
);
return finalizeResults(filteredListings);
}
logger.warn("Challenge solve failed, returning empty results.");
return finalizeResults([]);
}
// Step 4: Normal flow — no challenge
if (!res.ok) { if (!res.ok) {
throw new HttpError( throw new HttpError(
`Request failed with status ${res.status}`, `Request failed with status ${res.status}`,
@@ -513,20 +913,17 @@ export default async function fetchEbayItems(
); );
} }
const searchHtml = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS); await delay(DELAY_MS);
logger.log(`\nParsing eBay listings...`); logger.log(`\nParsing eBay listings...`);
const listings = parseEbayListings( const listings = parseEbayListings(
searchHtml, responseHtml,
keywords, keywords,
exclusions, exclusions,
strictMode, strictMode,
); );
// Filter by price range (additional safety check)
const filteredListings = listings.filter((listing) => { const filteredListings = listings.filter((listing) => {
const cents = listing.listingPrice?.cents; const cents = listing.listingPrice?.cents;
return ( return (
@@ -537,9 +934,9 @@ export default async function fetchEbayItems(
logger.log(`Parsed ${filteredListings.length} eBay listings.`); logger.log(`Parsed ${filteredListings.length} eBay listings.`);
return finalizeResults(filteredListings); return finalizeResults(filteredListings);
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError || err instanceof RateLimitError) {
console.error( logger.warn(
`Failed to fetch eBay search (${err.status}): ${err.message}`, `Failed to fetch eBay search (${err instanceof HttpError ? err.statusCode : 429}): ${err.message}`,
); );
return finalizeResults([]); return finalizeResults([]);
} }

View File

@@ -10,20 +10,26 @@ import {
type CookieConfig, type CookieConfig,
ensureCookies, ensureCookies,
formatCookiesForHeader, formatCookiesForHeader,
loadCookiesOptional,
parseCookieString, parseCookieString,
} from "../utils/cookies"; } from "../utils/cookies";
import { delay } from "../utils/delay"; import {
buildFacebookHeaders,
detectFacebookChallenge,
warmFacebookSession,
} from "../utils/facebook-challenge";
import { formatCentsToCurrency } from "../utils/format"; import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http"; import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger"; import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable"; import { classifyUnstableListings } from "../utils/unstable";
/** /**
* Facebook Marketplace Scraper * Facebook Marketplace Scraper
* *
* Note: Facebook Marketplace requires authentication cookies for full access. * Facebook Marketplace returns search results without authentication when
* This implementation will return limited or no results without proper authentication. * proper browser headers are sent. Prices and seller details are hidden on
* This is by design to respect Facebook's authentication requirements. * search results but are available on individual item pages even without
* auth cookies. For full-price search results, provide FACEBOOK_COOKIE.
*/ */
// Facebook cookie configuration // Facebook cookie configuration
@@ -219,17 +225,6 @@ export async function ensureFacebookCookies(): Promise<Cookie[]> {
return ensureCookies(FACEBOOK_COOKIE_CONFIG); return ensureCookies(FACEBOOK_COOKIE_CONFIG);
} }
/**
 * Error raised for a non-successful HTTP response.
 * Carries the response status code and the URL that was requested.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
// ----------------------------- Extraction Metrics ----------------------------- // ----------------------------- Extraction Metrics -----------------------------
/** /**
@@ -274,112 +269,15 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
// ----------------------------- HTTP Client ----------------------------- // ----------------------------- HTTP Client -----------------------------
/** function createFacebookHeaders(cookies: string): Record<string, string> {
Fetch HTML with a basic retry strategy and simple rate-limit delay between calls. const jar: Record<string, string> = {};
- Retries on 429 and 5xx if (cookies) {
- Respects X-RateLimit-Reset when present (seconds) for (const pair of cookies.split(";")) {
- Supports custom cookies for Facebook authentication const [name, ...rest] = pair.trim().split("=");
*/ if (name && rest.length > 0) jar[name.trim()] = rest.join("=").trim();
async function fetchHtml(
url: string,
DELAY_MS: number,
opts?: {
maxRetries?: number;
retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
cookies?: string;
},
): Promise<{ html: HTMLString; responseUrl: string }> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
let lastRateLimitError: HttpError | null = null;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const headers: Record<string, string> = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"accept-encoding": "gzip, deflate, br",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
};
// Add cookies if provided
if (opts?.cookies) {
headers.cookie = opts.cookies;
}
const res = await fetch(url, {
method: "GET",
headers,
});
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
lastRateLimitError = new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
if (attempt >= maxRetries) {
throw lastRateLimitError;
}
await delay(waitMs);
continue;
}
// For Facebook, 400 often means authentication required
// Don't retry 4xx client errors except 429
if (res.status >= 400 && res.status < 500 && res.status !== 429) {
throw new HttpError(
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
res.status,
url,
);
}
// Retry on 5xx
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await delay((attempt + 1) * retryBaseMs);
continue;
}
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
}
const html = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
return { html, responseUrl: res.url || url };
} catch (err) {
if (err instanceof HttpError) {
throw err;
}
if (attempt >= maxRetries) throw err;
await delay((attempt + 1) * retryBaseMs);
} }
} }
return buildFacebookHeaders(jar);
throw lastRateLimitError ?? new Error("Exhausted retries without response");
} }
// ----------------------------- Parsing ----------------------------- // ----------------------------- Parsing -----------------------------
@@ -389,13 +287,29 @@ export type FacebookResponseKind =
| "item" | "item"
| "auth_gated" | "auth_gated"
| "unavailable" | "unavailable"
| "checkpoint"
| "unknown"; | "unknown";
export function classifyFacebookResponse( export function classifyFacebookResponse(
htmlString: HTMLString, htmlString: HTMLString,
responseUrl: string, responseUrl: string,
status = 200,
) { ) {
const challengeType = detectFacebookChallenge(
status,
htmlString,
responseUrl,
);
if (challengeType === "checkpoint") {
return {
kind: "checkpoint" as const,
authGated: false,
unavailable: false,
};
}
const authGated = const authGated =
challengeType === "login_wall" ||
responseUrl.includes("/login/") || responseUrl.includes("/login/") ||
htmlString.includes("You must log in") || htmlString.includes("You must log in") ||
htmlString.includes("log in to continue"); htmlString.includes("log in to continue");
@@ -867,6 +781,22 @@ export function extractFacebookItemData(
return bestMatch.item; return bestMatch.item;
} }
// Try marketplace_product_details_page.target path (current item page structure)
for (const candidate of candidates) {
const detailsPage = findKeyInObject(
candidate,
"marketplace_product_details_page",
) as Record<string, unknown> | undefined;
const target = detailsPage?.target as Record<string, unknown> | undefined;
if (
target &&
typeof target.id === "string" &&
typeof target.marketplace_listing_title === "string"
) {
return target as unknown as FacebookMarketplaceItem;
}
}
if (htmlString.includes("XCometMarketplacePermalinkController")) { if (htmlString.includes("XCometMarketplacePermalinkController")) {
return extractFacebookItemHtmlFallback(htmlString); return extractFacebookItemHtmlFallback(htmlString);
} }
@@ -874,6 +804,25 @@ export function extractFacebookItemData(
return null; return null;
} }
/**
 * Depth-first search of a JSON-like value for the first object that has
 * `targetKey` as one of its OWN properties, returning that property's value.
 *
 * Arrays are searched element by element; within an object the key is checked
 * on the object itself before descending into its values. Inherited members
 * (`constructor`, `toString`, …) never count as a match.
 *
 * @param obj       Value to search (typically parsed JSON).
 * @param targetKey Property name to look for.
 * @returns The value stored under the first match, or `undefined` when no
 *          match exists. A match whose stored value is `undefined` also
 *          returns `undefined` and ends the search of that object.
 */
function findKeyInObject(obj: unknown, targetKey: string): unknown {
  if (obj === null || typeof obj !== "object") return undefined;

  if (Array.isArray(obj)) {
    for (const item of obj) {
      const found = findKeyInObject(item, targetKey);
      if (found !== undefined) return found;
    }
    return undefined;
  }

  const record = obj as Record<string, unknown>;
  // Own-property check: `in` would also match keys inherited from
  // Object.prototype and return unrelated built-ins.
  if (Object.prototype.hasOwnProperty.call(record, targetKey)) {
    return record[targetKey];
  }

  for (const value of Object.values(record)) {
    const found = findKeyInObject(value, targetKey);
    if (found !== undefined) return found;
  }
  return undefined;
}
/** /**
Parse Facebook marketplace search results into ListingDetails[] Parse Facebook marketplace search results into ListingDetails[]
*/ */
@@ -1130,16 +1079,18 @@ export default async function fetchFacebookItems(
}; };
}; };
const cookies = await ensureFacebookCookies(); const warmupCookies = await warmFacebookSession();
const warmupHeader = Object.entries(warmupCookies)
.map(([k, v]) => `${k}=${v}`)
.join("; ");
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
// Format cookies for HTTP header
const domain = "www.facebook.com"; const domain = "www.facebook.com";
const cookiesHeader = formatCookiesForHeader(cookies, domain); const userCookiesHeader = formatCookiesForHeader(userCookies, domain);
if (!cookiesHeader) { const cookiesHeader = [warmupHeader, userCookiesHeader]
throw new Error( .filter(Boolean)
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.", .join("; ");
);
}
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond)); const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
@@ -1150,13 +1101,17 @@ export default async function fetchFacebookItems(
const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`; const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;
logger.log(`Fetching Facebook marketplace: ${searchUrl}`); logger.log(`Fetching Facebook marketplace: ${searchUrl}`);
logger.log(`Using ${cookies.length} cookies for authentication`); if (userCookies.length > 0) {
logger.log(`Using ${userCookies.length} cookies for authentication`);
}
let searchHtml: string; let searchHtml: string;
let searchResponseUrl = searchUrl; let searchResponseUrl = searchUrl;
try { try {
const response = await fetchHtml(searchUrl, DELAY_MS, { const response = await fetchHtml(searchUrl, DELAY_MS, {
maxRetries: 3, maxRetries: 3,
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
logger.log( logger.log(
@@ -1164,22 +1119,29 @@ export default async function fetchFacebookItems(
); );
} }
}, },
cookies: cookiesHeader,
}); });
searchHtml = response.html; searchHtml = response.html;
searchResponseUrl = response.responseUrl; searchResponseUrl = response.responseUrl;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
logger.warn( logger.warn(
`\nFacebook marketplace access failed (${err.status}): ${err.message}`, `\nFacebook marketplace access failed (${err.statusCode}): ${err.message}`,
); );
if (err.status === 400 || err.status === 401 || err.status === 403) { if (
err.statusCode === 400 ||
err.statusCode === 401 ||
err.statusCode === 403
) {
logger.warn( logger.warn(
"This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.", "This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
); );
} }
return finalizeResults([]); return finalizeResults([]);
} }
if (err instanceof RateLimitError) {
logger.warn(`\nFacebook marketplace access rate limited: ${err.message}`);
return finalizeResults([]);
}
throw err; throw err;
} }
@@ -1194,6 +1156,13 @@ export default async function fetchFacebookItems(
return finalizeResults([]); return finalizeResults([]);
} }
if (classification.kind === "checkpoint") {
logger.warn(
"Facebook marketplace returned a checkpoint challenge. This may require manual verification.",
);
return finalizeResults([]);
}
if (classification.unavailable) { if (classification.unavailable) {
logger.warn("Facebook marketplace search returned an unavailable route."); logger.warn("Facebook marketplace search returned an unavailable route.");
return finalizeResults([]); return finalizeResults([]);
@@ -1243,15 +1212,8 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem( export async function fetchFacebookItem(
itemId: string, itemId: string,
): Promise<FacebookListingDetails | null> { ): Promise<FacebookListingDetails | null> {
const cookies = await ensureFacebookCookies(); const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
const cookiesHeader = formatCookiesForHeader(userCookies, "www.facebook.com");
// Format cookies for HTTP header
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
);
}
const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`; const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`;
@@ -1261,6 +1223,8 @@ export async function fetchFacebookItem(
let itemResponseUrl = itemUrl; let itemResponseUrl = itemUrl;
try { try {
const response = await fetchHtml(itemUrl, 1000, { const response = await fetchHtml(itemUrl, 1000, {
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => { onRateInfo: (remaining, reset) => {
if (remaining && reset) { if (remaining && reset) {
logger.log( logger.log(
@@ -1268,18 +1232,17 @@ export async function fetchFacebookItem(
); );
} }
}, },
cookies: cookiesHeader,
}); });
itemHtml = response.html; itemHtml = response.html;
itemResponseUrl = response.responseUrl; itemResponseUrl = response.responseUrl;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
logger.warn( logger.warn(
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`, `\nFacebook marketplace item access failed (${err.statusCode}): ${err.message}`,
); );
// Enhanced error handling based on status codes // Enhanced error handling based on status codes
switch (err.status) { switch (err.statusCode) {
case 400: case 400:
case 401: case 401:
case 403: case 403:
@@ -1305,15 +1268,32 @@ export async function fetchFacebookItem(
); );
break; break;
default: default:
logger.warn(`Unexpected error status: ${err.status}`); logger.warn(`Unexpected error status: ${err.statusCode}`);
} }
return null; return null;
} }
if (err instanceof RateLimitError) {
logger.warn(
`\nFacebook marketplace item rate limited for item ${itemId}: ${err.message}`,
);
logger.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
);
return null;
}
throw err; throw err;
} }
const classification = classifyFacebookResponse(itemHtml, itemResponseUrl); const classification = classifyFacebookResponse(itemHtml, itemResponseUrl);
if (classification.kind === "checkpoint") {
logExtractionMetrics(false, itemId);
logger.warn(
`Checkpoint challenge detected for item ${itemId}. Facebook may be limiting access.`,
);
return null;
}
if (classification.authGated) { if (classification.authGated) {
logExtractionMetrics(false, itemId); logExtractionMetrics(false, itemId);
logger.warn( logger.warn(

View File

@@ -11,6 +11,7 @@ import {
formatCookiesForHeader, formatCookiesForHeader,
loadCookiesOptional, loadCookiesOptional,
} from "../utils/cookies"; } from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format"; import { formatCentsToCurrency } from "../utils/format";
import { import {
fetchHtml, fetchHtml,
@@ -568,78 +569,6 @@ export function parseSearch(
return results; return results;
} }
/**
 * Parse a listing page into a typed object (backward compatible).
 *
 * Reads the Apollo state embedded in the page, picks the first entry that has
 * string `url` and `title` fields, and maps it onto `KijijiListingDetails`.
 *
 * @param htmlString Raw HTML of the listing page.
 * @param BASE_URL   Prefix applied to listing URLs that do not start with "http".
 * @returns The parsed listing, or `null` when the state, the listing entry,
 *          its URL or its title is missing.
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  const listingKey = findApolloListingKey(
    apolloState,
    (candidate) =>
      typeof candidate.url === "string" && typeof candidate.title === "string",
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const listing = root as ApolloListingRoot;
  const { url, title, price, metrics } = listing;

  // Numeric fields are coerced with Number(); non-finite results are dropped
  // from the output further down.
  const cents = price?.amount != null ? Number(price.amount) : undefined;
  const amountFormatted =
    cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
  const numberOfViews =
    metrics?.views != null ? Number(metrics.views) : undefined;

  let listingUrl = "";
  if (typeof url === "string") {
    listingUrl = url.startsWith("http") ? url : `${BASE_URL}${url}`;
  }
  if (!listingUrl || !title) return null;

  const finiteOrUndefined = (value: number | undefined): number | undefined =>
    value !== undefined && Number.isFinite(value) ? value : undefined;

  return {
    url: listingUrl,
    title,
    description: listing.description,
    listingPrice: amountFormatted
      ? {
          amountFormatted,
          cents: finiteOrUndefined(cents),
          currency: price?.currency,
        }
      : undefined,
    listingType: listing.type,
    listingStatus: listing.status,
    creationDate: listing.activationDate,
    endDate: listing.endDate,
    numberOfViews: finiteOrUndefined(numberOfViews),
    address: listing.location?.address ?? null,
  };
}
/** /**
* Parse a listing page into a detailed object with all available fields * Parse a listing page into a detailed object with all available fields
*/ */
@@ -893,7 +822,17 @@ export default async function fetchKijijiItems(
const searchResults = parseSearch(searchHtml, BASE_URL); const searchResults = parseSearch(searchHtml, BASE_URL);
if (searchResults.length === 0) { if (searchResults.length === 0) {
logger.log(`No more results found on page ${page}. Stopping pagination.`); if (page === 1) {
logger.log(
`No results found on page 1. The search URL was: ${searchUrl}\n` +
`Tip: Kijiji matches ALL words in the query against listing titles. ` +
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
);
} else {
logger.log(
`No more results found on page ${page}. Stopping pagination.`,
);
}
break; break;
} }
@@ -928,9 +867,7 @@ export default async function fetchKijijiItems(
const batchPromises = batch.map(async (link, batchIndex) => { const batchPromises = batch.map(async (link, batchIndex) => {
try { try {
if (batchIndex > 0) { if (batchIndex > 0) {
await new Promise((resolve) => await delay(DELAY_MS * batchIndex);
setTimeout(resolve, DELAY_MS * batchIndex),
);
} }
const html = await fetchHtml(link, 0, { const html = await fetchHtml(link, 0, {
@@ -952,11 +889,11 @@ export default async function fetchKijijiItems(
return parsed; return parsed;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.error( logger.warn(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`, `\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
); );
} else { } else {
console.error( logger.warn(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`, `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
); );
} }
@@ -974,7 +911,7 @@ export default async function fetchKijijiItems(
results.push(...batchResults); results.push(...batchResults);
if (i + CONCURRENT_REQUESTS < newListingLinks.length) { if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
await new Promise((resolve) => setTimeout(resolve, DELAY_MS)); await delay(DELAY_MS);
} }
} }

View File

@@ -7,6 +7,7 @@ import { logger } from "./logger";
export interface Cookie { export interface Cookie {
name: string; name: string;
value: string; value: string;
rawValue?: string;
domain: string; domain: string;
path: string; path: string;
secure?: boolean; secure?: boolean;
@@ -55,6 +56,7 @@ export function parseCookieString(
return { return {
name: trimmedName, name: trimmedName,
value: decodeURIComponent(trimmedValue), value: decodeURIComponent(trimmedValue),
rawValue: trimmedValue,
domain, domain,
path: "/", path: "/",
secure: true, secure: true,
@@ -95,7 +97,7 @@ export function formatCookiesForHeader(
}); });
return validCookies return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`) .map((cookie) => `${cookie.name}=${cookie.rawValue ?? cookie.value}`)
.join("; "); .join("; ");
} }

View File

@@ -0,0 +1,239 @@
import argon2 from "argon2-wasm-pro";
// ------------------ Types ------------------
/**
 * Challenge parameters taken from the `details` object of the JSON stored in
 * the `_cdetail` hidden field (see `parseChallengePage`).
 *
 * Field names mirror the keys of the payload. Their individual meanings are
 * not documented in this file — NOTE(review): confirm against the solver.
 */
interface ChallengeDetails {
p2: number;
p6: number;
p7: number;
p9: string;
// Base64 text; the solver decodes it with atob() into the target bytes.
p11: string;
p12: number;
p13: number;
p15: number;
}
/**
 * Everything `parseChallengePage` extracts from a challenge page.
 */
interface ChallengeParams {
// Value of the `_crefId` hidden field (required for a successful parse).
crefId: string;
// Parsed contents of the `_cdetail` hidden field.
cdetail: ChallengeDetails;
// Value of the `_iid` hidden field; empty string when absent.
iid: string;
// Value of `_chlghost`; defaults to "https://www.ebay.ca" when absent.
chlghost: string;
// Value of `_appName`; defaults to "orch" when absent.
appName: string;
// Value of the `_p` hidden field; empty string when absent.
p: string;
// `action` of the form with id `destForm`; empty string when absent.
destUrl: string;
}
/**
 * Outcome of a challenge solve.
 */
interface ChallengeResult {
// Cookie string — presumably in `Cookie` header format ("name=value; ..."); verify against the solver.
cookies: string;
}
// ------------------ Helpers ------------------
/**
 * Compare the first `len` bytes of two byte sequences, C `memcmp` style.
 *
 * Positions past the end of either input are treated as 0.
 *
 * @param a   Left-hand bytes.
 * @param b   Right-hand bytes.
 * @param len Number of positions to compare.
 * @returns 0 when all compared positions are equal; otherwise the difference
 *          of the low 8 bits at the first differing position (left − right).
 */
function memcmp(a: Uint8Array, b: number[], len: number): number {
  let index = 0;
  while (index < len) {
    const left = a[index] ?? 0;
    const right = b[index] ?? 0;
    if (left !== right) {
      return (left & 0xff) - (right & 0xff);
    }
    index += 1;
  }
  return 0;
}
/**
 * Write a 32-bit integer into `arr` as four bytes, most significant first.
 *
 * @param val    Integer to encode; only its low 32 bits are used.
 * @param arr    Destination buffer, mutated in place.
 * @param offset Index of the most significant byte.
 */
function intToBytes(val: number, arr: Uint8Array, offset: number) {
  for (let byteIndex = 0; byteIndex < 4; byteIndex++) {
    // The typed array keeps only the low 8 bits of each shifted value.
    arr[offset + byteIndex] = val >>> (24 - 8 * byteIndex);
  }
}
/**
 * Convert a string into the array of its UTF-16 code units.
 *
 * @param str Input string.
 * @returns One number per code unit (`charCodeAt`), in order.
 */
function string2Bin(str: string): number[] {
  // Indexed by code unit (not code point) so surrogate pairs stay split.
  return Array.from({ length: str.length }, (_, index) =>
    str.charCodeAt(index),
  );
}
/**
 * Base64-encode a byte buffer.
 *
 * The bytes are converted to a binary string in fixed-size chunks before
 * calling `btoa`. Spreading the whole buffer into a single
 * `String.fromCharCode(...)` call passes one argument per byte and throws a
 * RangeError once the buffer exceeds the engine's argument limit.
 *
 * @param buf Bytes to encode (may be empty).
 * @returns Standard base64 text with padding.
 */
function bufferToBase64(buf: Uint8Array): string {
  const CHUNK_SIZE = 0x8000;
  let binary = "";
  for (let start = 0; start < buf.length; start += CHUNK_SIZE) {
    binary += String.fromCharCode(...buf.subarray(start, start + CHUNK_SIZE));
  }
  return btoa(binary);
}
/**
 * Build a cookie map from a list of `Set-Cookie` header values.
 *
 * Only the leading `name=value` of each header is kept; attributes after the
 * first `;` are discarded and headers with an empty name or value are skipped.
 * When a name repeats, the last value wins.
 *
 * @param cookies Raw `Set-Cookie` header values.
 * @returns Map of cookie name to value.
 */
function parseCookiesFromSetCookie(cookies: string[]): Record<string, string> {
  return cookies.reduce<Record<string, string>>((jar, header) => {
    const [, name, value] = /^([^=]+)=([^;]+)/.exec(header) ?? [];
    if (name && value) jar[name] = value;
    return jar;
  }, {});
}
// ------------------ Default headers ------------------
// User-Agent string sent with the challenge answer request; it identifies
// the client as desktop Chrome 131 on Linux.
const BROWSER_UA =
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
// Browser-like default headers for eBay page requests.
// NOTE(review): nothing in the visible part of this file references this
// constant; the leading underscore presumably marks it as intentionally
// unused — confirm before removing or wiring it in.
const _EBAY_HEADERS: Record<string, string> = {
"User-Agent": BROWSER_UA,
Accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
};
// ------------------ Parser ------------------
/**
 * Extracts the challenge parameters embedded in an eBay challenge page.
 *
 * The page carries its parameters in elements identified by `_crefId`,
 * `_cdetail`, `_iid`, `_chlghost`, `_appName` and `_p`, plus a `destForm`
 * form whose `action` is the URL to return to afterwards.
 *
 * @param html - Raw HTML of the challenge page.
 * @returns The parsed parameters, or `null` when `_crefId` or `_cdetail` is
 *   missing/empty or `_cdetail` is not JSON with a `details` object.
 */
export function parseChallengePage(html: string): ChallengeParams | null {
  // Reads the `value` attribute that directly follows `id=<id>`, accepting
  // single-quoted, double-quoted and unquoted attribute values.
  const getHidden = (id: string): string => {
    const re = new RegExp(
      `id=${id}\\s+value='([^']*)'` +
        `|id=${id}\\s+value="([^"]*)"` +
        `|id=${id}\\s+value=([^\\s>]+)`,
      "i",
    );
    const m = html.match(re);
    if (!m) return "";
    return m[1] ?? m[2] ?? m[3] ?? "";
  };

  const crefId = getHidden("_crefId");
  const cdetailRaw = getHidden("_cdetail");
  const iid = getHidden("_iid");
  const chlghost = getHidden("_chlghost");
  const appName = getHidden("_appName");
  const p = getHidden("_p");

  // Accept the same three quoting styles as getHidden so a quoted `action`
  // (or quoted `id`) does not leak quote characters into the URL.
  const formActionMatch = html.match(
    /<form\s+id=["']?destForm["']?\s+[^>]*action=(?:'([^']*)'|"([^"]*)"|([^\s>]+))/i,
  );
  const destUrl = (
    formActionMatch?.[1] ??
    formActionMatch?.[2] ??
    formActionMatch?.[3] ??
    ""
  ).trim();

  if (!crefId || !cdetailRaw) return null;

  let cdetail: ChallengeDetails;
  try {
    const parsed = JSON.parse(cdetailRaw);
    // A missing `details` object makes the property reads below throw, which
    // the catch turns into a `null` result.
    const d = parsed.details;
    cdetail = {
      p2: Number(d.p2),
      p6: Number(d.p6),
      p7: Number(d.p7),
      p9: d.p9,
      p11: d.p11,
      p12: Number(d.p12),
      p13: Number(d.p13),
      p15: Number(d.p15),
    };
  } catch {
    return null;
  }

  return {
    crefId,
    cdetail,
    iid,
    // Fall back to fixed defaults when the page omits these fields.
    chlghost: chlghost || "https://www.ebay.ca",
    appName: appName || "orch",
    p,
    destUrl,
  };
}
// ------------------ Solver ------------------
/**
 * Searches for nonces whose argon2 hash compares less than or equal to the
 * challenge target, and returns them base64-encoded.
 *
 * Each candidate nonce is `p6` bytes: random bytes whose last four bytes hold
 * a big-endian counter. The counter starts at 0 and is incremented after
 * every rejected hash; once a nonce is accepted, a new random nonce is drawn
 * and the counter restarts.
 *
 * @param cdetail - Proof-of-work parameters parsed from the challenge page.
 * @returns `p15` accepted nonces, base64-encoded, in the order found.
 */
async function solveArgon2Challenge(
  cdetail: ChallengeDetails,
): Promise<string[]> {
  const targetBytes = string2Bin(atob(cdetail.p11));
  const targetLen = targetBytes.length;
  const nonceLen = cdetail.p6;
  const answerCount = cdetail.p15;
  const salt = new Uint8Array(
    Uint8Array.from(atob(cdetail.p9), (c) => c.charCodeAt(0)),
  );

  // Produces a random nonce with its trailing counter bytes reset to zero.
  const freshNonce = () => {
    const bytes = new Uint8Array(nonceLen);
    crypto.getRandomValues(bytes);
    intToBytes(0, bytes, bytes.length - 4);
    return bytes;
  };

  const answers: string[] = [];
  let nonce = freshNonce();
  let counter = 0;

  while (answers.length < answerCount) {
    const result = await argon2.hash({
      pass: nonce,
      salt,
      time: cdetail.p2,
      mem: cdetail.p13,
      hashLen: cdetail.p7,
      parallelism: cdetail.p12,
      // NOTE(review): presumably selects the Argon2id variant — confirm
      // against the argon2-wasm-pro type enum.
      type: 2,
    });
    const hashBytes = result.hash as Uint8Array;

    if (memcmp(hashBytes, targetBytes, targetLen) > 0) {
      // Hash is above the target: advance the counter and retry.
      counter += 1;
      intToBytes(counter, nonce, nonce.length - 4);
      continue;
    }

    answers.push(bufferToBase64(nonce));
    nonce = freshNonce();
    counter = 0;
  }

  return answers;
}
// ------------------ Public API ------------------
/**
 * Solves the proof-of-work challenge found in `html` and submits the answers
 * to the challenge service.
 *
 * @param html - HTML of the challenge page.
 * @param cookieHeader - Optional `Cookie` header value sent with the answer.
 * @returns The cookies set by the answer response as a single
 *   "name=value; ..." string, or `null` when the page cannot be parsed, the
 *   answer request is not OK, or the response sets no cookies.
 */
export async function solveEbayChallenge(
  html: string,
  cookieHeader?: string,
): Promise<ChallengeResult | null> {
  const params = parseChallengePage(html);
  if (params === null) {
    return null;
  }

  const answers = await solveArgon2Challenge(params.cdetail);
  const encodedAnswers = encodeURIComponent(answers.join(","));

  const payload = {
    iid: params.iid,
    appName: params.appName,
    referenceId: params.crefId,
    pvt: Date.now().toString(),
    crt: Date.now().toString(),
    encodedAnswers,
    p: params.p,
    ru: params.destUrl,
  };

  const headers: Record<string, string> = {
    "content-type": "application/json",
    accept: "application/json, text/plain, */*",
    "user-agent": BROWSER_UA,
    // Only forward cookies when the caller supplied some.
    ...(cookieHeader ? { cookie: cookieHeader } : {}),
  };

  const answerEndpoint = `${params.chlghost}/splashui/challengesvc/answer`;
  const res = await fetch(answerEndpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(payload),
  });
  if (!res.ok) {
    return null;
  }

  // The cookies issued by the answer response are the result of the solve.
  const issued = parseCookiesFromSetCookie(res.headers.getSetCookie?.() ?? []);
  const pairs = Object.entries(issued).map(([k, v]) => `${k}=${v}`);
  if (pairs.length === 0) {
    return null;
  }
  return { cookies: pairs.join("; ") };
}

View File

@@ -0,0 +1,128 @@
// Facebook Marketplace session & challenge utilities
// ------------------ Types ------------------
// Classification of a Facebook response, as produced by
// detectFacebookChallenge:
//   "login_wall"   - URL contains "/login/" or the HTML has a log-in prompt
//   "checkpoint"   - URL contains "/checkpoint/" or the HTML mentions both
//                    "checkpoint" and "challenge"
//   "bad_headers"  - HTTP status 400
//   "rate_limited" - HTTP status 429
//   "none"         - no challenge detected
export type ChallengeType =
| "login_wall"
| "checkpoint"
| "bad_headers"
| "rate_limited"
| "none";
// ------------------ Constants ------------------
// Request headers imitating a top-level page navigation from desktop
// Chrome 131 on Linux. Used for the session warm-up request and as the base
// of every header set returned by buildFacebookHeaders.
const FACEBOOK_BROWSER_HEADERS: Record<string, string> = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
// Fetch-metadata values describing a user-initiated document navigation.
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
// Client hints; the browser version and platform here agree with the
// user-agent string below.
"sec-ch-ua":
'"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Linux"',
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
};
// ------------------ Cookie Management ------------------
/**
 * Collects cookies from raw `Set-Cookie` header values into a name -> value
 * map.
 *
 * Only the segment before the first `;` of each header is read; it is split
 * at its first `=`. Entries with an empty name or empty value are dropped,
 * and later headers overwrite earlier ones with the same name.
 */
function parseSetCookies(setCookieHeaders: string[]): Record<string, string> {
  const jar: Record<string, string> = {};
  setCookieHeaders.forEach((header) => {
    const [leading = ""] = header.split(";");
    const pair = leading.trim();
    const separator = pair.indexOf("=");
    if (separator < 0) {
      return;
    }
    const name = pair.slice(0, separator).trim();
    const value = pair.slice(separator + 1).trim();
    if (name !== "" && value !== "") {
      jar[name] = value;
    }
  });
  return jar;
}
/**
 * Serializes a cookie map into a `Cookie` header value of the form
 * "name=value; name=value". An empty map yields an empty string.
 */
function cookiesToHeader(cookies: Record<string, string>): string {
  const pairs: string[] = [];
  for (const name of Object.keys(cookies)) {
    pairs.push(`${name}=${cookies[name]}`);
  }
  return pairs.join("; ");
}
// ------------------ Session Warmup ------------------
/**
 * Requests the Facebook home page with browser-like headers and returns the
 * cookies the response sets.
 *
 * Redirects are not followed and the request is aborted after 10 seconds.
 * This is best-effort: any failure yields an empty cookie map instead of an
 * error.
 */
export async function warmFacebookSession(): Promise<Record<string, string>> {
  try {
    const request: RequestInit = {
      method: "GET",
      headers: FACEBOOK_BROWSER_HEADERS,
      redirect: "manual",
      signal: AbortSignal.timeout(10000),
    };
    const response = await fetch("https://www.facebook.com/", request);
    const issued = response.headers.getSetCookie?.() ?? [];
    return parseSetCookies(issued);
  } catch {
    return {};
  }
}
// ------------------ Challenge Detection ------------------
/**
 * Classifies a Facebook response as a known kind of block, or "none".
 *
 * Checks run in a fixed order and the first match wins: HTTP status, then
 * login-wall markers, then checkpoint markers.
 *
 * @param status - HTTP status code of the response.
 * @param html - Response body.
 * @param responseUrl - URL of the response.
 */
export function detectFacebookChallenge(
  status: number,
  html: string,
  responseUrl: string,
): ChallengeType {
  switch (status) {
    case 400:
      return "bad_headers";
    case 429:
      return "rate_limited";
    default:
      break;
  }

  const loginPhrases = ["You must log in", "log in to continue"];
  const isLoginWall =
    responseUrl.includes("/login/") ||
    loginPhrases.some((phrase) => html.includes(phrase));
  if (isLoginWall) {
    return "login_wall";
  }

  // The HTML heuristic requires both words to be present.
  const checkpointWords = ["checkpoint", "challenge"];
  const isCheckpoint =
    responseUrl.includes("/checkpoint/") ||
    checkpointWords.every((word) => html.includes(word));
  return isCheckpoint ? "checkpoint" : "none";
}
// ------------------ Header Construction ------------------
/**
 * Builds the header set for a Facebook request.
 *
 * Starts from the browser-like defaults, adds a `cookie` header when the jar
 * is non-empty, then applies `extraHeaders`, which override any earlier key.
 *
 * @param cookieJar - Cookies to send, as a name -> value map.
 * @param extraHeaders - Optional per-request headers applied last.
 * @returns A new header object; the shared defaults are not modified.
 */
export function buildFacebookHeaders(
  cookieJar: Record<string, string>,
  extraHeaders?: Record<string, string>,
): Record<string, string> {
  const cookieString = cookiesToHeader(cookieJar);
  return {
    ...FACEBOOK_BROWSER_HEADERS,
    ...(cookieString ? { cookie: cookieString } : {}),
    ...extraHeaders,
  };
}

View File

@@ -1,3 +1,4 @@
import type { HTMLString } from "../types/common";
import { delay } from "./delay"; import { delay } from "./delay";
/** Custom error class for HTTP-related failures */ /** Custom error class for HTTP-related failures */
@@ -60,10 +61,57 @@ export function isRecord(value: unknown): value is Record<string, unknown> {
/** /**
* Calculate exponential backoff delay with jitter * Calculate exponential backoff delay with jitter
*/ */
function calculateBackoffDelay(attempt: number, baseMs: number): number { function calculateBackoffDelay(
attempt: number,
baseMs: number,
jitter: () => number = Math.random,
): number {
const exponentialDelay = baseMs * 2 ** attempt; const exponentialDelay = baseMs * 2 ** attempt;
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter const jitterDelay = jitter() * 0.1 * exponentialDelay; // 10% jitter
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds return Math.min(exponentialDelay + jitterDelay, 30000); // Cap at 30 seconds
}
const MAX_RATE_LIMIT_WAIT_MS = 30_000;
const MAX_DELTA_RESET_SECONDS = 86_400;
function mergeHeaders(
defaultHeaders: Record<string, string>,
customHeaders?: Record<string, string>,
): Record<string, string> {
const merged: Record<string, string> = {};
for (const [key, value] of Object.entries(defaultHeaders)) {
merged[key.toLowerCase()] = value;
}
for (const [key, value] of Object.entries(customHeaders ?? {})) {
merged[key.toLowerCase()] = value;
}
return merged;
}
function calculateRateLimitWaitMs(
resetHeader: string | null,
fallbackWaitMs: number,
): number {
if (!resetHeader) return fallbackWaitMs;
const resetValue = Number(resetHeader);
if (!Number.isFinite(resetValue)) return fallbackWaitMs;
const waitMs =
resetValue <= MAX_DELTA_RESET_SECONDS
? resetValue * 1000
: resetValue * 1000 - Date.now();
return Math.min(Math.max(0, waitMs), MAX_RATE_LIMIT_WAIT_MS);
}
/** Result type when includeResponseUrl is true */
export interface FetchHtmlResult {
html: HTMLString;
responseUrl: string;
} }
/** Options for fetchHtml */ /** Options for fetchHtml */
@@ -73,6 +121,8 @@ export interface FetchHtmlOptions {
timeoutMs?: number; timeoutMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void; onRateInfo?: (remaining: string | null, reset: string | null) => void;
headers?: Record<string, string>; headers?: Record<string, string>;
includeResponseUrl?: boolean;
jitter?: () => number;
} }
/** /**
@@ -80,14 +130,24 @@ export interface FetchHtmlOptions {
* @param url - The URL to fetch * @param url - The URL to fetch
* @param delayMs - Delay in milliseconds between requests (rate limiting) * @param delayMs - Delay in milliseconds between requests (rate limiting)
* @param opts - Optional fetch options * @param opts - Optional fetch options
* @returns The HTML content as a string * @returns The HTML content as a string, or an object with html and responseUrl
* @throws HttpError, NetworkError, or RateLimitError on failure * @throws HttpError, NetworkError, or RateLimitError on failure
*/ */
export async function fetchHtml(
url: string,
delayMs: number,
opts: FetchHtmlOptions & { includeResponseUrl: true },
): Promise<FetchHtmlResult>;
export async function fetchHtml( export async function fetchHtml(
url: string, url: string,
delayMs: number, delayMs: number,
opts?: FetchHtmlOptions, opts?: FetchHtmlOptions,
): Promise<string> { ): Promise<HTMLString>;
export async function fetchHtml(
url: string,
delayMs: number,
opts?: FetchHtmlOptions,
): Promise<HTMLString | FetchHtmlResult> {
const maxRetries = opts?.maxRetries ?? 3; const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 1000; const retryBaseMs = opts?.retryBaseMs ?? 1000;
const timeoutMs = opts?.timeoutMs ?? 30000; const timeoutMs = opts?.timeoutMs ?? 30000;
@@ -118,13 +178,17 @@ export async function fetchHtml(
const controller = new AbortController(); const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs); const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
const res = await fetch(url, { const res = await (async () => {
method: "GET", try {
headers: { ...defaultHeaders, ...opts?.headers }, return await fetch(url, {
signal: controller.signal, method: "GET",
}); headers: mergeHeaders(defaultHeaders, opts?.headers),
signal: controller.signal,
clearTimeout(timeoutId); });
} finally {
clearTimeout(timeoutId);
}
})();
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset"); const rateLimitReset = res.headers.get("X-RateLimit-Reset");
@@ -136,12 +200,17 @@ export async function fetchHtml(
const resetSeconds = rateLimitReset const resetSeconds = rateLimitReset
? Number(rateLimitReset) ? Number(rateLimitReset)
: Number.NaN; : Number.NaN;
const waitMs = Number.isFinite(resetSeconds) const waitMs = calculateRateLimitWaitMs(
? Math.max(0, resetSeconds * 1000) rateLimitReset,
: calculateBackoffDelay(attempt, retryBaseMs); calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
);
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => setTimeout(resolve, waitMs)); await delay(waitMs);
continue; continue;
} }
throw new RateLimitError( throw new RateLimitError(
@@ -153,8 +222,12 @@ export async function fetchHtml(
// Retry on server errors // Retry on server errors
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await new Promise((resolve) => await delay(
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
); );
continue; continue;
} }
@@ -170,7 +243,9 @@ export async function fetchHtml(
// Respect per-request delay to maintain rate limiting // Respect per-request delay to maintain rate limiting
await delay(delayMs); await delay(delayMs);
return html; return opts?.includeResponseUrl
? { html, responseUrl: res.url || url }
: html;
} catch (err) { } catch (err) {
// Re-throw known errors // Re-throw known errors
if ( if (
@@ -183,8 +258,12 @@ export async function fetchHtml(
if (err instanceof Error && err.name === "AbortError") { if (err instanceof Error && err.name === "AbortError") {
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await delay(
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
); );
continue; continue;
} }
@@ -193,8 +272,12 @@ export async function fetchHtml(
// Network or other errors // Network or other errors
if (attempt < maxRetries) { if (attempt < maxRetries) {
await new Promise((resolve) => await delay(
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)), calculateBackoffDelay(
attempt,
retryBaseMs,
opts?.jitter ?? Math.random,
),
); );
continue; continue;
} }

View File

@@ -29,9 +29,11 @@ const originalWarn = console.warn;
describe("eBay Scraper Cookie Handling", () => { describe("eBay Scraper Cookie Handling", () => {
beforeEach(() => { beforeEach(() => {
delete process.env.EBAY_COOKIE;
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => Promise.resolve("<html><body></body></html>"), text: () => Promise.resolve("<html><body></body></html>"),
}), }),
) as unknown as typeof fetch; ) as unknown as typeof fetch;
@@ -46,17 +48,22 @@ describe("eBay Scraper Cookie Handling", () => {
test("should ignore request cookie overrides and rely on EBAY_COOKIE", async () => { test("should ignore request cookie overrides and rely on EBAY_COOKIE", async () => {
await fetchEbayItems("laptop", 1000); await fetchEbayItems("laptop", 1000);
expect(global.fetch).toHaveBeenCalledTimes(1); // First call is homepage warm-up, second is search
expect(global.fetch).toHaveBeenCalledTimes(2);
const firstFetchCall = (global.fetch as unknown as ReturnType<typeof mock>) // The search request is the second call
.mock.calls[0]; const secondFetchCall = (global.fetch as unknown as ReturnType<typeof mock>)
if (!firstFetchCall) { .mock.calls[1];
throw new Error("Expected fetch to be called"); if (!secondFetchCall) {
throw new Error("Expected search fetch to be called");
} }
const [, init] = firstFetchCall; const [searchUrl, init] = secondFetchCall;
const headers = (init as RequestInit).headers as Record<string, string>; const headers = (init as RequestInit).headers as Record<string, string>;
expect(searchUrl).toBe(
"https://www.ebay.ca/sch/i.html?_nkw=laptop&_sacat=0&_from=R40&LH_BIN=1&LH_PrefLoc=1",
);
expect(headers.Cookie).toBeUndefined(); expect(headers.Cookie).toBeUndefined();
}); });
@@ -64,6 +71,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -84,10 +92,26 @@ describe("eBay Scraper Cookie Handling", () => {
]); ]);
}); });
test("returns empty results when eBay rate-limits the request", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: false,
status: 429,
headers: { get: () => "0" },
text: () => Promise.resolve(""),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([]);
});
test("deduplicates repeated item links from the same card", async () => { test("deduplicates repeated item links from the same card", async () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -114,6 +138,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -146,6 +171,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -188,6 +214,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -210,10 +237,86 @@ describe("eBay Scraper Cookie Handling", () => {
]); ]);
}); });
test("parses current eBay s-card markup with unquoted item links", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<div class="s-card s-card--horizontal">
<div class=su-card-container__header>
<a class=s-card__link href=https://ebay.com/itm/1234567890?itmmeta=abc>
<div role=heading aria-level=3 class=s-card__title>
<span class="su-styled-text primary default">Apple MacBook Air M1 2020 8GB 256GB</span>
</div>
</a>
</div>
<div class=su-card-container__attributes>
<span class="su-styled-text primary bold large-1 s-card__price">CA $599.00</span>
</div>
</div>
</body></html>
`),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("macbook", 1000);
expect(results).toEqual([
expect.objectContaining({
title: "Apple MacBook Air M1 2020 8GB 256GB",
url: "https://ebay.com/itm/1234567890?itmmeta=abc",
listingPrice: expect.objectContaining({ cents: 59_900 }),
}),
]);
});
test("parses embedded eBay payload listings before HTML fallback", async () => {
const payload = encodeURIComponent(
JSON.stringify({
searchResults: [
{
title: "Apple MacBook Air M1 API Result",
itemWebUrl: "https://www.ebay.ca/itm/9876543210?hash=item987",
price: { value: "550.00", currency: "CAD" },
},
],
}),
);
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<script data-inlinepayload="${payload}"></script>
</body></html>
`),
}),
) as unknown as typeof fetch;
const results = await fetchEbayItems("macbook", 1000);
expect(results).toEqual([
expect.objectContaining({
title: "Apple MacBook Air M1 API Result",
url: "https://www.ebay.ca/itm/9876543210?hash=item987",
listingPrice: expect.objectContaining({
amountFormatted: "CAD 550.00",
cents: 55_000,
currency: "CAD",
}),
}),
]);
});
test("treats US dollar prices as USD", async () => { test("treats US dollar prices as USD", async () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -243,6 +346,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -272,6 +376,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -301,6 +406,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -343,6 +449,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -375,6 +482,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -407,6 +515,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -440,6 +549,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -467,6 +577,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -499,6 +610,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -529,6 +641,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -574,6 +687,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>
@@ -612,6 +726,7 @@ describe("eBay Scraper Cookie Handling", () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
headers: { get: () => null },
text: () => text: () =>
Promise.resolve(` Promise.resolve(`
<html><body> <html><body>

View File

@@ -70,6 +70,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result[0]).toEqual({ expect(result[0]).toEqual({
name: "c_user", name: "c_user",
value: "123456789", value: "123456789",
rawValue: "123456789",
domain: ".facebook.com", domain: ".facebook.com",
path: "/", path: "/",
secure: true, secure: true,
@@ -80,6 +81,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result[1]).toEqual({ expect(result[1]).toEqual({
name: "xs", name: "xs",
value: "abcdef123456", value: "abcdef123456",
rawValue: "abcdef123456",
domain: ".facebook.com", domain: ".facebook.com",
path: "/", path: "/",
secure: true, secure: true,
@@ -97,6 +99,16 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
expect(result[1]?.value).toBe("abc=def"); expect(result[1]?.value).toBe("abc=def");
}); });
test("should preserve raw encoded values when formatting cookie headers", () => {
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
const result = formatCookiesForHeader(
parseFacebookCookieString(cookieString),
"www.facebook.com",
);
expect(result).toBe(cookieString);
});
test("should filter out malformed cookies", () => { test("should filter out malformed cookies", () => {
const cookieString = "c_user=123; invalid; xs=abc; =empty"; const cookieString = "c_user=123; invalid; xs=abc; =empty";
const result = parseFacebookCookieString(cookieString); const result = parseFacebookCookieString(cookieString);

View File

@@ -38,4 +38,87 @@ describe("fetchHtml", () => {
expect(scheduledDelays).not.toContain(1000); expect(scheduledDelays).not.toContain(1000);
}); });
test("fetchHtml returns responseUrl when includeResponseUrl is true", async () => {
process.env.NODE_ENV = "test";
global.fetch = mock(() =>
Promise.resolve({
ok: true,
status: 200,
url: "https://example.test/final",
headers: { get: () => null },
text: () => Promise.resolve("<html></html>"),
}),
) as unknown as typeof fetch;
const result = await fetchHtml("https://example.test", 0, {
includeResponseUrl: true,
});
expect(result.html).toBe("<html></html>");
expect(result.responseUrl).toBe("https://example.test/final");
});
test("rate limit epoch reset uses bounded wait", async () => {
process.env.NODE_ENV = "production";
const scheduledDelays: number[] = [];
const farFutureEpochSeconds = Math.floor(Date.now() / 1000) + 315_360_000;
let calls = 0;
global.fetch = mock(() => {
calls += 1;
return Promise.resolve({
ok: calls > 1,
status: calls > 1 ? 200 : 429,
url: "https://example.test",
headers: {
get: (name: string) =>
name === "X-RateLimit-Reset" ? String(farFutureEpochSeconds) : null,
},
text: () => Promise.resolve("<html></html>"),
});
}) as unknown as typeof fetch;
globalThis.setTimeout = mock((handler: TimerHandler, timeout?: number) => {
scheduledDelays.push(Number(timeout));
if (timeout !== 1_234_567 && typeof handler === "function") {
handler();
}
return 0 as unknown as ReturnType<typeof setTimeout>;
}) as unknown as typeof setTimeout;
globalThis.clearTimeout = mock(() => {}) as unknown as typeof clearTimeout;
await fetchHtml("https://example.test", 0, {
maxRetries: 1,
timeoutMs: 1_234_567,
});
expect(scheduledDelays).toContain(30_000);
expect(scheduledDelays).not.toContain(farFutureEpochSeconds * 1000);
});
test("custom Accept header overrides default accept without duplicate casing", async () => {
process.env.NODE_ENV = "test";
const customAccept = "text/plain";
let requestHeaders: HeadersInit | undefined;
global.fetch = mock((_url: string | URL | Request, init?: RequestInit) => {
requestHeaders = init?.headers;
return Promise.resolve({
ok: true,
status: 200,
url: "https://example.test",
headers: { get: () => null },
text: () => Promise.resolve("<html></html>"),
});
}) as unknown as typeof fetch;
await fetchHtml("https://example.test", 0, {
headers: { Accept: customAccept },
});
expect(requestHeaders).toBeDefined();
expect((requestHeaders as Record<string, string>).accept).toBe(
customAccept,
);
expect((requestHeaders as Record<string, string>).Accept).toBeUndefined();
});
}); });

View File

@@ -0,0 +1,35 @@
import { describe, expect, test } from "bun:test";
import fetchEbayItems from "../../src/scrapers/ebay";
// Live smoke test: calls the real eBay scraper and checks that every returned
// listing has the fields downstream code relies on.

// Passed to the scraper as `maxItems`.
const LIVE_RESULT_LIMIT = 3;
// Per-test timeout handed to the test runner as the third `test` argument.
const LIVE_TEST_TIMEOUT_MS = 30_000;
describe("eBay live parser", () => {
test(
"scrapes live search results into listing details",
async () => {
// NOTE(review): the second argument (1) is presumably the scraper's
// per-request setting — confirm against fetchEbayItems' signature.
const results = await fetchEbayItems("iphone", 1, {
maxItems: LIVE_RESULT_LIMIT,
});
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
// Explicit throws name the offending listing URL in the failure message
// and narrow listingPrice for the assertions below.
if (!listing.listingPrice) {
throw new Error(`Expected listing price for ${listing.url}`);
}
if (typeof listing.listingPrice.cents !== "number") {
throw new Error(`Expected listing cents for ${listing.url}`);
}
if (!listing.listingPrice.currency) {
throw new Error(`Expected listing currency for ${listing.url}`);
}
expect(listing.url).toStartWith("https://");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
},
LIVE_TEST_TIMEOUT_MS,
);
});

View File

@@ -0,0 +1,44 @@
import { describe, expect, test } from "bun:test";
import fetchFacebookItems from "../../src/scrapers/facebook";
// Live smoke test: calls the real Facebook Marketplace scraper and checks
// that every returned listing has the fields downstream code relies on.

// Passed to the scraper as its fourth argument.
const LIVE_RESULT_LIMIT = 3;
// Per-test timeout handed to the test runner as the third `test` argument.
const LIVE_TEST_TIMEOUT_MS = 30_000;
describe("Facebook live parser", () => {
test(
"scrapes live marketplace search results into listing details",
async () => {
// Fail fast with a clear message when the cookie is not configured.
if (!process.env.FACEBOOK_COOKIE?.trim()) {
throw new Error("FACEBOOK_COOKIE is required for Facebook live tests");
}
// NOTE(review): the second argument (1) is presumably the scraper's
// per-request setting and "toronto" the search location — confirm against
// fetchFacebookItems' signature.
const results = await fetchFacebookItems(
"iphone",
1,
"toronto",
LIVE_RESULT_LIMIT,
);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
// Explicit throws name the offending listing URL in the failure message
// and narrow listingPrice for the assertions below.
if (!listing.listingPrice) {
throw new Error(`Expected listing price for ${listing.url}`);
}
if (typeof listing.listingPrice.cents !== "number") {
throw new Error(`Expected listing cents for ${listing.url}`);
}
if (!listing.listingPrice.currency) {
throw new Error(`Expected listing currency for ${listing.url}`);
}
expect(listing.url).toStartWith(
"https://www.facebook.com/marketplace/item/",
);
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
},
LIVE_TEST_TIMEOUT_MS,
);
});

View File

@@ -0,0 +1,38 @@
import { describe, expect, test } from "bun:test";
import fetchKijijiItems from "../../src/scrapers/kijiji";
// Live smoke test: calls the real Kijiji scraper and checks that every
// returned listing has the fields downstream code relies on.

// Per-test timeout handed to the test runner as the third `test` argument.
const LIVE_TEST_TIMEOUT_MS = 30_000;
describe("Kijiji live parser", () => {
test(
"scrapes live search results into detailed listings",
async () => {
// Scrape a single results page, without images and with basic seller data.
// NOTE(review): the second argument (1) is presumably the scraper's
// per-request setting — confirm against fetchKijijiItems' signature.
const results = await fetchKijijiItems(
"iphone",
1,
"https://www.kijiji.ca",
{ maxPages: 1 },
{ includeImages: false, sellerDataDepth: "basic" },
);
expect(results.length).toBeGreaterThan(0);
for (const listing of results) {
// Explicit throws name the offending listing URL in the failure message
// and narrow listingPrice for the assertions below.
if (!listing.listingPrice) {
throw new Error(`Expected listing price for ${listing.url}`);
}
if (typeof listing.listingPrice.cents !== "number") {
throw new Error(`Expected listing cents for ${listing.url}`);
}
if (!listing.listingPrice.currency) {
throw new Error(`Expected listing currency for ${listing.url}`);
}
expect(listing.url).toStartWith("https://www.kijiji.ca/");
expect(listing.title.length).toBeGreaterThan(0);
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
}
},
LIVE_TEST_TIMEOUT_MS,
);
});

View File

@@ -1,11 +1,6 @@
// Test setup for Bun test runner global.fetch = Object.assign(
// This file is loaded before any tests run due to bunfig.toml preload () => {
throw new Error("Tests must mock fetch explicitly");
// Mock fetch globally for tests },
global.fetch = { preconnect: fetch.preconnect },
global.fetch || ) as typeof fetch;
(() => {
throw new Error("fetch is not available in test environment");
});
// Add any global test utilities here

View File

@@ -5,5 +5,5 @@
"@/*": ["./src/*"] "@/*": ["./src/*"]
} }
}, },
"include": ["./src", "./test"] "include": ["./src", "./test", "../../types/**/*.d.ts"]
} }

View File

@@ -21,5 +21,6 @@
## Verify ## Verify
- `bun test packages/mcp-server/test`
- `bun run --cwd packages/mcp-server build` - `bun run --cwd packages/mcp-server build`
- `bun run ci` - `bun run ci`

View File

@@ -2,7 +2,32 @@ import { logger } from "../logger";
import { tools } from "./tools"; import { tools } from "./tools";
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api"; const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000;
/**
 * Call one marketplace endpoint of the backing API and return its JSON body.
 *
 * The request is raced against a timer of `API_TIMEOUT` milliseconds. The
 * timer only guards the wait for the response itself; reading the body
 * afterwards is not covered by it.
 *
 * @param marketplace - Path segment appended to `API_BASE_URL`
 *   (the callers in this file pass "facebook" and "ebay").
 * @param params - Query parameters serialized into the request URL.
 * @returns The response body parsed as JSON.
 * @throws Error `Request timed out after <API_TIMEOUT>ms` when the timer wins
 *   the race, or `API returned <status>: <body>` when the response is not OK.
 */
async function callMarketplaceApi(
  marketplace: string,
  params: URLSearchParams,
): Promise<unknown> {
  const url = `${API_BASE_URL}/${marketplace}?${params.toString()}`;
  logger.log(`[MCP] Calling ${marketplace} API`);

  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timeoutId = setTimeout(
      () => reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
      API_TIMEOUT,
    );
  });

  let response: Response;
  try {
    response = await Promise.race([fetch(url), timeout]);
  } finally {
    // Always cancel the timer once the race settles; otherwise every
    // completed call leaves a pending timer alive for up to API_TIMEOUT ms.
    clearTimeout(timeoutId);
  }

  if (!response.ok) {
    const errorText = await response.text();
    logger.error(
      `[MCP] ${marketplace} API error ${response.status}: ${errorText}`,
    );
    throw new Error(`API returned ${response.status}: ${errorText}`);
  }

  return response.json();
}
/** /**
* Handle MCP JSON-RPC 2.0 protocol requests * Handle MCP JSON-RPC 2.0 protocol requests
@@ -116,7 +141,6 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
params.append("priceMin", args.priceMin.toString()); params.append("priceMin", args.priceMin.toString());
if (args.priceMax) if (args.priceMax)
params.append("priceMax", args.priceMax.toString()); params.append("priceMax", args.priceMax.toString());
if (args.cookies) params.append("cookies", args.cookies);
if (args.unstableFilter !== undefined) if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString()); params.append("unstableFilter", args.unstableFilter.toString());
@@ -139,7 +163,14 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
logger.error( logger.error(
`[MCP] Kijiji API error ${response.status}: ${errorText}`, `[MCP] Kijiji API error ${response.status}: ${errorText}`,
); );
throw new Error(`API returned ${response.status}: ${errorText}`); let errorMessage = `API returned ${response.status}: ${errorText}`;
try {
const errorJson = JSON.parse(errorText) as { message?: string };
if (errorJson.message) errorMessage = errorJson.message;
} catch {
// not JSON — use raw text
}
throw new Error(errorMessage);
} }
result = await response.json(); result = await response.json();
logger.log( logger.log(
@@ -161,31 +192,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
if (args.unstableFilter !== undefined) if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString()); params.append("unstableFilter", args.unstableFilter.toString());
logger.log( result = await callMarketplaceApi("facebook", params);
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
logger.error(
`[MCP] Facebook API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
logger.log(
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else if (name === "search_ebay") { } else if (name === "search_ebay") {
const query = args.query; const query = args.query;
if (!query) { if (!query) {
@@ -215,31 +222,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
if (args.unstableFilter !== undefined) if (args.unstableFilter !== undefined)
params.append("unstableFilter", args.unstableFilter.toString()); params.append("unstableFilter", args.unstableFilter.toString());
logger.log( result = await callMarketplaceApi("ebay", params);
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
);
const response = await Promise.race([
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
new Promise<Response>((_, reject) =>
setTimeout(
() =>
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
API_TIMEOUT,
),
),
]);
if (!response.ok) {
const errorText = await response.text();
logger.error(
`[MCP] eBay API error ${response.status}: ${errorText}`,
);
throw new Error(`API returned ${response.status}: ${errorText}`);
}
result = await response.json();
logger.log(
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
);
} else { } else {
return Response.json({ return Response.json({
jsonrpc: "2.0", jsonrpc: "2.0",

View File

@@ -11,7 +11,11 @@ export const tools = [
properties: { properties: {
query: { query: {
type: "string", type: "string",
description: "Search query for Kijiji listings", description:
"Search query for Kijiji listings. " +
"Kijiji requires ALL words to appear in the listing title — keep queries short and use terms sellers actually write. " +
"Avoid marketing/brand phrases sellers don't use (e.g. use 'macbook air m1' not 'macbook air m1 apple silicon'). " +
"If the search returns no results, try a shorter or more common query.",
}, },
location: { location: {
type: "string", type: "string",
@@ -46,16 +50,11 @@ export const tools = [
}, },
priceMin: { priceMin: {
type: "number", type: "number",
description: "Minimum price in cents", description: "Minimum price in dollars",
}, },
priceMax: { priceMax: {
type: "number", type: "number",
description: "Maximum price in cents", description: "Maximum price in dollars",
},
cookies: {
type: "string",
description:
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
}, },
unstableFilter: { unstableFilter: {
type: "boolean", type: "boolean",
@@ -108,11 +107,11 @@ export const tools = [
}, },
minPrice: { minPrice: {
type: "number", type: "number",
description: "Minimum price filter", description: "Minimum price in dollars",
}, },
maxPrice: { maxPrice: {
type: "number", type: "number",
description: "Maximum price filter", description: "Maximum price in dollars",
}, },
strictMode: { strictMode: {
type: "boolean", type: "boolean",

View File

@@ -15,18 +15,13 @@ describe("MCP protocol cookie inputs", () => {
global.fetch = originalFetch; global.fetch = originalFetch;
}); });
test("search tools should not expose Facebook or eBay cookie inputs", () => { test("search tools should not expose cookie inputs", () => {
const searchFacebookTool = tools.find( const toolNames = ["search_kijiji", "search_facebook", "search_ebay"];
(tool) => tool.name === "search_facebook", for (const toolName of toolNames) {
); const tool = tools.find((candidate) => candidate.name === toolName);
const searchEbayTool = tools.find((tool) => tool.name === "search_ebay"); expect(tool?.inputSchema.properties).not.toHaveProperty("cookies");
expect(tool?.inputSchema.properties).not.toHaveProperty("cookiesSource");
expect(searchFacebookTool?.inputSchema.properties).not.toHaveProperty( }
"cookiesSource",
);
expect(searchEbayTool?.inputSchema.properties).not.toHaveProperty(
"cookies",
);
}); });
test("search_facebook should not forward cookies query parameters", async () => { test("search_facebook should not forward cookies query parameters", async () => {
@@ -53,6 +48,31 @@ describe("MCP protocol cookie inputs", () => {
expect(String(calledUrl)).toContain("/facebook?q=laptop"); expect(String(calledUrl)).toContain("/facebook?q=laptop");
expect(String(calledUrl)).not.toContain("cookies="); expect(String(calledUrl)).not.toContain("cookies=");
}); });
test("search_kijiji should not forward cookies query parameters", async () => {
await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_kijiji",
arguments: {
query: "laptop",
cookies: "s=1",
},
},
}),
}),
);
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("/kijiji?q=laptop");
expect(String(calledUrl)).not.toContain("cookies=");
});
}); });
describe("MCP protocol unstableFilter", () => { describe("MCP protocol unstableFilter", () => {
@@ -108,6 +128,46 @@ describe("MCP protocol unstableFilter", () => {
expect(String(calledUrl)).toContain("unstableFilter=true"); expect(String(calledUrl)).toContain("unstableFilter=true");
}); });
test("search_kijiji should document price filters as dollars", () => {
const tool = tools.find((candidate) => candidate.name === "search_kijiji");
const priceMin = tool?.inputSchema.properties.priceMin as {
description: string;
};
const priceMax = tool?.inputSchema.properties.priceMax as {
description: string;
};
expect(priceMin.description).toContain("dollars");
expect(priceMax.description).toContain("dollars");
});
test("handler should forward Kijiji dollar price filters to API", async () => {
await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_kijiji",
arguments: {
query: "macbook",
priceMin: 999.99,
priceMax: 1000,
},
},
}),
}),
);
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("priceMin=999.99");
expect(String(calledUrl)).toContain("priceMax=1000");
});
test("handler should forward unstableFilter=true for search_facebook", async () => { test("handler should forward unstableFilter=true for search_facebook", async () => {
await handleMcpRequest( await handleMcpRequest(
new Request("http://localhost", { new Request("http://localhost", {
@@ -132,6 +192,35 @@ describe("MCP protocol unstableFilter", () => {
expect(String(calledUrl)).toContain("unstableFilter=true"); expect(String(calledUrl)).toContain("unstableFilter=true");
}); });
test("tools/call returns API JSON as text content", async () => {
global.fetch = mock(() =>
Promise.resolve(
new Response(JSON.stringify([{ title: "item" }]), { status: 200 }),
),
) as unknown as typeof fetch;
const response = await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_facebook",
arguments: { query: "laptop" },
},
}),
}),
);
const body = await response.json();
expect(body.result.content[0].type).toBe("text");
expect(JSON.parse(body.result.content[0].text)).toEqual([
{ title: "item" },
]);
});
test("handler should forward unstableFilter=true for search_ebay", async () => { test("handler should forward unstableFilter=true for search_ebay", async () => {
await handleMcpRequest( await handleMcpRequest(
new Request("http://localhost", { new Request("http://localhost", {
@@ -155,4 +244,44 @@ describe("MCP protocol unstableFilter", () => {
.calls[0]?.[0]; .calls[0]?.[0];
expect(String(calledUrl)).toContain("unstableFilter=true"); expect(String(calledUrl)).toContain("unstableFilter=true");
}); });
test("search_ebay should document price filters as dollars", () => {
const tool = tools.find((candidate) => candidate.name === "search_ebay");
const minPrice = tool?.inputSchema.properties.minPrice as {
description: string;
};
const maxPrice = tool?.inputSchema.properties.maxPrice as {
description: string;
};
expect(minPrice.description).toContain("dollars");
expect(maxPrice.description).toContain("dollars");
});
test("handler should forward eBay dollar price filters to API", async () => {
await handleMcpRequest(
new Request("http://localhost", {
method: "POST",
body: JSON.stringify({
jsonrpc: "2.0",
id: 1,
method: "tools/call",
params: {
name: "search_ebay",
arguments: {
query: "macbook",
minPrice: 999.99,
maxPrice: 1000,
},
},
}),
}),
);
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
.calls[0]?.[0];
expect(String(calledUrl)).toContain("minPrice=999.99");
expect(String(calledUrl)).toContain("maxPrice=1000");
});
}); });

View File

@@ -5,5 +5,5 @@
"@/*": ["./src/*"] "@/*": ["./src/*"]
} }
}, },
"include": ["./src", "./test"] "include": ["./src", "./test", "../../types/**/*.d.ts"]
} }

25
types/argon2-wasm-pro/index.d.ts vendored Normal file
View File

@@ -0,0 +1,25 @@
/**
 * Hand-written ambient typings for the "argon2-wasm-pro" package, covering
 * only the default export and its `hash` function.
 */
declare module "argon2-wasm-pro" {
  /** Value resolved by `hash`: the digest in three representations. */
  type Argon2Result = {
    /** Digest as bytes. */
    hash: Uint8Array;
    /** Digest as a string; presumably hex-encoded, going by the name. */
    hashHex: string;
    /** NOTE(review): presumably the standard encoded Argon2 string — confirm. */
    encoded: string;
  };

  /**
   * Input accepted by `hash`. Every field is required.
   * NOTE(review): units and valid ranges are not stated here — presumably
   * they follow the usual Argon2 parameters; confirm against the package.
   */
  type Argon2Options = {
    pass: string | Uint8Array;
    salt: Uint8Array;
    time: number;
    mem: number;
    hashLen: number;
    parallelism: number;
    type: number;
  };

  /** Hashes `options.pass` and resolves with the resulting digest. */
  function hash(options: Argon2Options): Promise<Argon2Result>;

  /** Shape of the package's default export. */
  type Argon2Module = {
    hash: typeof hash;
  };

  const argon2: Argon2Module;

  export default argon2;
}