Compare commits
91 Commits
9070f76412
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| ec545723bb | |||
| 0a246a29bf | |||
| 7ab33d0b02 | |||
| d2c3c07e7d | |||
| 0470a7bec7 | |||
| 89ad1c521f | |||
| 5c732287c5 | |||
| 20fb46190a | |||
| e791fc5478 | |||
| c1fa5168dc | |||
| ec2a26cedf | |||
| 5d99e984e0 | |||
| b657ea594a | |||
| 5651a194e9 | |||
| 31cc0660bc | |||
| fc7200777e | |||
| f68a5a8d9b | |||
| a6b24b318e | |||
| 0873df7e82 | |||
| 24e0a8266e | |||
| db173aef1b | |||
| d1cd028f34 | |||
| 28b3267b7d | |||
| c0dda57f64 | |||
| 31866de787 | |||
| 9c4c347933 | |||
| 53eafe6d4c | |||
| 84f17fbdfd | |||
| 3a722a2d11 | |||
| f95b974c7e | |||
| f5339cadf1 | |||
| 5d86a4e54d | |||
| 82e7abc057 | |||
| 6e50ebf901 | |||
| 5ecb645ee3 | |||
| 82e12283de | |||
| 22eb65d4a2 | |||
| abdd39d65c | |||
| 3e4e35c9ae | |||
| 3ea6ee3938 | |||
| d178f9c9cb | |||
| 9cbba9ba13 | |||
| b6aaec0b65 | |||
| 11dce39428 | |||
| 2a5701aeb9 | |||
| c6c44a0914 | |||
| 3fe5fdb63f | |||
| 7966073bf8 | |||
| df2635d92f | |||
| ddadc7d5ae | |||
| d77a006ded | |||
| 56b2198df1 | |||
| 63716272c5 | |||
| 1d21c66945 | |||
| f2f78225f3 | |||
| 43d15fce5f | |||
| fef2f1968a | |||
| 01081f6b2e | |||
| d10d5305a3 | |||
| bf393eacae | |||
| 79bb249603 | |||
| 957e0f137b | |||
| 49e90d45f8 | |||
| b6456047a6 | |||
| 02b3f805b2 | |||
| a1af5d2630 | |||
| 77b9fc9934 | |||
| a802035ca4 | |||
| 974190de6b | |||
| 3c38232cd5 | |||
| 224e83ac4c | |||
| b73faa35da | |||
| 0f77155c8d | |||
| 10c2856bf6 | |||
| 9c8643086a | |||
| 244a88e63c | |||
| 807849e257 | |||
| eb37e8814e | |||
| 13c0fec305 | |||
| 08d59ab497 | |||
| 0a0723a560 | |||
| 881c2ddf8c | |||
| 55faee7dd5 | |||
| b5e14e686a | |||
| 6f9d4db419 | |||
| 08edfa8097 | |||
| c7fc8352ac | |||
| 1ee41fb346 | |||
| 8141de5b4b | |||
| f8975fa91d | |||
| cb5e1e62d2 |
4
.envrc
4
.envrc
@@ -1,4 +1,8 @@
|
||||
export DIRENV_WARN_TIMEOUT=20s
|
||||
export AGENT_BROWSER_EXECUTABLE_PATH=/run/current-system/sw/bin/google-chrome-unstable
|
||||
export AGENT_BROWSER_ENGINE=chrome
|
||||
export AGENT_BROWSER_HEADED=0
|
||||
export AGENT_BROWSER_SKILLS_DIR=.claude/skills
|
||||
export OPENCODE_CONFIG_CONTENT="{\"plugin\":[\"superpowers@git+https://github.com/obra/superpowers.git\"]}"
|
||||
|
||||
eval "$(devenv direnvrc)"
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -33,6 +33,8 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
|
||||
.eslintcache
|
||||
.cache
|
||||
*.tsbuildinfo
|
||||
.turbo
|
||||
.worktrees/
|
||||
|
||||
# IntelliJ based IDEs
|
||||
.idea
|
||||
|
||||
@@ -1,52 +1,9 @@
|
||||
## Bun Guidelines
|
||||
## Bun Guide
|
||||
|
||||
**CRITICAL**: Do not assume you know full Bun APIs. For **ANY** Bun API you use, confirm them by using `bun-docs` MCP tools.
|
||||
|
||||
Default to using Bun instead of Node.js.
|
||||
|
||||
- Use `bun <file>` instead of `node <file>` or `ts-node <file>`
|
||||
- Use `bun test` instead of `jest` or `vitest`
|
||||
- Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild`
|
||||
- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install`
|
||||
- Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>`
|
||||
- Use `bunx <package> <command>` instead of `npx <package> <command>`
|
||||
- Bun automatically loads .env, so don't use dotenv.
|
||||
|
||||
### APIs
|
||||
|
||||
- `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`.
|
||||
- `bun:sqlite` for SQLite. Don't use `better-sqlite3`.
|
||||
- `Bun.redis` for Redis. Don't use `ioredis`.
|
||||
- `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`.
|
||||
- `WebSocket` is built-in. Don't use `ws`.
|
||||
- Prefer `Bun.file` over `node:fs`'s readFile/writeFile
|
||||
- Bun.$`ls` instead of execa.
|
||||
|
||||
### Testing
|
||||
|
||||
#### Quick Start
|
||||
- Run tests: `bun test`
|
||||
- Write tests in `tests/` folder
|
||||
|
||||
#### Test Structure
|
||||
- Use `describe` blocks to group related tests
|
||||
- Use `test` for individual test cases
|
||||
- Use `beforeEach`/`afterEach` for setup/teardown
|
||||
|
||||
#### Assertions
|
||||
- Import: `import { test, expect, describe, beforeEach, afterEach, mock } from "bun:test";`
|
||||
- Common: `expect(value).toBe(expected)`, `expect(fn).rejects.toThrow()`
|
||||
- Async: `await expect(asyncFn()).resolves.toBe(expected)`
|
||||
|
||||
#### Mocking
|
||||
- Mock functions: `mock(fn)`
|
||||
- Mock globals: `global.fetch = mock(...)`
|
||||
- Restore mocks in `afterEach` or `finally`
|
||||
|
||||
#### Best Practices
|
||||
- Mock external APIs (fetch, file I/O)
|
||||
- Test error cases and edge conditions
|
||||
- Use descriptive test names
|
||||
- Clean up resources in `afterEach`
|
||||
|
||||
For more information, read the Bun API docs in `node_modules/bun-types/docs/**.mdx`.
|
||||
- Package manager/runtime/test runner is Bun `1.3.13`.
|
||||
- Use `bun install`, `bun run <script>`, `bun test`, and `bun build`; do not add npm/yarn/pnpm scripts.
|
||||
- Prefer Bun-native runtime APIs already used in repo: `Bun.serve`, built-in `fetch`, Web APIs, and `bun:test`.
|
||||
- Keep servers framework-free. Do not introduce Express/Koa/Fastify for the adapters.
|
||||
- Bun auto-loads `.env`; do not add `dotenv`.
|
||||
- For tests, import from `bun:test` and restore mocked globals/env in `afterEach` or `finally`.
|
||||
- Root `bun test` is misleading because `bunfig.toml` sets a dummy root. Run package test paths explicitly.
|
||||
|
||||
@@ -2,37 +2,47 @@
|
||||
|
||||
## Repo Shape
|
||||
|
||||
- Bun workspace monorepo.
|
||||
- `packages/core`: scraper logic, parsing, shared cookie/http/format helpers, and the only checked-in tests.
|
||||
- `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes.
|
||||
- `packages/mcp-server`: MCP JSON-RPC adapter that proxies to the API server.
|
||||
- `dist/`: build output. Do not edit generated files here.
|
||||
- `cookies/`: local cookie examples and docs. Never commit real session cookies.
|
||||
- Bun workspace monorepo with packages under `packages/*`.
|
||||
- `packages/core`: scraper behavior, parsing, result types, cookie handling, HTTP helpers.
|
||||
- `packages/api-server`: Bun HTTP adapter exposing `/api/*` routes over core.
|
||||
- `packages/mcp-server`: MCP/JSON-RPC adapter that proxies to the API server.
|
||||
- `cookies/`: local cookie docs/examples only. Treat real cookie files as secrets.
|
||||
- `dist/`, `node_modules/`, `.turbo/`, `.direnv/`, `.devenv/`: generated/vendor/cache. Do not edit.
|
||||
|
||||
## Commands
|
||||
|
||||
- Install: `bun install`
|
||||
- Lint/format check: `bun run ci`
|
||||
- Build everything: `bun run build`
|
||||
- Run tests: `bun test`
|
||||
- Lint/format/typecheck: `bun run ci`
|
||||
- Build all packages: `bun run build`
|
||||
- Build bundled runtime output: `bun run build:all`
|
||||
- Run tests: `bun test packages/core/test packages/api-server/test packages/mcp-server/test`
|
||||
- API dev server: `bun run --cwd packages/api-server dev`
|
||||
- MCP dev server: `bun run --cwd packages/mcp-server dev`
|
||||
|
||||
## Repo Conventions
|
||||
## Boundaries
|
||||
|
||||
- Keep marketplace scraping behavior in `packages/core`. `api-server` and `mcp-server` stay thin adapters.
|
||||
- Preserve cookie precedence everywhere: request parameter > environment variable > cookie file.
|
||||
- Shared public surface for scraper code is `packages/core/src/index.ts`. Update exports deliberately.
|
||||
- Tests should stay deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints.
|
||||
- Use Bun and Bun-native APIs in this repo. Do not introduce Node-specific tooling unless already required.
|
||||
- Biome and strict TypeScript are part of the contract. Fix code to satisfy them; do not relax config.
|
||||
- Marketplace behavior belongs in `packages/core`, not adapter packages.
|
||||
- HTTP route code should parse request input, call core, and map status/errors.
|
||||
- MCP code should define tools, validate JSON-RPC flow, and map tool args to API URLs.
|
||||
- Keep API query params and MCP tool args in sync.
|
||||
- Shared public surface for scraper code is `packages/core/src/index.ts`; update exports deliberately.
|
||||
|
||||
## Invariants
|
||||
|
||||
- Cookie precedence in core helpers: explicit/request cookie string before environment variable.
|
||||
- Tests must be deterministic and offline. Mock `fetch`; do not hit live marketplace endpoints.
|
||||
- Use Bun and Bun-native APIs. Do not add Node-specific tooling unless already required.
|
||||
- Biome and strict TypeScript are part of the contract. Fix code; do not relax config.
|
||||
|
||||
## Verification
|
||||
|
||||
- Core changes: `bun test && bun run ci`
|
||||
- Cross-package contract changes: `bun test && bun run ci && bun run build`
|
||||
- Adapter-only changes: run the relevant package build plus `bun run ci`
|
||||
- Core changes: `bun test packages/core/test && bun run ci`
|
||||
- Adapter-only changes: relevant package build plus `bun run ci`
|
||||
- Cross-package contract changes: `bun test packages/core/test packages/api-server/test packages/mcp-server/test && bun run ci && bun run build`
|
||||
|
||||
## Gotchas
|
||||
|
||||
- The root `build` script emits separate bundles to `dist/api` and `dist/mcp`, then `scripts/start.sh` launches both.
|
||||
- `bunfig.toml` points test root at `./do-not-run-tests-from-root`; pass package test paths explicitly.
|
||||
- Root `build` cleans `dist`, then Turbo emits bundles for API and MCP.
|
||||
- `scripts/start.sh` launches `dist/api/index.js` and `dist/mcp/index.js`.
|
||||
- Package `tsconfig.json` files override root `include`; shared ambient declarations under root `types/` must be included from each package that typechecks cross-package source.
|
||||
|
||||
106
FMARKETPLACE.md
106
FMARKETPLACE.md
@@ -1,44 +1,56 @@
|
||||
# Facebook Marketplace API Reverse Engineering
|
||||
|
||||
## Overview
|
||||
This document tracks findings from reverse-engineering Facebook Marketplace APIs for listing details.
|
||||
|
||||
This document tracks findings from reverse-engineering Facebook Marketplace APIs for
|
||||
listing details.
|
||||
|
||||
## Current Implementation Status
|
||||
|
||||
- Search functionality: Implemented in `src/facebook.ts`
|
||||
- Individual listing details: Not yet implemented
|
||||
|
||||
## Findings
|
||||
|
||||
### Step 1: Initial Setup
|
||||
|
||||
- Using Chrome DevTools to inspect Facebook Marketplace
|
||||
- Need to authenticate with Facebook account to access marketplace data
|
||||
- Cookies required for full access
|
||||
- Current status: Successfully logged in and accessed marketplace data
|
||||
|
||||
### Step 2: Individual Listing Details Analysis - COMPLETED
|
||||
|
||||
- **Data Location**: Embedded in HTML script tags within `require` array structure
|
||||
- **Path**: `require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- **Path**:
|
||||
`require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- **Authentication**: Required for full data access
|
||||
- **Current Status**: Successfully reverse-engineered the API structure and data extraction method
|
||||
- **Current Status**: Successfully reverse-engineered the API structure and data
|
||||
extraction method
|
||||
|
||||
### API Endpoints Discovered
|
||||
|
||||
#### Search Endpoint
|
||||
|
||||
- URL: `https://www.facebook.com/marketplace/{location}/search`
|
||||
- Parameters: `query`, `sortBy`, `exact`
|
||||
- Data embedded in HTML script tags with `require` structure
|
||||
- Authentication: Required (cookies)
|
||||
|
||||
#### Listing Details Endpoint
|
||||
|
||||
- **URL Structure**: `https://www.facebook.com/marketplace/item/{listing_id}/`
|
||||
- **Data Source**: Server-side rendered HTML with embedded JSON data in script tags
|
||||
- **Data Structure**: Relay/GraphQL style data structure under `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- **Extraction Method**: Parse JSON from script tags containing marketplace data, navigate to the target object
|
||||
- **Data Structure**: Relay/GraphQL style data structure under
|
||||
`require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- **Extraction Method**: Parse JSON from script tags containing marketplace data,
|
||||
navigate to the target object
|
||||
- **Authentication**: Required (cookies)
|
||||
|
||||
### Listing Data Structure Discovered (Current - 2026)
|
||||
|
||||
The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem` object with the following key properties:
|
||||
The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem`
|
||||
object with the following key properties:
|
||||
|
||||
```typescript
|
||||
interface FacebookMarketplaceItem {
|
||||
@@ -151,6 +163,7 @@ interface FacebookMarketplaceItem {
|
||||
```
|
||||
|
||||
### Example Data Extracted (Current Structure)
|
||||
|
||||
```json
|
||||
{
|
||||
"__typename": "GroupCommerceProductItem",
|
||||
@@ -228,36 +241,47 @@ interface FacebookMarketplaceItem {
|
||||
## Data Extraction Method
|
||||
|
||||
### Current Method (2026)
|
||||
Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML response. The extraction process:
|
||||
|
||||
1. **Find the Correct Script**: Look for script tags containing marketplace listing data by searching for key fields like `marketplace_listing_title`, `redacted_description`, and `formatted_price`.
|
||||
Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML
|
||||
response. The extraction process:
|
||||
|
||||
1. **Find the Correct Script**: Look for script tags containing marketplace listing data
|
||||
by searching for key fields like `marketplace_listing_title`, `redacted_description`,
|
||||
and `formatted_price`.
|
||||
|
||||
2. **Parse JSON Structure**: The data is nested within a `require` array structure:
|
||||
```
|
||||
require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target
|
||||
```
|
||||
|
||||
3. **Navigate to Target Object**: The actual listing data is a `GroupCommerceProductItem` object containing comprehensive information about the listing, seller, and vehicle details.
|
||||
3. **Navigate to Target Object**: The actual listing data is a
|
||||
`GroupCommerceProductItem` object containing comprehensive information about the
|
||||
listing, seller, and vehicle details.
|
||||
|
||||
4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust extraction should search for the target object recursively within the parsed JSON.
|
||||
4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust
|
||||
extraction should search for the target object recursively within the parsed JSON.
|
||||
|
||||
### Authentication Requirements
|
||||
|
||||
- Valid Facebook session cookies are required
|
||||
- User must be logged in to Facebook
|
||||
- Marketplace access may be location-restricted
|
||||
|
||||
## Tools Used
|
||||
|
||||
- Chrome DevTools Protocol
|
||||
- Network monitoring
|
||||
- HTML/script parsing
|
||||
- JSON structure analysis
|
||||
|
||||
## Implementation Status
|
||||
|
||||
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
|
||||
- ✅ Identified current data structure and extraction method (2026)
|
||||
- ✅ Documented comprehensive GroupCommerceProductItem interface
|
||||
- ✅ Implemented `extractFacebookItemData()` function with script parsing logic
|
||||
- ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to ListingDetails
|
||||
- ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to
|
||||
ListingDetails
|
||||
- ✅ Implemented `fetchFacebookItem()` function with authentication and error handling
|
||||
- ✅ Updated TypeScript interfaces to match current API structure
|
||||
- ✅ Added robust extraction with fallback methods for changing API paths
|
||||
@@ -266,12 +290,15 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
|
||||
|
||||
### Core Functions Implemented
|
||||
|
||||
1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from HTML-embedded JSON in script tags
|
||||
1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from
|
||||
HTML-embedded JSON in script tags
|
||||
- Searches for scripts containing marketplace listing data
|
||||
- Uses primary path: `require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- Uses primary path:
|
||||
`require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- Falls back to recursive search for GroupCommerceProductItem objects
|
||||
|
||||
2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to unified ListingDetails format
|
||||
2. **`parseFacebookItem(item)`**: Converts Facebook’s GroupCommerceProductItem to
|
||||
unified ListingDetails format
|
||||
- Handles pricing (FREE listings, CAD currency)
|
||||
- Extracts seller information, location, and status
|
||||
- Supports vehicle-specific metadata
|
||||
@@ -284,25 +311,31 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
|
||||
- Returns parsed ListingDetails or null on failure
|
||||
|
||||
### Authentication Requirements
|
||||
- Facebook session cookies required in `./cookies/facebook.json` or provided as parameter
|
||||
|
||||
- Facebook session cookies required in `./cookies/facebook.json` or provided as
|
||||
parameter
|
||||
- Cookies must include valid authentication tokens for marketplace access
|
||||
- Handles cookie expiration and domain validation
|
||||
|
||||
## Current Implementation Status - 2026 Verification
|
||||
|
||||
### Step 3: API Verification and Current Structure Analysis (January 2026)
|
||||
|
||||
- **Verification Date**: January 22, 2026
|
||||
- **Status**: Successfully verified current Facebook Marketplace API structure
|
||||
- **Data Source**: Embedded JSON in HTML script tags (server-side rendered)
|
||||
- **Extraction Path**: `require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- **Extraction Path**:
|
||||
`require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
|
||||
#### Verified Listing Structure (Real Example - 2006 Hyundai Tiburon)
|
||||
|
||||
- **Listing ID**: 1226468515995685
|
||||
- **Title**: "2006 Hyundai Tiburon"
|
||||
- **Title**: “2006 Hyundai Tiburon”
|
||||
- **Price**: CA$3,000 (formatted_price.text)
|
||||
- **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount": "3000.00"}
|
||||
- **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount":
|
||||
"3000.00"}
|
||||
- **Location**: Hamilton, ON (with coordinates: 43.250427246094, -79.963989257812)
|
||||
- **Description**: "As is" (redacted_description.text)
|
||||
- **Description**: “As is” (redacted_description.text)
|
||||
- **Vehicle Details**:
|
||||
- Make: Hyundai
|
||||
- Model: Tiburon
|
||||
@@ -323,41 +356,54 @@ Facebook Marketplace listing data is embedded in JSON within `<script>` tags in
|
||||
- **Messaging**: Enabled
|
||||
|
||||
#### Current API Characteristics
|
||||
|
||||
- **Authentication**: Still requires valid Facebook session cookies
|
||||
- **Data Format**: Server-side rendered HTML with embedded GraphQL/Relay JSON
|
||||
- **Structure Stability**: Primary extraction path remains functional
|
||||
- **Additional Features**: Includes marketplace ratings, seller verification badges, cross-posting info
|
||||
- **Additional Features**: Includes marketplace ratings, seller verification badges,
|
||||
cross-posting info
|
||||
|
||||
### API Changes Observed Since 2024 Documentation
|
||||
|
||||
- **Minimal Changes**: Core data structure largely unchanged
|
||||
- **Enhanced Fields**: Added more detailed vehicle specifications and seller profile information
|
||||
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure
|
||||
- **Enhanced Fields**: Added more detailed vehicle specifications and seller profile
|
||||
information
|
||||
- **GraphQL Integration**: Deeper integration with Facebook’s GraphQL infrastructure
|
||||
- **Security Features**: Additional integrity checks and reporting mechanisms
|
||||
|
||||
### Multi-Category Testing Results (January 2026)
|
||||
|
||||
Successfully tested extraction across different listing categories:
|
||||
|
||||
#### 1. Vehicle Listings (Automotive)
|
||||
|
||||
- **Example**: 2006 Hyundai Tiburon (ID: 1226468515995685)
|
||||
- **Status**: ✅ Fully functional
|
||||
- **Data Extracted**: Complete vehicle specs, pricing, seller info, location coordinates
|
||||
- **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data, vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color, vehicle_fuel_type
|
||||
- **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data,
|
||||
vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color,
|
||||
vehicle_fuel_type
|
||||
|
||||
#### 2. Electronics Listings
|
||||
|
||||
- **Example**: Nintendo Switch (ID: 3903865769914262)
|
||||
- **Status**: ✅ Fully functional
|
||||
- **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used - like new), seller (Yitao Hou)
|
||||
- **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used -
|
||||
like new), seller (Yitao Hou)
|
||||
- **Category**: Electronics (category_id: 479353692612078)
|
||||
- **Notes**: Standard GroupCommerceProductItem structure applies
|
||||
|
||||
#### 3. Home Goods/Furniture Listings
|
||||
|
||||
- **Example**: Tabletop Mirror (cat not included) (ID: 1082389057290709)
|
||||
- **Status**: ✅ Fully functional
|
||||
- **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used - like new), seller (Rohit Rehan)
|
||||
- **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used -
|
||||
like new), seller (Rohit Rehan)
|
||||
- **Category**: Home Goods (category_id: 1569171756675761)
|
||||
- **Notes**: Includes detailed description and delivery options
|
||||
|
||||
#### Testing Summary
|
||||
|
||||
- **Extraction Method**: Consistent across all categories
|
||||
- **Data Structure**: GroupCommerceProductItem interface works for all listing types
|
||||
- **Authentication**: Required for all categories
|
||||
@@ -365,18 +411,22 @@ Successfully tested extraction across different listing categories:
|
||||
- **Edge Cases**: All tested listings were active/in-person pickup
|
||||
|
||||
## Implementation Status - COMPLETED (January 2026)
|
||||
|
||||
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
|
||||
- ✅ Verified current API structure and extraction method (January 2026)
|
||||
- ✅ Tested extraction across multiple listing categories (vehicles, electronics, home goods)
|
||||
- ✅ Implemented comprehensive error handling for sold/removed listings and authentication failures
|
||||
- ✅ Tested extraction across multiple listing categories (vehicles, electronics, home
|
||||
goods)
|
||||
- ✅ Implemented comprehensive error handling for sold/removed listings and
|
||||
authentication failures
|
||||
- ✅ Enhanced rate limiting and retry logic (already robust)
|
||||
- ✅ Added monitoring and metrics for API stability detection
|
||||
- ✅ Updated all scraper functions to use verified extraction methods
|
||||
- ✅ Documented comprehensive GroupCommerceProductItem interface with real examples
|
||||
|
||||
## Next Steps (Future Maintenance)
|
||||
|
||||
1. Monitor extraction success rates for API change detection
|
||||
2. Update extraction paths if Facebook changes their API structure
|
||||
3. Add support for additional marketplace features as they become available
|
||||
4. Implement caching mechanisms for improved performance
|
||||
5. Add support for marketplace messaging and negotiation features
|
||||
5. Add support for marketplace messaging and negotiation features
|
||||
|
||||
145
KIJIJI.md
145
KIJIJI.md
@@ -1,9 +1,13 @@
|
||||
# Kijiji API Findings
|
||||
|
||||
## Overview
|
||||
Kijiji is a Canadian classifieds marketplace that uses a modern web application built with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with client-side state management.
|
||||
|
||||
Kijiji is a Canadian classifieds marketplace that uses a modern web application built
|
||||
with Next.js and Apollo GraphQL. The search results are powered by a GraphQL API with
|
||||
client-side state management.
|
||||
|
||||
## Initial Page Load (Homepage)
|
||||
|
||||
- **URL**: https://www.kijiji.ca/
|
||||
- **Architecture**: Server-side rendered React application with Next.js
|
||||
- **Data Sources**:
|
||||
@@ -12,18 +16,27 @@ Kijiji is a Canadian classifieds marketplace that uses a modern web application
|
||||
- No initial API calls for listings - data appears to be embedded in HTML
|
||||
|
||||
## Search Results Page
|
||||
|
||||
- **URL Pattern**: `https://www.kijiji.ca/b-[location]/[keywords]/k0l0`
|
||||
- **Example**: `https://www.kijiji.ca/b-canada/iphone/k0l0`
|
||||
- **Technology Stack**: Next.js with Apollo GraphQL client
|
||||
- **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized GraphQL cache
|
||||
- **Data Structure**: Uses `__APOLLO_STATE__` global object containing normalized
|
||||
GraphQL cache
|
||||
|
||||
### GraphQL Data Structure
|
||||
|
||||
#### Data Location
|
||||
Search results data is embedded in the Next.js page props under `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server and sent to the client. Each page (including pagination) has its own pre-rendered data.
|
||||
|
||||
Search results data is embedded in the Next.js page props under
|
||||
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`. The data is pre-rendered on the server
|
||||
and sent to the client.
|
||||
Each page (including pagination) has its own pre-rendered data.
|
||||
|
||||
#### Search Results Container
|
||||
The search results are stored directly in the Apollo ROOT_QUERY with keys following the pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination parameters.
|
||||
|
||||
The search results are stored directly in the Apollo ROOT_QUERY with keys following the
|
||||
pattern `searchResultsPageByUrl:{url_path}` where `url_path` includes pagination
|
||||
parameters.
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -33,17 +46,20 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
|
||||
```
|
||||
|
||||
#### Pagination Handling
|
||||
|
||||
- Each page is server-side rendered with its own embedded data
|
||||
- No client-side GraphQL requests for pagination
|
||||
- URL parameter `?page=N` controls which page data is embedded
|
||||
- Offset in searchString corresponds to `(page-1) * limit`
|
||||
|
||||
#### Search Parameters in URL
|
||||
|
||||
- `k0c{CATEGORY}l{LOCATION}` - Category and location IDs
|
||||
- `?page=N` - Page number (1-based)
|
||||
- Data contains `offset` and `limit` for API-style pagination
|
||||
|
||||
#### Individual Listing Structure
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "1732061412",
|
||||
@@ -90,6 +106,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
|
||||
```
|
||||
|
||||
### URL Parameters
|
||||
|
||||
- `sort=MATCH` - Sort by relevance
|
||||
- `order=DESC` - Descending order
|
||||
- `type=OFFER` - Show offerings (not wanted ads)
|
||||
@@ -102,6 +119,7 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
|
||||
- `eaTopAdPosition=1` - Purpose unknown (not yet determined)
|
||||
|
||||
### Image API
|
||||
|
||||
- **Endpoint**: `https://media.kijiji.ca/api/v1/`
|
||||
- **Pattern**: `/ca-prod-fsbo-ads/images/{uuid}?rule=kijijica-{size}-jpg`
|
||||
- **Sizes**: 200, 300, 400, 500 pixels
|
||||
@@ -109,10 +127,12 @@ The search results are stored directly in the Apollo ROOT_QUERY with keys follow
|
||||
### Categories and Locations
|
||||
|
||||
#### Category Structure
|
||||
Categories are hierarchical with parent-child relationships. The main categories under "Buy & Sell" include:
|
||||
|
||||
Categories are hierarchical with parent-child relationships.
|
||||
The main categories under “Buy & Sell” include:
|
||||
|
||||
| ID | Name | Total Results (iPhone search) |
|
||||
|----|------|------------------------------|
|
||||
| --- | --- | --- |
|
||||
| 10 | Buy & Sell | 19956 |
|
||||
| 12 | Arts & Collectibles | 149 |
|
||||
| 767 | Audio | 481 |
|
||||
@@ -145,10 +165,11 @@ Categories are hierarchical with parent-child relationships. The main categories
|
||||
| 26 | Other | 286 |
|
||||
|
||||
#### Location Structure
|
||||
Locations are also hierarchical, with provinces/states under the main "Canada" location:
|
||||
|
||||
Locations are also hierarchical, with provinces/territories under the main “Canada” location:
|
||||
|
||||
| ID | Name | Total Results (iPhone search) |
|
||||
|----|------|------------------------------|
|
||||
| --- | --- | --- |
|
||||
| 0 | Canada | - |
|
||||
| 9001 | Québec | 2516 |
|
||||
| 9002 | Nova Scotia | 875 |
|
||||
@@ -163,16 +184,20 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
|
||||
| 9011 | Prince Edward Island | 31 |
|
||||
|
||||
#### URL Patterns
|
||||
|
||||
- Categories: `/b-{category-slug}/canada/{keywords}/k0c{CATEGORY_ID}l0`
|
||||
- Locations: `/b-buy-sell/{location-slug}/iphone/k0c10l{LOCATION_ID}`
|
||||
- Combined: `/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}`
|
||||
- Combined:
|
||||
`/b-{category-slug}/{location-slug}/{keywords}/k0c{CATEGORY_ID}l{LOCATION_ID}`
|
||||
|
||||
### Pagination
|
||||
|
||||
- Uses offset-based pagination
|
||||
- 40 results per page
|
||||
- Total count provided in pagination metadata
|
||||
|
||||
## Authentication & User Management
|
||||
|
||||
- **Authentication System**: OAuth2-based using CIS (Customer Identity Service)
|
||||
- **Identity Provider**: `id.kijiji.ca`
|
||||
- **OAuth2 Flow**:
|
||||
@@ -184,24 +209,30 @@ Locations are also hierarchical, with provinces/states under the main "Canada" l
|
||||
- **User Features**: Saved searches, messaging, flagging require authentication
|
||||
|
||||
## Posting API
|
||||
|
||||
- **Posting Flow**: Requires authentication, redirects to login if not authenticated
|
||||
- **Posting URL**: `https://www.kijiji.ca/p-post-ad.html`
|
||||
- **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated users
|
||||
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in anonymous browsing)
|
||||
- **Authentication Required**: Yes, redirects to `/consumer/login` for unauthenticated
|
||||
users
|
||||
- **Post-Creation**: Likely uses authenticated GraphQL mutations (not observed in
|
||||
anonymous browsing)
|
||||
|
||||
## GraphQL API Endpoint
|
||||
|
||||
- **URL**: `https://www.kijiji.ca/anvil/api`
|
||||
- **Method**: POST
|
||||
- **Content-Type**: application/json
|
||||
- **Headers**:
|
||||
- `apollo-require-preflight: true`
|
||||
- Standard CORS headers
|
||||
- **Authentication**: No authentication required for basic queries (uses cookies for session tracking)
|
||||
- **Authentication**: No authentication required for basic queries (uses cookies for
|
||||
session tracking)
|
||||
- **Technology**: Apollo GraphQL server
|
||||
|
||||
### Sample GraphQL Queries Discovered
|
||||
|
||||
#### Get Search Categories
|
||||
|
||||
```graphql
|
||||
query getSearchCategories($locale: String!) {
|
||||
searchCategories {
|
||||
@@ -218,6 +249,7 @@ Variables: `{"locale": "en-CA"}`
|
||||
Response includes hierarchical category structure with IDs and localized names.
|
||||
|
||||
#### Get Geocode from IP (fails for current IP)
|
||||
|
||||
```graphql
|
||||
query GetGeocodeReverseFromIp {
|
||||
geocodeReverseFromIp {
|
||||
@@ -229,9 +261,11 @@ query GetGeocodeReverseFromIp {
|
||||
}
|
||||
```
|
||||
|
||||
This query fails for the current IP address, suggesting geolocation-based features may not work or require different IP ranges.
|
||||
This query fails for the current IP address, suggesting geolocation-based features may
|
||||
not work or require different IP ranges.
|
||||
|
||||
#### Get Category Path
|
||||
|
||||
```graphql
|
||||
query GetCategoryPath($categoryId: Int!, $locale: String, $locationId: Int) {
|
||||
category(id: $categoryId) {
|
||||
@@ -256,25 +290,33 @@ Variables: `{"categoryId": 10, "locationId": 0, "locale": "en-CA"}`
|
||||
## Latest Findings (2026-01-21)
|
||||
|
||||
### Client-Side GraphQL Queries Observed
|
||||
|
||||
- **getSearchCategories**: Retrieves category hierarchy for search filters
|
||||
- **GetGeocodeReverseFromIp**: Attempts to geolocate user (fails for current IP)
|
||||
|
||||
### GraphQL Schema Insights
|
||||
Testing direct GraphQL queries revealed:
|
||||
- Field "searchResults" does not exist on Query type
|
||||
- Suggested alternatives: "searchResultsPage" or "searchUrl"
|
||||
- This suggests the search functionality may use different GraphQL operations than direct queries
|
||||
|
||||
The embedded Apollo state approach appears to be the primary method for accessing search data, with GraphQL used for auxiliary operations like categories and geolocation.
|
||||
Testing direct GraphQL queries revealed:
|
||||
- Field “searchResults” does not exist on Query type
|
||||
- Suggested alternatives: “searchResultsPage” or “searchUrl”
|
||||
- This suggests the search functionality may use different GraphQL operations than
|
||||
direct queries
|
||||
|
||||
The embedded Apollo state approach appears to be the primary method for accessing search
|
||||
data, with GraphQL used for auxiliary operations like categories and geolocation.
|
||||
|
||||
### Server-Side Rendering Architecture
|
||||
Search results are fully server-side rendered with data embedded in HTML. Each page (including pagination) contains its own pre-rendered data. No client-side GraphQL requests are made for:
|
||||
|
||||
Search results are fully server-side rendered with data embedded in HTML. Each page
|
||||
(including pagination) contains its own pre-rendered data.
|
||||
No client-side GraphQL requests are made for:
|
||||
|
||||
- Initial search results
|
||||
- Pagination navigation
|
||||
- Search result data
|
||||
|
||||
### Network Analysis Findings
|
||||
|
||||
- GraphQL endpoint: `https://www.kijiji.ca/anvil/api`
|
||||
- Method: POST
|
||||
- Content-Type: application/json
|
||||
@@ -282,7 +324,10 @@ Search results are fully server-side rendered with data embedded in HTML. Each p
|
||||
- Cookies required for session tracking
|
||||
|
||||
### Embedded Data Structure
|
||||
Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object. The data includes:
|
||||
|
||||
Search results data is embedded in the HTML within Next.js
|
||||
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__` object.
|
||||
The data includes:
|
||||
|
||||
- Individual ad listings with complete metadata
|
||||
- Pagination information
|
||||
@@ -290,20 +335,24 @@ Search results data is embedded in the HTML within Next.js `__NEXT_DATA__.props.
|
||||
- Category/location hierarchies
|
||||
|
||||
### Current Scraper Implementation
|
||||
|
||||
The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo state:
|
||||
|
||||
- Uses `extractApolloState()` to parse `__NEXT_DATA__` from HTML
|
||||
- Filters Apollo keys containing "Listing" to find ad data
|
||||
- Filters Apollo keys containing “Listing” to find ad data
|
||||
- Extracts `url`, `title`, and other metadata from each listing
|
||||
- Successfully scrapes listings without needing API authentication
|
||||
|
||||
### Authentication Status
|
||||
- **Search functionality**: No authentication required - all search and listing data accessible anonymously
|
||||
|
||||
- **Search functionality**: No authentication required - all search and listing data
|
||||
accessible anonymously
|
||||
- **Posting functionality**: Requires authentication (redirects to login)
|
||||
- **User features**: Saved searches, messaging require authentication
|
||||
- **Rate limiting**: May apply but not observed in anonymous browsing
|
||||
|
||||
### Pagination Implementation
|
||||
|
||||
- Each page is a separate server-rendered route
|
||||
- URL pattern: `/b-{location}/{keywords}/page-{number}/k0c{category_id}l{location_id}` (the `c{category_id}` segment is omitted for All Categories)
|
||||
- No client-side pagination API calls
|
||||
@@ -313,20 +362,24 @@ The existing `src/kijiji.ts` implementation correctly parses the embedded Apollo
|
||||
## URL Pattern Analysis
|
||||
|
||||
### Search URL Structure
|
||||
|
||||
`https://www.kijiji.ca/b-{category_slug}/{location_slug}/{keywords}/k0c{category_id}l{location_id}`
|
||||
|
||||
#### Examples Observed:
|
||||
|
||||
- All categories, Canada: `/b-canada/iphone/k0l0` (no `c` segment = All Categories, `l0` = Canada)
|
||||
- Cell phones category: `/b-cell-phones/canada/iphone/k0c132l0` (c132 = Cell Phones)
|
||||
- With pagination: `/b-canada/iphone/page-2/k0l0`
|
||||
|
||||
#### URL Components:
|
||||
|
||||
- `c{CATEGORY_ID}`: Category ID (0 = All Categories, 132 = Cell Phones, etc.)
|
||||
- `l{LOCATION_ID}`: Location ID (0 = Canada, 1700272 = GTA, etc.)
|
||||
- `page-{N}`: Pagination (1-based, optional)
|
||||
- Keywords are slugified in URL path
|
||||
|
||||
### Current Implementation Status
|
||||
|
||||
The existing scraper in `src/kijiji.ts` successfully implements the approach:
|
||||
- Parses embedded Apollo state from HTML responses
|
||||
- Handles rate limiting and retries
|
||||
@@ -336,14 +389,22 @@ The existing scraper in `src/kijiji.ts` successfully implements the approach:
|
||||
## Listing Details Page
|
||||
|
||||
### Overview
|
||||
Similar to search results, listing details pages use server-side rendering with embedded Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing data - all information is pre-rendered on the server.
|
||||
|
||||
Similar to search results, listing details pages use server-side rendering with embedded
|
||||
Apollo GraphQL state in the HTML. No dedicated API endpoint serves individual listing
|
||||
data - all information is pre-rendered on the server.
|
||||
|
||||
### Data Architecture
|
||||
- **Server-Side Rendering**: Each listing page is fully server-rendered with data embedded in HTML
|
||||
- **Embedded Apollo State**: Listing data is stored in `__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`
|
||||
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings, user profiles) fetched via GraphQL API
|
||||
|
||||
- **Server-Side Rendering**: Each listing page is fully server-rendered with data
|
||||
embedded in HTML
|
||||
- **Embedded Apollo State**: Listing data is stored in
|
||||
`__NEXT_DATA__.props.pageProps.__APOLLO_STATE__`
|
||||
- **Client-Side GraphQL**: Additional data (categories, campaigns, similar listings,
|
||||
user profiles) fetched via GraphQL API
|
||||
|
||||
### Listing Data Structure
|
||||
|
||||
The main listing data follows the same pattern as search results:
|
||||
|
||||
```json
|
||||
@@ -385,40 +446,50 @@ The main listing data follows the same pattern as search results:
|
||||
```
|
||||
|
||||
### Client-Side GraphQL Queries
|
||||
|
||||
When loading a listing details page, the following GraphQL queries are executed:
|
||||
|
||||
#### 1. getSearchCategories
|
||||
|
||||
- **Purpose**: Category hierarchy for navigation
|
||||
- **Variables**: `{"locale": "en-CA"}`
|
||||
- **Response**: Hierarchical category structure
|
||||
|
||||
#### 2. getCampaignsForVip
|
||||
|
||||
- **Purpose**: Advertisement targeting data
|
||||
- **Variables**: `{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}`
|
||||
- **Variables**:
|
||||
`{"placement": "vip", "locationId": 1700275, "categoryId": 760, "platform": "desktop"}`
|
||||
- **Response**: Campaign/ads data (usually null)
|
||||
|
||||
#### 3. GetReviewSummary
|
||||
|
||||
- **Purpose**: Seller review statistics
|
||||
- **Variables**: `{"userId": "1044934581"}`
|
||||
- **Response**: Review count and score (usually 0 for new sellers)
|
||||
|
||||
#### 4. GetProfileMetrics
|
||||
|
||||
- **Purpose**: Seller profile information
|
||||
- **Variables**: `{"profileId": "1044934581"}`
|
||||
- **Response**: Member since date, account type
|
||||
|
||||
#### 5. GetListingsSimilar
|
||||
|
||||
- **Purpose**: Similar listings for cross-selling
|
||||
- **Variables**: `{"listingId": "1705585530", "limit": 10, "isExternalId": false}`
|
||||
- **Response**: Array of similar listings with basic metadata
|
||||
|
||||
#### 6. GetGeocodeReverseFromIp
|
||||
|
||||
- **Purpose**: Geolocation-based features
|
||||
- **Variables**: `{}`
|
||||
- **Response**: Fails with 404 for most IPs
|
||||
|
||||
### Implementation Status
|
||||
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing details from embedded Apollo state:
|
||||
|
||||
The existing `parseListing()` function in `src/kijiji.ts` successfully extracts listing
|
||||
details from embedded Apollo state:
|
||||
|
||||
- ✅ Extracts title, description, price, location
|
||||
- ✅ Handles contact-based pricing ("Please Contact")
|
||||
@@ -427,22 +498,30 @@ The existing `parseListing()` function in `src/kijiji.ts` successfully extracts
|
||||
- ✅ Works without authentication or API keys
|
||||
|
||||
### Key Findings
|
||||
1. **No Dedicated Listing API**: Unlike search results, there's no separate GraphQL query for individual listing data
|
||||
2. **Complete Data Available**: All listing information is embedded in the initial HTML response
|
||||
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data (reviews, similar listings)
|
||||
|
||||
1. **No Dedicated Listing API**: As with search results, there’s no separate GraphQL
|
||||
query for individual listing data
|
||||
2. **Complete Data Available**: All listing information is embedded in the initial HTML
|
||||
response
|
||||
3. **Additional Context Fetched**: Secondary GraphQL queries provide complementary data
|
||||
(reviews, similar listings)
|
||||
4. **Consistent Architecture**: Same Apollo state embedding pattern as search pages
|
||||
|
||||
### Current Scraper Implementation
|
||||
|
||||
The scraper successfully extracts listing details by:
|
||||
1. Fetching the listing URL HTML
|
||||
2. Parsing embedded `__NEXT_DATA__` Apollo state
|
||||
3. Extracting the `Listing:{id}` object from Apollo cache
|
||||
4. Mapping fields to typed `ListingDetails` interface
|
||||
|
||||
This approach works reliably without requiring authentication or dealing with rate limiting on individual listing fetches.
|
||||
This approach works reliably without requiring authentication or dealing with rate
|
||||
limiting on individual listing fetches.
|
||||
|
||||
## Next Steps
|
||||
|
||||
- Explore posting/authentication APIs (requires user login)
|
||||
- Investigate if GraphQL API can be used for programmatic access with proper authentication
|
||||
- Investigate if GraphQL API can be used for programmatic access with proper
|
||||
authentication
|
||||
- Test rate limiting patterns and optimal scraping strategies
|
||||
- Document additional category and location ID mappings
|
||||
|
||||
@@ -15,7 +15,10 @@
|
||||
"linter": {
|
||||
"enabled": true,
|
||||
"rules": {
|
||||
"recommended": true
|
||||
"recommended": true,
|
||||
"correctness": {
|
||||
"noUnusedImports": "error"
|
||||
}
|
||||
}
|
||||
},
|
||||
"javascript": {
|
||||
|
||||
64
bun.lock
64
bun.lock
@@ -4,8 +4,13 @@
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "marketplace-scrapers-monorepo",
|
||||
"dependencies": {
|
||||
"@types/bun": "1.3.13",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "2.3.11",
|
||||
"@tsconfig/bun": "catalog:",
|
||||
"turbo": "2.5.4",
|
||||
},
|
||||
},
|
||||
"packages/api-server": {
|
||||
@@ -13,9 +18,10 @@
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*",
|
||||
"@typescript/native-preview": "catalog:",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest",
|
||||
"@types/bun": "catalog:",
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5",
|
||||
@@ -25,14 +31,16 @@
|
||||
"name": "@marketplace-scrapers/core",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@typescript/native-preview": "catalog:",
|
||||
"argon2-wasm-pro": "1.1.0",
|
||||
"cli-progress": "^3.12.0",
|
||||
"linkedom": "^0.18.12",
|
||||
"unidecode": "^1.1.0",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest",
|
||||
"@types/cli-progress": "^3.11.6",
|
||||
"@types/unidecode": "^1.1.0",
|
||||
"@types/bun": "catalog:",
|
||||
"@types/cli-progress": "catalog:",
|
||||
"@types/unidecode": "catalog:",
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5",
|
||||
@@ -43,15 +51,23 @@
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*",
|
||||
"@typescript/native-preview": "catalog:",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest",
|
||||
"@types/bun": "catalog:",
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5",
|
||||
},
|
||||
},
|
||||
},
|
||||
"catalog": {
|
||||
"@tsconfig/bun": "1.0.9",
|
||||
"@types/bun": "1.3.13",
|
||||
"@types/cli-progress": "3.11.6",
|
||||
"@types/unidecode": "1.1.0",
|
||||
"@typescript/native-preview": "7.0.0-dev.20260428.1",
|
||||
},
|
||||
"packages": {
|
||||
"@biomejs/biome": ["@biomejs/biome@2.3.11", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.3.11", "@biomejs/cli-darwin-x64": "2.3.11", "@biomejs/cli-linux-arm64": "2.3.11", "@biomejs/cli-linux-arm64-musl": "2.3.11", "@biomejs/cli-linux-x64": "2.3.11", "@biomejs/cli-linux-x64-musl": "2.3.11", "@biomejs/cli-win32-arm64": "2.3.11", "@biomejs/cli-win32-x64": "2.3.11" }, "bin": { "biome": "bin/biome" } }, "sha512-/zt+6qazBWguPG6+eWmiELqO+9jRsMZ/DBU3lfuU2ngtIQYzymocHhKiZRyrbra4aCOoyTg/BmY+6WH5mv9xmQ=="],
|
||||
|
||||
@@ -77,7 +93,9 @@
|
||||
|
||||
"@marketplace-scrapers/mcp-server": ["@marketplace-scrapers/mcp-server@workspace:packages/mcp-server"],
|
||||
|
||||
"@types/bun": ["@types/bun@1.3.4", "", { "dependencies": { "bun-types": "1.3.4" } }, "sha512-EEPTKXHP+zKGPkhRLv+HI0UEX8/o+65hqARxLy8Ov5rIxMBPNTjeZww00CIihrIQGEQBYg+0roO5qOnS/7boGA=="],
|
||||
"@tsconfig/bun": ["@tsconfig/bun@1.0.9", "", {}, "sha512-4M0/Ivfwcpz325z6CwSifOBZYji3DFOEpY6zEUt0+Xi2qRhzwvmqQN9XAHJh3OVvRJuAqVTLU2abdCplvp6mwQ=="],
|
||||
|
||||
"@types/bun": ["@types/bun@1.3.13", "", { "dependencies": { "bun-types": "1.3.13" } }, "sha512-9fqXWk5YIHGGnUau9TEi+qdlTYDAnOj+xLCmSTwXfAIqXr2x4tytJb43E9uCvt09zJURKXwAtkoH4nLQfzeTXw=="],
|
||||
|
||||
"@types/cli-progress": ["@types/cli-progress@3.11.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-cE3+jb9WRlu+uOSAugewNpITJDt1VF8dHOopPO4IABFc3SXYL5WE/+PTz/FCdZRRfIujiWW3n3aMbv1eIGVRWA=="],
|
||||
|
||||
@@ -85,11 +103,29 @@
|
||||
|
||||
"@types/unidecode": ["@types/unidecode@1.1.0", "", {}, "sha512-NTIsFsTe9WRek39/8DDj7KiQ0nU33DHMrKwNHcD1rKlUvn4N0Rc4Di8q/Xavs8bsDZmBa4MMtQA8+HNgwfxC/A=="],
|
||||
|
||||
"@typescript/native-preview": ["@typescript/native-preview@7.0.0-dev.20260428.1", "", { "optionalDependencies": { "@typescript/native-preview-darwin-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-darwin-x64": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-arm": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-linux-x64": "7.0.0-dev.20260428.1", "@typescript/native-preview-win32-arm64": "7.0.0-dev.20260428.1", "@typescript/native-preview-win32-x64": "7.0.0-dev.20260428.1" }, "bin": { "tsgo": "bin/tsgo.js" } }, "sha512-JiM4PYWDGs57TT0mV2KArmaW7BnTkk3XRid79NdG17tfvDbRyg4hBCpKI7vARiQPtxjKrHlxyzxOGDpv5W5T7Q=="],
|
||||
|
||||
"@typescript/native-preview-darwin-arm64": ["@typescript/native-preview-darwin-arm64@7.0.0-dev.20260428.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Lll6WmXfgTEj1G3QBIoHlabQwUtJiyhlRgSLksa06QFL5BoA7V+Lu1waa9PtPNZbGsXLDMHodtk/bRQABKuPiw=="],
|
||||
|
||||
"@typescript/native-preview-darwin-x64": ["@typescript/native-preview-darwin-x64@7.0.0-dev.20260428.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-WbsBNSHlo+4sGrTxDWdmI7r8x48tCtSCuKdmK62FvVOq58UWAs6sL13Z4Rev4ohLcGHdXC5E/8AIdpLPqDYQpw=="],
|
||||
|
||||
"@typescript/native-preview-linux-arm": ["@typescript/native-preview-linux-arm@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "arm" }, "sha512-/d/NnZFvEJU67L5mHh+cO3gsfwNCvJ9HGtxGq1KGz1VwTabOIcwLdpTpfsAR39WXzzfh9GJHL28n6GSGZInPow=="],
|
||||
|
||||
"@typescript/native-preview-linux-arm64": ["@typescript/native-preview-linux-arm64@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-cgcBX/ZBMdepkamLT8g8jQdHe7DZS/s6zTZRof6mvcrnJHlMeUnKoC9UO8/c22IrUMV3n0XPh7R8FYjUP0ll+Q=="],
|
||||
|
||||
"@typescript/native-preview-linux-x64": ["@typescript/native-preview-linux-x64@7.0.0-dev.20260428.1", "", { "os": "linux", "cpu": "x64" }, "sha512-4gJCE7wzenx1BH2Vtx2uKWUo8rFxnhGkxNEH1zxbYy/6ASwo+PnOPYmKHAzNE1C3yB5lzw71/vR5p5zyO57Y4A=="],
|
||||
|
||||
"@typescript/native-preview-win32-arm64": ["@typescript/native-preview-win32-arm64@7.0.0-dev.20260428.1", "", { "os": "win32", "cpu": "arm64" }, "sha512-yn6Rzbn62L4QTWrp0QgG8al6l/VG7PCPRdbE0vuGDSlKhInlC+Flo4QSc1qA8KHTbpHgl+nEsq9DymiitI4G4g=="],
|
||||
|
||||
"@typescript/native-preview-win32-x64": ["@typescript/native-preview-win32-x64@7.0.0-dev.20260428.1", "", { "os": "win32", "cpu": "x64" }, "sha512-T9z13mcMowXmwGjprA2FIR2EEdYZxgqH8+qk7dFZVBlo5vfk41AN/qJfAdN7IsAhEb640MJ8cMN/aiczweZKmA=="],
|
||||
|
||||
"ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
|
||||
|
||||
"argon2-wasm-pro": ["argon2-wasm-pro@1.1.0", "", {}, "sha512-ApZAKEgbWQILckY+IdjrETB0oTC8L9YHT3JVQhdun77tilExkXNyM/T/qbkvX+Uv68+IQmVwewQwg6yJnSwVxQ=="],
|
||||
|
||||
"boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="],
|
||||
|
||||
"bun-types": ["bun-types@1.3.4", "", { "dependencies": { "@types/node": "*" } }, "sha512-5ua817+BZPZOlNaRgGBpZJOSAQ9RQ17pkwPD0yR7CfJg+r8DgIILByFifDTa+IPDDxzf5VNhtNlcKqFzDgJvlQ=="],
|
||||
"bun-types": ["bun-types@1.3.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-QXKeHLlOLqQX9LgYaHJfzdBaV21T63HhFJnvuRCcjZiaUDpbs5ED1MgxbMra71CsryN/1dAoXuJJJwIv/2drVA=="],
|
||||
|
||||
"cli-progress": ["cli-progress@3.12.0", "", { "dependencies": { "string-width": "^4.2.3" } }, "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A=="],
|
||||
|
||||
@@ -125,6 +161,20 @@
|
||||
|
||||
"strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="],
|
||||
|
||||
"turbo": ["turbo@2.5.4", "", { "optionalDependencies": { "turbo-darwin-64": "2.5.4", "turbo-darwin-arm64": "2.5.4", "turbo-linux-64": "2.5.4", "turbo-linux-arm64": "2.5.4", "turbo-windows-64": "2.5.4", "turbo-windows-arm64": "2.5.4" }, "bin": { "turbo": "bin/turbo" } }, "sha512-kc8ZibdRcuWUG1pbYSBFWqmIjynlD8Lp7IB6U3vIzvOv9VG+6Sp8bzyeBWE3Oi8XV5KsQrznyRTBPvrf99E4mA=="],
|
||||
|
||||
"turbo-darwin-64": ["turbo-darwin-64@2.5.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-ah6YnH2dErojhFooxEzmvsoZQTMImaruZhFPfMKPBq8sb+hALRdvBNLqfc8NWlZq576FkfRZ/MSi4SHvVFT9PQ=="],
|
||||
|
||||
"turbo-darwin-arm64": ["turbo-darwin-arm64@2.5.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-2+Nx6LAyuXw2MdXb7pxqle3MYignLvS7OwtsP9SgtSBaMlnNlxl9BovzqdYAgkUW3AsYiQMJ/wBRb7d+xemM5A=="],
|
||||
|
||||
"turbo-linux-64": ["turbo-linux-64@2.5.4", "", { "os": "linux", "cpu": "x64" }, "sha512-5May2kjWbc8w4XxswGAl74GZ5eM4Gr6IiroqdLhXeXyfvWEdm2mFYCSWOzz0/z5cAgqyGidF1jt1qzUR8hTmOA=="],
|
||||
|
||||
"turbo-linux-arm64": ["turbo-linux-arm64@2.5.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-/2yqFaS3TbfxV3P5yG2JUI79P7OUQKOUvAnx4MV9Bdz6jqHsHwc9WZPpO4QseQm+NvmgY6ICORnoVPODxGUiJg=="],
|
||||
|
||||
"turbo-windows-64": ["turbo-windows-64@2.5.4", "", { "os": "win32", "cpu": "x64" }, "sha512-EQUO4SmaCDhO6zYohxIjJpOKRN3wlfU7jMAj3CgcyTPvQR/UFLEKAYHqJOnJtymbQmiiM/ihX6c6W6Uq0yC7mA=="],
|
||||
|
||||
"turbo-windows-arm64": ["turbo-windows-arm64@2.5.4", "", { "os": "win32", "cpu": "arm64" }, "sha512-oQ8RrK1VS8lrxkLriotFq+PiF7iiGgkZtfLKF4DDKsmdbPo0O9R2mQxm7jHLuXraRCuIQDWMIw6dpcr7Iykf4A=="],
|
||||
|
||||
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
||||
|
||||
"uhyphen": ["uhyphen@0.2.0", "", {}, "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA=="],
|
||||
|
||||
5
bunfig.toml
Normal file
5
bunfig.toml
Normal file
@@ -0,0 +1,5 @@
|
||||
[install]
|
||||
exact = true
|
||||
|
||||
[test]
|
||||
root = "./do-not-run-tests-from-root"
|
||||
@@ -1,55 +1,18 @@
|
||||
# Marketplace Cookies Setup
|
||||
# cookies
|
||||
|
||||
Both Facebook Marketplace and eBay require valid session cookies to bypass bot detection and access listings.
|
||||
## Scope
|
||||
|
||||
## Cookie Configuration
|
||||
- This directory is for cookie setup docs and local examples only.
|
||||
- Treat any real browser cookie export as a secret, even if already present locally.
|
||||
|
||||
Authenticated scrapers now read cookies only from environment variables:
|
||||
1. `FACEBOOK_COOKIE`
|
||||
2. `EBAY_COOKIE`
|
||||
## Runtime Sources
|
||||
|
||||
---
|
||||
- Authenticated scrapers read raw `Cookie` header strings from environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`.
|
||||
- Some core entrypoints also accept explicit cookie strings from request/options; explicit input takes precedence over environment values.
|
||||
|
||||
## Facebook Marketplace
|
||||
## Safety Rules
|
||||
|
||||
### Required Cookies
|
||||
- `c_user`: Your Facebook user ID
|
||||
- `xs`: Facebook session token
|
||||
- `fr`: Facebook request token
|
||||
- `datr`: Data attribution token
|
||||
- `sb`: Session browser token
|
||||
|
||||
### Setup
|
||||
|
||||
```bash
|
||||
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||
```
|
||||
|
||||
Use the raw `Cookie` header string copied from an authenticated browser session.
|
||||
|
||||
---
|
||||
|
||||
## eBay
|
||||
|
||||
eBay has aggressive bot detection that blocks requests without valid session cookies.
|
||||
|
||||
### Setup
|
||||
|
||||
```bash
|
||||
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||
```
|
||||
|
||||
Use the raw `Cookie` header string copied from an authenticated browser session.
|
||||
|
||||
---
|
||||
|
||||
## Important Notes
|
||||
|
||||
- Cookies must be from active browser sessions
|
||||
- Cookies expire and need periodic refresh
|
||||
- **NEVER** commit real cookies to version control
|
||||
- Platforms may still block automated scraping despite valid cookies
|
||||
|
||||
## Security
|
||||
|
||||
Do not commit real cookie values or store them in tracked files.
|
||||
- Never commit real cookie values, browser exports, or session files.
|
||||
- Use placeholder values in docs: `c_user=123; xs=token; fr=request`.
|
||||
- Do not paste cookie values into logs, tests, fixtures, or generated agent docs.
|
||||
- If editing this directory, verify diffs do not contain real `c_user`, `xs`, `fr`, `datr`, `sb`, `s`, `ds2`, or `ebay` values.
|
||||
|
||||
511
docs/superpowers/plans/2025-07-14-opencode-monorepo-config.md
Normal file
511
docs/superpowers/plans/2025-07-14-opencode-monorepo-config.md
Normal file
@@ -0,0 +1,511 @@
|
||||
# opencode Monorepo Config Adoption Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
|
||||
> to implement this plan task-by-task.
|
||||
> Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Adopt opencode-style monorepo config: Turbo task orchestration, workspace dep
|
||||
catalog, shared root tsconfig, bunfig.toml, and `exports` field in all packages.
|
||||
|
||||
**Architecture:** Pure config changes across 10 files — no source code touched.
|
||||
Root config files are added/updated first, then per-package files updated to reference
|
||||
them. Changes are independent within each task and safe to commit atomically.
|
||||
|
||||
**Tech Stack:** Bun workspaces, Turbo 2.x, @tsconfig/bun, TypeScript (tsgo /
|
||||
@typescript/native-preview)
|
||||
|
||||
* * *
|
||||
|
||||
## File Map
|
||||
|
||||
| File | Action | Responsible for |
|
||||
| --- | --- | --- |
|
||||
| `package.json` | Modify | Workspace catalog, turbo devDep, @tsconfig/bun devDep, updated scripts |
|
||||
| `turbo.json` | Create | Task graph: typecheck, build, test |
|
||||
| `tsconfig.json` | Create | Shared TS compiler options for all packages |
|
||||
| `bunfig.toml` | Create | Exact installs, root test guard |
|
||||
| `packages/core/package.json` | Modify | exports field, catalog refs, script rename |
|
||||
| `packages/api-server/package.json` | Modify | exports field, catalog refs, script rename |
|
||||
| `packages/mcp-server/package.json` | Modify | exports field, catalog refs, script rename |
|
||||
| `packages/core/tsconfig.json` | Modify | Slim — extends root, paths only |
|
||||
| `packages/api-server/tsconfig.json` | Modify | Slim — extends root, paths only |
|
||||
| `packages/mcp-server/tsconfig.json` | Modify | Slim — extends root, paths only |
|
||||
|
||||
* * *
|
||||
|
||||
### Task 1: Add `bunfig.toml` and `turbo.json`
|
||||
|
||||
Two new root config files with no dependencies on other tasks.
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `bunfig.toml`
|
||||
|
||||
- Create: `turbo.json`
|
||||
|
||||
- [ ] **Step 1: Create `bunfig.toml`**
|
||||
|
||||
Write this file at repo root (`/path/to/ca-marketplace-scraper/bunfig.toml`):
|
||||
|
||||
```toml
|
||||
[install]
|
||||
exact = true
|
||||
|
||||
[test]
|
||||
root = "./do-not-run-tests-from-root"
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Create `turbo.json`**
|
||||
|
||||
Write this file at repo root:
|
||||
|
||||
```json
|
||||
{
|
||||
"$schema": "https://turbo.build/schema.json",
|
||||
"tasks": {
|
||||
"typecheck": {},
|
||||
"build": {
|
||||
"dependsOn": ["^build"],
|
||||
"outputs": ["dist/**"]
|
||||
},
|
||||
"test": {
|
||||
"dependsOn": ["^build"],
|
||||
"outputs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Verify files exist**
|
||||
|
||||
Run:
|
||||
```bash
|
||||
ls bunfig.toml turbo.json
|
||||
```
|
||||
Expected: both files listed, no errors.
|
||||
|
||||
- [ ] **Step 4: Commit**
|
||||
|
||||
```bash
|
||||
git add bunfig.toml turbo.json
|
||||
git commit -m "chore: add bunfig.toml and turbo.json"
|
||||
```
|
||||
|
||||
* * *
|
||||
|
||||
### Task 2: Create root `tsconfig.json`
|
||||
|
||||
Shared base tsconfig all packages will extend.
|
||||
Extracts the common options currently duplicated in all 3 per-package tsconfigs.
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `tsconfig.json`
|
||||
|
||||
- [ ] **Step 1: Create root `tsconfig.json`**
|
||||
|
||||
Write this file at repo root:
|
||||
|
||||
```json
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/tsconfig",
|
||||
"extends": "@tsconfig/bun/tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"lib": ["dom", "ESNext"],
|
||||
"target": "ESNext",
|
||||
"module": "preserve",
|
||||
"moduleResolution": "bundler",
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"moduleDetection": "force",
|
||||
"jsx": "react-jsx",
|
||||
"allowJs": true,
|
||||
"allowImportingTsExtensions": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"skipLibCheck": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"noImplicitOverride": true,
|
||||
"noUnusedLocals": false,
|
||||
"noUnusedParameters": false,
|
||||
"noPropertyAccessFromIndexSignature": false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Commit**
|
||||
|
||||
```bash
|
||||
git add tsconfig.json
|
||||
git commit -m "chore: add shared root tsconfig.json"
|
||||
```
|
||||
|
||||
* * *
|
||||
|
||||
### Task 3: Update root `package.json`
|
||||
|
||||
Add workspace catalog, `turbo` + `@tsconfig/bun` devDependencies, and update scripts to
|
||||
use `turbo run`.
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `package.json`
|
||||
|
||||
- [ ] **Step 1: Replace root `package.json`**
|
||||
|
||||
Write this complete file:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "marketplace-scrapers-monorepo",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"packageManager": "bun@1.3.13",
|
||||
"scripts": {
|
||||
"typecheck": "turbo run typecheck",
|
||||
"build": "bun run clean && turbo run build",
|
||||
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
|
||||
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
|
||||
"build:all": "bun run build:api && bun run build:mcp",
|
||||
"ci": "biome ci",
|
||||
"clean": "rm -rf dist",
|
||||
"start": "./scripts/start.sh"
|
||||
},
|
||||
"workspaces": {
|
||||
"packages": [
|
||||
"packages/*"
|
||||
],
|
||||
"catalog": {
|
||||
"@tsconfig/bun": "1.0.9",
|
||||
"@typescript/native-preview": "7.0.0-dev.20260428.1",
|
||||
"@types/bun": "1.2.18",
|
||||
"@types/cli-progress": "3.11.6",
|
||||
"@types/unidecode": "1.1.0"
|
||||
}
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "2.3.11",
|
||||
"@tsconfig/bun": "catalog:",
|
||||
"turbo": "2.5.4"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
> **Note on catalog versions:** The catalog pins exact versions.
|
||||
> The values above are taken from the current package installs.
|
||||
> If `@types/bun` was `latest`, check `node_modules/@types/bun/package.json` for the
|
||||
> actual installed version and use that.
|
||||
> Same for `@typescript/native-preview`.
|
||||
|
||||
- [ ] **Step 2: Check actual installed versions**
|
||||
|
||||
Run:
|
||||
```bash
|
||||
cat node_modules/@types/bun/package.json | grep '"version"'
|
||||
cat node_modules/@typescript/native-preview/package.json | grep '"version"'
|
||||
cat node_modules/@types/cli-progress/package.json | grep '"version"'
|
||||
cat node_modules/@types/unidecode/package.json | grep '"version"'
|
||||
```
|
||||
|
||||
Update the catalog values in `package.json` to match the exact installed versions.
|
||||
|
||||
- [ ] **Step 3: Install turbo and @tsconfig/bun**
|
||||
|
||||
```bash
|
||||
bun install
|
||||
```
|
||||
|
||||
Expected: lock file updated, `turbo` and `@tsconfig/bun` appear in `node_modules`.
|
||||
|
||||
- [ ] **Step 4: Verify turbo works**
|
||||
|
||||
```bash
|
||||
bunx turbo run typecheck --dry
|
||||
```
|
||||
|
||||
Expected: output lists the `typecheck` task for each package (even if no `typecheck`
|
||||
script exists yet — turbo will note them as skipped/missing).
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add package.json bun.lock
|
||||
git commit -m "chore: add workspace catalog and turbo to root package.json"
|
||||
```
|
||||
|
||||
* * *
|
||||
|
||||
### Task 4: Update per-package `package.json` files
|
||||
|
||||
Rename `type:check` → `typecheck`, replace `main`/`module` with `exports`, swap pinned
|
||||
dep versions for `catalog:` references.
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/package.json`
|
||||
|
||||
- Modify: `packages/api-server/package.json`
|
||||
|
||||
- Modify: `packages/mcp-server/package.json`
|
||||
|
||||
- [ ] **Step 1: Replace `packages/core/package.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "@marketplace-scrapers/core",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"typecheck": "bun tsgo"
|
||||
},
|
||||
"dependencies": {
|
||||
"@typescript/native-preview": "catalog:",
|
||||
"cli-progress": "^3.12.0",
|
||||
"linkedom": "^0.18.12",
|
||||
"unidecode": "^1.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "catalog:",
|
||||
"@types/cli-progress": "catalog:",
|
||||
"@types/unidecode": "catalog:"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Replace `packages/api-server/package.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "@marketplace-scrapers/api-server",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"start": "bun ./src/index.ts",
|
||||
"dev": "bun --watch ./src/index.ts",
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api",
|
||||
"typecheck": "bun tsgo"
|
||||
},
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*",
|
||||
"@typescript/native-preview": "catalog:"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "catalog:"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Replace `packages/mcp-server/package.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "@marketplace-scrapers/mcp-server",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"start": "bun ./src/index.ts",
|
||||
"dev": "bun --watch ./src/index.ts",
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp",
|
||||
"typecheck": "bun tsgo"
|
||||
},
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*",
|
||||
"@typescript/native-preview": "catalog:"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "catalog:"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run `bun install` to sync lockfile**
|
||||
|
||||
```bash
|
||||
bun install
|
||||
```
|
||||
|
||||
Expected: no errors.
|
||||
Catalog refs resolved.
|
||||
`bun.lock` updated.
|
||||
|
||||
- [ ] **Step 5: Verify typecheck still works per-package**
|
||||
|
||||
```bash
|
||||
cd packages/core && bun run typecheck
|
||||
cd ../api-server && bun run typecheck
|
||||
cd ../mcp-server && bun run typecheck
|
||||
cd ../..
|
||||
```
|
||||
|
||||
Expected: each exits 0 (or same errors as before — no new errors introduced).
|
||||
|
||||
- [ ] **Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/package.json packages/api-server/package.json packages/mcp-server/package.json bun.lock
|
||||
git commit -m "chore: use exports field and catalog refs in all packages"
|
||||
```
|
||||
|
||||
* * *
|
||||
|
||||
### Task 5: Slim per-package `tsconfig.json` files
|
||||
|
||||
Replace the duplicated full tsconfig in each package with a slim `extends`-based one
|
||||
pointing to root.
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/tsconfig.json`
|
||||
|
||||
- Modify: `packages/api-server/tsconfig.json`
|
||||
|
||||
- Modify: `packages/mcp-server/tsconfig.json`
|
||||
|
||||
- [ ] **Step 1: Replace `packages/core/tsconfig.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["./src", "./test"]
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Replace `packages/api-server/tsconfig.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["./src", "./test"]
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Replace `packages/mcp-server/tsconfig.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["./src", "./test"]
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Verify `@tsconfig/bun` is resolvable**
|
||||
|
||||
The root tsconfig extends `@tsconfig/bun/tsconfig.json`. Confirm the package is
|
||||
installed:
|
||||
|
||||
```bash
|
||||
ls node_modules/@tsconfig/bun/tsconfig.json
|
||||
```
|
||||
|
||||
Expected: file exists.
|
||||
|
||||
- [ ] **Step 5: Run typecheck via Turbo**
|
||||
|
||||
```bash
|
||||
bun run typecheck
|
||||
```
|
||||
|
||||
Expected: Turbo runs `typecheck` for all 3 packages in parallel, all pass (or same
|
||||
pre-existing errors — no new ones).
|
||||
|
||||
- [ ] **Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/tsconfig.json packages/api-server/tsconfig.json packages/mcp-server/tsconfig.json
|
||||
git commit -m "chore: slim per-package tsconfigs to extend root"
|
||||
```
|
||||
|
||||
* * *
|
||||
|
||||
### Task 6: Smoke test full build pipeline
|
||||
|
||||
Verify everything works end-to-end.
|
||||
|
||||
**Files:** none (verification only)
|
||||
|
||||
- [ ] **Step 1: Run turbo typecheck**
|
||||
|
||||
```bash
|
||||
bun run typecheck
|
||||
```
|
||||
|
||||
Expected: Turbo runs `typecheck` across all packages.
|
||||
Exit 0.
|
||||
|
||||
- [ ] **Step 2: Run full build**
|
||||
|
||||
```bash
|
||||
bun run build
|
||||
```
|
||||
|
||||
Expected: `dist/` cleaned, Turbo runs `build` for api-server and mcp-server in parallel
|
||||
(core defines no `build` script, so Turbo skips it), build artifacts appear in `dist/api/` and `dist/mcp/`.
|
||||
|
||||
- [ ] **Step 3: Verify dist artifacts**
|
||||
|
||||
```bash
|
||||
ls dist/api/ dist/mcp/
|
||||
```
|
||||
|
||||
Expected: compiled output files in both directories.
|
||||
|
||||
- [ ] **Step 4: Verify `bun install` is exact**
|
||||
|
||||
```bash
|
||||
grep -c '\^' bun.lock | head -5
|
||||
```
|
||||
|
||||
With `exact = true` in bunfig.toml, newly added dependencies (`bun add`) won’t get `^` ranges.
|
||||
Existing `^` ranges in `bun.lock` from before are fine — `exact` does not rewrite them;
|
||||
they stay until the matching `package.json` entries are pinned by hand.
|
||||
|
||||
- [ ] **Step 5: Final commit if any loose files**
|
||||
|
||||
```bash
|
||||
git status
|
||||
```
|
||||
|
||||
If clean: done. If any files modified by `bun install` (e.g. `bun.lock`):
|
||||
|
||||
```bash
|
||||
git add bun.lock
|
||||
git commit -m "chore: sync lockfile after monorepo config adoption"
|
||||
```
|
||||
@@ -1,53 +1,64 @@
|
||||
# Cookie Env-Only Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
|
||||
> to implement this plan task-by-task.
|
||||
> Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Remove cookie files and request-provided cookie overrides so all authenticated marketplace scraping reads raw `Cookie` header strings only from environment variables.
|
||||
**Goal:** Remove cookie files and request-provided cookie overrides so all authenticated
|
||||
marketplace scraping reads raw `Cookie` header strings only from environment variables.
|
||||
|
||||
**Architecture:** Collapse shared cookie loading to a single env-var reader in `packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to stop accepting request/file cookie inputs. Update the API and MCP adapters so they no longer advertise or forward cookie parameters, and rewrite docs/tests to match the env-only contract.
|
||||
**Architecture:** Collapse shared cookie loading to a single env-var reader in
|
||||
`packages/core/src/utils/cookies.ts`, then tighten Facebook and eBay core signatures to
|
||||
stop accepting request/file cookie inputs.
|
||||
Update the API and MCP adapters so they no longer advertise or forward cookie
|
||||
parameters, and rewrite docs/tests to match the env-only contract.
|
||||
|
||||
**Tech Stack:** Bun, TypeScript, Bun test, Biome, workspace package exports
|
||||
|
||||
---
|
||||
* * *
|
||||
|
||||
## File Map
|
||||
|
||||
- Modify: `packages/core/src/utils/cookies.ts`
|
||||
Purpose: remove JSON/file/request-source loading and keep env-only cookie parsing/formatting.
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
Purpose: drop `cookiesSource` / `cookiePath` arguments and env-only error text.
|
||||
- Modify: `packages/core/src/scrapers/ebay.ts`
|
||||
Purpose: remove `opts.cookies` request override and use env-only cookie loading.
|
||||
- Modify: `packages/core/src/index.ts`
|
||||
Purpose: keep exports aligned with tightened core signatures.
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
Purpose: replace missing-file coverage with env-only auth tests.
|
||||
- Create: `packages/core/test/ebay-core.test.ts`
|
||||
Purpose: add dedicated eBay auth regression coverage instead of mixing it into Facebook tests.
|
||||
- Modify: `packages/api-server/src/routes/facebook.ts`
|
||||
Purpose: stop parsing/forwarding `cookies` query params.
|
||||
- Modify: `packages/api-server/src/routes/ebay.ts`
|
||||
Purpose: stop parsing/forwarding `cookies` query params.
|
||||
- Create: `packages/api-server/test/routes.test.ts`
|
||||
Purpose: verify Facebook/eBay routes ignore cookie query params and still call core correctly.
|
||||
- Modify: `packages/mcp-server/src/protocol/tools.ts`
|
||||
Purpose: remove Facebook/eBay cookie tool inputs and descriptions.
|
||||
- Modify: `packages/mcp-server/src/protocol/handler.ts`
|
||||
Purpose: stop mapping removed cookie tool inputs into API URLs.
|
||||
- Create: `packages/mcp-server/test/protocol.test.ts`
|
||||
Purpose: verify tool schemas and handler URL building no longer include Facebook/eBay cookie fields.
|
||||
- Modify: `cookies/AGENTS.md`
|
||||
Purpose: document env vars as the only supported cookie input.
|
||||
- Modify: `packages/core/src/utils/cookies.ts` Purpose: remove JSON/file/request-source
|
||||
loading and keep env-only cookie parsing/formatting.
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts` Purpose: drop `cookiesSource` /
|
||||
`cookiePath` arguments and switch to env-only error text.
|
||||
- Modify: `packages/core/src/scrapers/ebay.ts` Purpose: remove `opts.cookies` request
|
||||
override and use env-only cookie loading.
|
||||
- Modify: `packages/core/src/index.ts` Purpose: keep exports aligned with tightened core
|
||||
signatures.
|
||||
- Modify: `packages/core/test/facebook-core.test.ts` Purpose: replace missing-file
|
||||
coverage with env-only auth tests.
|
||||
- Create: `packages/core/test/ebay-core.test.ts` Purpose: add dedicated eBay auth
|
||||
regression coverage instead of mixing it into Facebook tests.
|
||||
- Modify: `packages/api-server/src/routes/facebook.ts` Purpose: stop parsing/forwarding
|
||||
`cookies` query params.
|
||||
- Modify: `packages/api-server/src/routes/ebay.ts` Purpose: stop parsing/forwarding
|
||||
`cookies` query params.
|
||||
- Create: `packages/api-server/test/routes.test.ts` Purpose: verify Facebook/eBay routes
|
||||
ignore cookie query params and still call core correctly.
|
||||
- Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: remove Facebook/eBay
|
||||
cookie tool inputs and descriptions.
|
||||
- Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: stop mapping removed
|
||||
cookie tool inputs into API URLs.
|
||||
- Create: `packages/mcp-server/test/protocol.test.ts` Purpose: verify tool schemas and
|
||||
handler URL building no longer include Facebook/eBay cookie fields.
|
||||
- Modify: `cookies/AGENTS.md` Purpose: document env vars as the only supported cookie
|
||||
input.
|
||||
|
||||
### Task 1: Lock core cookie utilities to env-only loading
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/src/utils/cookies.ts:19-227`
|
||||
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing test**
|
||||
|
||||
Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts` with env-only expectations:
|
||||
Add or replace the auth-source test block in `packages/core/test/facebook-core.test.ts`
|
||||
with env-only expectations:
|
||||
|
||||
```ts
|
||||
test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => {
|
||||
@@ -85,12 +96,14 @@ test("should reject missing Facebook auth env var", async () => {
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts`
|
||||
Expected: FAIL because the current implementation still allows missing env values to fall through to file/request-based behavior and does not emit the new env-only error.
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
|
||||
current implementation still allows missing env values to fall through to
|
||||
file/request-based behavior and does not emit the new env-only error.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only loader. The target shape is:
|
||||
Replace the multi-source loader in `packages/core/src/utils/cookies.ts` with an env-only
|
||||
loader. The target shape is:
|
||||
|
||||
```ts
|
||||
export interface CookieConfig {
|
||||
@@ -129,8 +142,8 @@ Delete the now-dead helpers and types that exist only for JSON/file/request load
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts`
|
||||
Expected: PASS for the new env-only tests.
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS for the new
|
||||
env-only tests.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
@@ -142,10 +155,15 @@ git commit -m "refactor: make cookie loading env-only"
|
||||
### Task 2: Tighten Facebook core APIs to the new contract
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts:23-29`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts:214-228`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts:823-929`
|
||||
|
||||
- Modify: `packages/core/src/index.ts:5-15`
|
||||
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing test**
|
||||
@@ -171,8 +189,9 @@ test("should fail Facebook item fetch when FACEBOOK_COOKIE is unset", async () =
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts`
|
||||
Expected: FAIL because the current function signatures and error text still mention parameter/file-based auth paths.
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: FAIL because the
|
||||
current function signatures and error text still mention parameter/file-based auth
|
||||
paths.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
@@ -206,12 +225,14 @@ console.warn(
|
||||
);
|
||||
```
|
||||
|
||||
Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep `packages/core/src/index.ts` exporting the tightened functions without the old parameter contract.
|
||||
Remove the extra cookie arguments from `fetchFacebookItem(...)` and keep
|
||||
`packages/core/src/index.ts` exporting the tightened functions without the old parameter
|
||||
contract.
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts`
|
||||
Expected: PASS with the new env-only Facebook API surface.
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts` Expected: PASS with the new
|
||||
env-only Facebook API surface.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
@@ -223,8 +244,11 @@ git commit -m "refactor: remove facebook cookie overrides"
|
||||
### Task 3: Tighten eBay core APIs to env-only auth
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/src/scrapers/ebay.ts:9-15`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/ebay.ts:337-389`
|
||||
|
||||
- Create: `packages/core/test/ebay-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing test**
|
||||
@@ -249,8 +273,8 @@ test("should warn and continue without eBay cookies when EBAY_COOKIE is unset",
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/ebay-core.test.ts`
|
||||
Expected: FAIL because `loadEbayCookies` still accepts request overrides and mentions file/json sources.
|
||||
Run: `bun test packages/core/test/ebay-core.test.ts` Expected: FAIL because
|
||||
`loadEbayCookies` still accepts request overrides and mentions file/json sources.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
@@ -276,12 +300,13 @@ async function loadEbayCookies(): Promise<string | undefined> {
|
||||
}
|
||||
```
|
||||
|
||||
Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds it into `loadEbayCookies()`.
|
||||
Then remove `cookies` from `fetchEbayItems(..., opts)` and the destructuring that feeds
|
||||
it into `loadEbayCookies()`.
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/ebay-core.test.ts`
|
||||
Expected: PASS for the eBay env-only regression coverage.
|
||||
Run: `bun test packages/core/test/ebay-core.test.ts` Expected: PASS for the eBay
|
||||
env-only regression coverage.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
@@ -293,13 +318,17 @@ git commit -m "refactor: make ebay auth env-only"
|
||||
### Task 4: Remove cookie query parameters from the API adapter
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/api-server/src/routes/facebook.ts:3-33`
|
||||
|
||||
- Modify: `packages/api-server/src/routes/ebay.ts:3-52`
|
||||
|
||||
- Create: `packages/api-server/test/routes.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing test**
|
||||
|
||||
Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core` so the route contract is explicit:
|
||||
Create `packages/api-server/test/routes.test.ts` and mock `@marketplace-scrapers/core`
|
||||
so the route contract is explicit:
|
||||
|
||||
```ts
|
||||
import { afterEach, describe, expect, mock, test } from "bun:test";
|
||||
@@ -347,8 +376,9 @@ test("ebayRoute ignores cookies query parameter", async () => {
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/api-server/test/routes.test.ts`
|
||||
Expected: FAIL because the current routes still parse `reqUrl.searchParams.get("cookies")` and forward it downstream.
|
||||
Run: `bun test packages/api-server/test/routes.test.ts` Expected: FAIL because the
|
||||
current routes still parse `reqUrl.searchParams.get("cookies")` and forward it
|
||||
downstream.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
@@ -383,8 +413,8 @@ const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/api-server/test/routes.test.ts`
|
||||
Expected: PASS for route coverage and no remaining adapter references to `cookies` for Facebook/eBay.
|
||||
Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS for route
|
||||
coverage and no remaining adapter references to `cookies` for Facebook/eBay.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
@@ -396,13 +426,17 @@ git commit -m "refactor: remove api cookie query overrides"
|
||||
### Task 5: Remove cookie inputs from MCP tool schemas and request mapping
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/mcp-server/src/protocol/tools.ts:65-148`
|
||||
|
||||
- Modify: `packages/mcp-server/src/protocol/handler.ts:154-211`
|
||||
|
||||
- Create: `packages/mcp-server/test/protocol.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing test**
|
||||
|
||||
Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building assertions:
|
||||
Create `packages/mcp-server/test/protocol.test.ts` with schema and URL-building
|
||||
assertions:
|
||||
|
||||
```ts
|
||||
import { expect, mock, test } from "bun:test";
|
||||
@@ -445,8 +479,8 @@ expect(calledUrl).not.toContain("cookies=");
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts`
|
||||
Expected: FAIL because the current MCP schema and handler still expose and forward those inputs.
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
|
||||
current MCP schema and handler still expose and forward those inputs.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
@@ -465,12 +499,13 @@ Delete the Facebook/eBay cookie tool properties and handler mapping:
|
||||
// if (args.cookies) params.append("cookies", args.cookies);
|
||||
```
|
||||
|
||||
Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by the approved spec.
|
||||
Leave Kijiji alone; this plan only changes Facebook/eBay env-only auth paths defined by
|
||||
the approved spec.
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts`
|
||||
Expected: PASS with MCP definitions and handler mapping in sync.
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS with MCP
|
||||
definitions and handler mapping in sync.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
@@ -482,12 +517,16 @@ git commit -m "refactor: remove mcp cookie parameters"
|
||||
### Task 6: Rewrite cookie documentation and run full verification
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `cookies/AGENTS.md:9-85`
|
||||
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if implementation reveals a spec mismatch
|
||||
|
||||
- Modify: `docs/superpowers/specs/2026-04-21-cookie-env-only-design.md` only if
|
||||
implementation reveals a spec mismatch
|
||||
|
||||
- [ ] **Step 1: Write the failing test**
|
||||
|
||||
Treat docs drift as a contract failure. Capture the required state before editing:
|
||||
Treat docs drift as a contract failure.
|
||||
Capture the required state before editing:
|
||||
|
||||
```md
|
||||
- Cookie setup docs mention env vars only for Facebook and eBay
|
||||
@@ -497,14 +536,14 @@ Treat docs drift as a contract failure. Capture the required state before editin
|
||||
|
||||
- [ ] **Step 2: Run verification to prove current docs are stale**
|
||||
|
||||
Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md`
|
||||
Expected: matches found
|
||||
Run: `rg -n "facebook\.json|ebay\.json|cookies=" cookies/AGENTS.md` Expected: matches
|
||||
found
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Rewrite the cookie setup doc so Facebook and eBay each show only env-var setup:
|
||||
|
||||
```md
|
||||
````md
|
||||
## Cookie Configuration
|
||||
|
||||
All supported authenticated scrapers read cookies only from environment variables.
|
||||
@@ -513,14 +552,14 @@ All supported authenticated scrapers read cookies only from environment variable
|
||||
|
||||
```bash
|
||||
export FACEBOOK_COOKIE='c_user=123; xs=token; fr=request'
|
||||
```
|
||||
````
|
||||
|
||||
### eBay
|
||||
|
||||
```bash
|
||||
export EBAY_COOKIE='s=VALUE; ds2=VALUE; ebay=VALUE'
|
||||
```
|
||||
```
|
||||
````
|
||||
|
||||
Remove the file-based and request-parameter sections entirely.
|
||||
|
||||
@@ -534,10 +573,14 @@ Expected: all commands pass
|
||||
```bash
|
||||
git add cookies/AGENTS.md docs/superpowers/specs/2026-04-21-cookie-env-only-design.md
|
||||
git commit -m "docs: align cookie setup with env-only auth"
|
||||
```
|
||||
````
|
||||
|
||||
## Self-Review
|
||||
|
||||
- Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter, tests, and docs each have explicit tasks.
|
||||
- Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP protocol coverage.
|
||||
- Type consistency check: `ensureCookies(config)` is the single shared loader name used across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core changes.
|
||||
- Spec coverage check: shared cookie utils, Facebook, eBay, API adapter, MCP adapter,
|
||||
tests, and docs each have explicit tasks.
|
||||
- Placeholder scan: concrete test files are now named for eBay core, API routes, and MCP
|
||||
protocol coverage.
|
||||
- Type consistency check: `ensureCookies(config)` is the single shared loader name used
|
||||
across Tasks 1-3, and Facebook/eBay route signatures stay aligned with the core
|
||||
changes.
|
||||
|
||||
@@ -1,34 +1,49 @@
|
||||
# Facebook Comet Rewrite Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
|
||||
> to implement this plan task-by-task.
|
||||
> Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid Comet-bootstrap parser for both search and item routes.
|
||||
**Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid
|
||||
Comet-bootstrap parser for both search and item routes.
|
||||
|
||||
**Architecture:** Keep authenticated direct HTTP fetches as the transport. Classify each Facebook response first, then parse route-specific Comet bootstrap/state candidates, and fall back to rendered-HTML extraction only when bootstrap decoding cannot produce the expected search or item shape.
|
||||
**Architecture:** Keep authenticated direct HTTP fetches as the transport.
|
||||
Classify each Facebook response first, then parse route-specific Comet bootstrap/state
|
||||
candidates, and fall back to rendered-HTML extraction only when bootstrap decoding
|
||||
cannot produce the expected search or item shape.
|
||||
|
||||
**Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http helpers
|
||||
**Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http
|
||||
helpers
|
||||
|
||||
---
|
||||
* * *
|
||||
|
||||
## File Structure
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Owns Facebook fetch flow, response classification, bootstrap candidate extraction, search parsing, item parsing, and HTML fallbacks.
|
||||
- Owns Facebook fetch flow, response classification, bootstrap candidate extraction,
|
||||
search parsing, item parsing, and HTML fallbacks.
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
- Owns unit coverage for response classification, bootstrap parsing, fallback parsing, and route-aware item/search extraction behavior.
|
||||
- Owns unit coverage for response classification, bootstrap parsing, fallback parsing,
|
||||
and route-aware item/search extraction behavior.
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts`
|
||||
- Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping for search/item entrypoints.
|
||||
- Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping
|
||||
for search/item entrypoints.
|
||||
|
||||
### Task 1: Add Route Classification Coverage
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Add these tests near the Facebook parser tests in `packages/core/test/facebook-core.test.ts`:
|
||||
Add these tests near the Facebook parser tests in
|
||||
`packages/core/test/facebook-core.test.ts`:
|
||||
|
||||
```ts
|
||||
test("classifies Comet search responses", () => {
|
||||
@@ -89,12 +104,14 @@ test("classifies unavailable item responses", () => {
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
|
||||
Expected: FAIL because `classifyFacebookResponse` does not exist yet.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Add this type and function near the parsing section in `packages/core/src/scrapers/facebook.ts`:
|
||||
Add this type and function near the parsing section in
|
||||
`packages/core/src/scrapers/facebook.ts`:
|
||||
|
||||
```ts
|
||||
type FacebookResponseKind = "search" | "item" | "auth_gated" | "unavailable" | "unknown";
|
||||
@@ -128,7 +145,8 @@ export function classifyFacebookResponse(htmlString: HTMLString, responseUrl: st
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
@@ -141,8 +159,11 @@ git commit -m "refactor: add facebook response classification"
|
||||
### Task 2: Add Bootstrap Candidate Extraction
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
@@ -185,7 +206,8 @@ test("keeps candidate order stable for later scoring", () => {
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
|
||||
Expected: FAIL because `extractFacebookBootstrapCandidates` does not exist.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
@@ -218,7 +240,8 @@ export function extractFacebookBootstrapCandidates(htmlString: HTMLString): Reco
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates"`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
@@ -231,10 +254,15 @@ git commit -m "refactor: add facebook bootstrap candidate extraction"
|
||||
### Task 3: Replace Search Parsing With Candidate Scoring
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- Test: `packages/core/test/facebook-integration.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
@@ -323,12 +351,15 @@ const mockSearchHtml = `
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"`
|
||||
Expected: FAIL because the current search extractor only understands legacy `marketplace_search` shapes.
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet bootstrap candidates"`
|
||||
Expected: FAIL because the current search extractor only understands legacy
|
||||
`marketplace_search` shapes.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Replace the search extraction internals in `extractFacebookMarketplaceData()` with candidate scoring like this:
|
||||
Replace the search extraction internals in `extractFacebookMarketplaceData()` with
|
||||
candidate scoring like this:
|
||||
|
||||
```ts
|
||||
function findSearchEdges(candidate: unknown): FacebookEdge[] | null {
|
||||
@@ -383,7 +414,8 @@ export function extractFacebookMarketplaceData(htmlString: HTMLString): Facebook
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
|
||||
Expected: PASS for the rewritten search fixtures and existing unaffected tests.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
@@ -396,8 +428,11 @@ git commit -m "refactor: rewrite facebook search parser for comet bootstrap"
|
||||
### Task 4: Replace Item Parsing With Candidate Scoring
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
@@ -438,7 +473,8 @@ test("extracts item details from Comet permalink bootstrap candidates", () => {
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"`
|
||||
Expected: FAIL because the current item extractor depends on legacy permalink markers.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
@@ -491,8 +527,8 @@ export function extractFacebookItemData(htmlString: HTMLString): FacebookMarketp
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts`
|
||||
Expected: PASS for current-shape item tests and remaining parser tests.
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts`. Expected: PASS for
|
||||
current-shape item tests and remaining parser tests.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
@@ -504,8 +540,11 @@ git commit -m "refactor: rewrite facebook item parser for comet bootstrap"
|
||||
### Task 5: Add HTML Fallback Extraction
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
@@ -549,8 +588,10 @@ test("falls back to rendered item HTML when bootstrap payloads are undecodable",
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
|
||||
Expected: FAIL because the extractor currently returns `null` without a structured candidate.
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
|
||||
Expected: FAIL because the extractor currently returns `null` without a structured
|
||||
candidate.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
@@ -607,11 +648,13 @@ function extractItemFallback(htmlString: HTMLString): FacebookMarketplaceItem |
|
||||
}
|
||||
```
|
||||
|
||||
Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()` and `extractFacebookItemData()`.
|
||||
Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()`
|
||||
and `extractFacebookItemData()`.
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
@@ -624,8 +667,11 @@ git commit -m "refactor: add facebook html fallbacks"
|
||||
### Task 6: Wire Route-Aware Failures Into Entry Points
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
|
||||
- Test: `packages/core/test/facebook-integration.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
@@ -664,8 +710,10 @@ test("returns null for unavailable item responses", async () => {
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"`
|
||||
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by route/auth state.
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"`
|
||||
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by
|
||||
route/auth state.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
@@ -690,12 +738,13 @@ if (itemResponseClass.kind === "unavailable") {
|
||||
}
|
||||
```
|
||||
|
||||
Use the actual response URL from `fetchHtml` plumbing if that helper is extended to return both HTML and final URL; otherwise start by threading final URL support through the fetch helper in the same task.
|
||||
Use the actual response URL from `fetchHtml` plumbing if that helper is extended to
|
||||
return both HTML and final URL; otherwise start by threading final URL support through
|
||||
the fetch helper in the same task.
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-integration.test.ts`
|
||||
Expected: PASS
|
||||
Run: `bun test packages/core/test/facebook-integration.test.ts`. Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
@@ -707,19 +756,22 @@ git commit -m "refactor: handle facebook route-aware failure states"
|
||||
### Task 7: Run Full Verification And Live Probe
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts` if small cleanup is required
|
||||
|
||||
- Modify: `packages/core/test/facebook-core.test.ts` if small cleanup is required
|
||||
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts` if small cleanup is required
|
||||
|
||||
- [ ] **Step 1: Run focused Facebook tests**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 2: Run broader core tests**
|
||||
|
||||
Run: `bun test packages/core/test`
|
||||
Expected: PASS
|
||||
Run: `bun test packages/core/test`. Expected: PASS
|
||||
|
||||
- [ ] **Step 3: Run live authenticated Facebook probe**
|
||||
|
||||
@@ -742,11 +794,14 @@ if (results[0]?.url) {
|
||||
Expected:
|
||||
|
||||
- search returns at least one result
|
||||
- item fetch returns non-null for the first live result when the route is not stale/unavailable
|
||||
|
||||
- item fetch returns non-null for the first live result when the route is not
|
||||
stale/unavailable
|
||||
|
||||
- [ ] **Step 4: Make any minimal cleanup needed to keep tests and live probe green**
|
||||
|
||||
If cleanup is needed, keep it limited to naming, dead-code removal caused by the rewrite, or small parser corrections directly exposed by the verification commands.
|
||||
If cleanup is needed, keep it limited to naming, dead-code removal caused by the
|
||||
rewrite, or small parser corrections directly exposed by the verification commands.
|
||||
|
||||
- [ ] **Step 5: Re-run verification**
|
||||
|
||||
@@ -767,6 +822,11 @@ git commit -m "refactor: complete facebook comet scraper rewrite"
|
||||
|
||||
## Self-Review
|
||||
|
||||
- Spec coverage: the plan covers classification, route-aware search parsing, route-aware item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and live verification.
|
||||
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps remain.
|
||||
- Type consistency: all planned functions and types use the same names across tasks: `classifyFacebookResponse`, `extractFacebookBootstrapCandidates`, `extractFacebookMarketplaceData`, and `extractFacebookItemData`.
|
||||
- Spec coverage: the plan covers classification, route-aware search parsing, route-aware
|
||||
item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and
|
||||
live verification.
|
||||
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps
|
||||
remain.
|
||||
- Type consistency: all planned functions and types use the same names across tasks:
|
||||
`classifyFacebookResponse`, `extractFacebookBootstrapCandidates`,
|
||||
`extractFacebookMarketplaceData`, and `extractFacebookItemData`.
|
||||
|
||||
718
docs/superpowers/plans/2026-04-22-unstable-listing-mode.md
Normal file
718
docs/superpowers/plans/2026-04-22-unstable-listing-mode.md
Normal file
@@ -0,0 +1,718 @@
|
||||
# Unstable Listing Mode Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
|
||||
> to implement this plan task-by-task.
|
||||
> Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Add an optional shared mode across Facebook, eBay, and Kijiji that moves
|
||||
listings priced below 80% of the median into `unstableResults`, while preserving current
|
||||
default response shapes.
|
||||
|
||||
**Architecture:** Introduce a shared generic classifier in `packages/core` that splits
|
||||
any listing array into `results` and `unstableResults` using the same median-based rule.
|
||||
Then thread one opt-in flag through the scraper entrypoints, API routes, and MCP tool
|
||||
definitions so all surfaces expose the same behavior without changing existing defaults.
|
||||
|
||||
**Tech Stack:** Bun, TypeScript, Bun test, workspace packages, JSON-RPC MCP server
|
||||
|
||||
* * *
|
||||
|
||||
## File Map
|
||||
|
||||
- Create: `packages/core/src/utils/unstable.ts` Purpose: shared generic median/cutoff
|
||||
classifier for listing arrays.
|
||||
- Modify: `packages/core/src/types/common.ts` Purpose: add shared mode types used by
|
||||
scrapers and adapters.
|
||||
- Modify: `packages/core/src/index.ts` Purpose: export the new shared classifier/types.
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts` Purpose: add the optional mode flag
|
||||
and return bucketed results when enabled.
|
||||
- Modify: `packages/core/src/scrapers/ebay.ts` Purpose: add the optional mode flag and
|
||||
return bucketed results when enabled.
|
||||
- Modify: `packages/core/src/scrapers/kijiji.ts` Purpose: add the optional mode flag and
|
||||
return bucketed results when enabled.
|
||||
- Create: `packages/core/test/unstable-listing-mode.test.ts` Purpose: lock the shared
|
||||
classifier behavior with direct unit tests.
|
||||
- Modify: `packages/core/test/facebook-core.test.ts` Purpose: prove Facebook preserves
|
||||
default arrays and returns buckets when enabled.
|
||||
- Modify: `packages/core/test/ebay-core.test.ts` Purpose: prove eBay preserves default
|
||||
arrays and returns buckets when enabled.
|
||||
- Modify: `packages/core/test/kijiji-core.test.ts` Purpose: prove Kijiji preserves
|
||||
default arrays and returns buckets when enabled.
|
||||
- Modify: `packages/api-server/src/routes/facebook.ts` Purpose: expose a shared opt-in
|
||||
query parameter and preserve default response shape.
|
||||
- Modify: `packages/api-server/src/routes/ebay.ts` Purpose: expose the same query
|
||||
parameter and preserve default response shape.
|
||||
- Modify: `packages/api-server/src/routes/kijiji.ts` Purpose: expose the same query
|
||||
parameter and preserve default response shape.
|
||||
- Modify: `packages/api-server/test/routes.test.ts` Purpose: verify route forwarding and
|
||||
route response-shape switching.
|
||||
- Modify: `packages/mcp-server/src/protocol/tools.ts` Purpose: document the optional
|
||||
unstable mode in all search tools.
|
||||
- Modify: `packages/mcp-server/src/protocol/handler.ts` Purpose: forward the optional
|
||||
mode to API routes for all search tools.
|
||||
- Modify: `packages/mcp-server/test/protocol.test.ts` Purpose: verify MCP tool metadata
|
||||
and forwarded URLs include the new option.
|
||||
|
||||
### Task 1: Add the shared unstable-listing classifier
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `packages/core/src/utils/unstable.ts`
|
||||
|
||||
- Modify: `packages/core/src/types/common.ts`
|
||||
|
||||
- Modify: `packages/core/src/index.ts`
|
||||
|
||||
- Test: `packages/core/test/unstable-listing-mode.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing test**
|
||||
|
||||
Create `packages/core/test/unstable-listing-mode.test.ts` with focused shared-behavior
|
||||
coverage:
|
||||
|
||||
```ts
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import {
|
||||
classifyUnstableListings,
|
||||
type ListingDetails,
|
||||
} from "../src/index";
|
||||
|
||||
function makeListing(title: string, cents?: number): ListingDetails {
|
||||
return {
|
||||
url: `https://example.com/${title}`,
|
||||
title,
|
||||
listingPrice: {
|
||||
amountFormatted: cents ? `$${(cents / 100).toFixed(2)}` : "$0.00",
|
||||
cents: cents ?? 0,
|
||||
currency: "CAD",
|
||||
},
|
||||
listingType: "item",
|
||||
listingStatus: "ACTIVE",
|
||||
};
|
||||
}
|
||||
|
||||
describe("classifyUnstableListings", () => {
|
||||
test("moves listings below 80% of the median into unstableResults", () => {
|
||||
const output = classifyUnstableListings([
|
||||
makeListing("cheap", 1000),
|
||||
makeListing("mid", 2000),
|
||||
makeListing("high", 3000),
|
||||
]);
|
||||
|
||||
expect(output.results.map((item) => item.title)).toEqual(["mid", "high"]);
|
||||
expect(output.unstableResults.map((item) => item.title)).toEqual(["cheap"]);
|
||||
});
|
||||
|
||||
test("uses the midpoint median for even-sized priced inputs", () => {
|
||||
const output = classifyUnstableListings([
|
||||
makeListing("a", 1000),
|
||||
makeListing("b", 2000),
|
||||
makeListing("c", 3000),
|
||||
makeListing("d", 4000),
|
||||
]);
|
||||
|
||||
expect(output.results.map((item) => item.title)).toEqual(["b", "c", "d"]);
|
||||
expect(output.unstableResults.map((item) => item.title)).toEqual(["a"]);
|
||||
});
|
||||
|
||||
test("keeps non-positive prices in results while excluding them from median input", () => {
|
||||
const output = classifyUnstableListings([
|
||||
makeListing("free", 0),
|
||||
makeListing("cheap", 1000),
|
||||
makeListing("mid", 2000),
|
||||
makeListing("high", 3000),
|
||||
]);
|
||||
|
||||
expect(output.results.map((item) => item.title)).toEqual(["free", "mid", "high"]);
|
||||
expect(output.unstableResults.map((item) => item.title)).toEqual(["cheap"]);
|
||||
});
|
||||
|
||||
test("returns all listings as results when fewer than two valid prices exist", () => {
|
||||
const output = classifyUnstableListings([makeListing("only", 2500)]);
|
||||
|
||||
expect(output.results.map((item) => item.title)).toEqual(["only"]);
|
||||
expect(output.unstableResults).toEqual([]);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/unstable-listing-mode.test.ts`. Expected: FAIL because
|
||||
`classifyUnstableListings` and the shared mode types do not exist yet.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Add shared types in `packages/core/src/types/common.ts`:
|
||||
|
||||
```ts
|
||||
export interface UnstableListingBuckets<T> {
|
||||
results: T[];
|
||||
unstableResults: T[];
|
||||
}
|
||||
|
||||
export interface UnstableListingModeOptions {
|
||||
hideUnstableResults?: boolean;
|
||||
}
|
||||
```
|
||||
|
||||
Create `packages/core/src/utils/unstable.ts` with the shared classifier:
|
||||
|
||||
```ts
|
||||
import type { ListingDetails, UnstableListingBuckets } from "../types/common";
|
||||
|
||||
function getMedian(values: number[]): number | null {
|
||||
if (values.length < 2) return null;
|
||||
|
||||
const sorted = [...values].sort((a, b) => a - b);
|
||||
const middle = Math.floor(sorted.length / 2);
|
||||
|
||||
if (sorted.length % 2 === 0) {
|
||||
return (sorted[middle - 1] + sorted[middle]) / 2;
|
||||
}
|
||||
|
||||
return sorted[middle];
|
||||
}
|
||||
|
||||
export function classifyUnstableListings<T extends ListingDetails>(
|
||||
listings: T[],
|
||||
): UnstableListingBuckets<T> {
|
||||
const pricedValues = listings
|
||||
.map((listing) => listing.listingPrice?.cents)
|
||||
.filter((cents): cents is number => typeof cents === "number" && Number.isFinite(cents) && cents > 0);
|
||||
|
||||
const median = getMedian(pricedValues);
|
||||
if (median == null) {
|
||||
return { results: listings, unstableResults: [] };
|
||||
}
|
||||
|
||||
const threshold = median * 0.8;
|
||||
const results: T[] = [];
|
||||
const unstableResults: T[] = [];
|
||||
|
||||
for (const listing of listings) {
|
||||
const cents = listing.listingPrice?.cents;
|
||||
if (typeof cents === "number" && Number.isFinite(cents) && cents > 0 && cents < threshold) {
|
||||
unstableResults.push(listing);
|
||||
continue;
|
||||
}
|
||||
|
||||
results.push(listing);
|
||||
}
|
||||
|
||||
return { results, unstableResults };
|
||||
}
|
||||
```
|
||||
|
||||
Export the new symbols from `packages/core/src/index.ts`:
|
||||
|
||||
```ts
|
||||
export * from "./types/common";
|
||||
export { classifyUnstableListings } from "./utils/unstable";
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/unstable-listing-mode.test.ts`. Expected: PASS with 4
|
||||
passing tests.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/utils/unstable.ts packages/core/src/types/common.ts packages/core/src/index.ts packages/core/test/unstable-listing-mode.test.ts
|
||||
git commit -m "feat: add shared unstable listing classifier"
|
||||
```
|
||||
|
||||
### Task 2: Thread the optional mode through all core scrapers
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/ebay.ts`
|
||||
|
||||
- Modify: `packages/core/src/scrapers/kijiji.ts`
|
||||
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- Modify: `packages/core/test/ebay-core.test.ts`
|
||||
|
||||
- Modify: `packages/core/test/kijiji-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Add one focused opt-in test per scraper.
|
||||
Use the new shared classifier through the public scraper entrypoints instead of testing
|
||||
internal helpers.
|
||||
|
||||
In `packages/core/test/facebook-core.test.ts`, add:
|
||||
|
||||
```ts
|
||||
test("fetchFacebookItems returns stable and unstable buckets when unstable mode is enabled", async () => {
|
||||
process.env.FACEBOOK_COOKIE = "c_user=123; xs=abc";
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(facebookSearchHtmlFixture),
|
||||
headers: { get: () => null },
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await fetchFacebookItems("bike", 1, "toronto", 25, {
|
||||
hideUnstableResults: true,
|
||||
});
|
||||
|
||||
expect(result).toHaveProperty("results");
|
||||
expect(result).toHaveProperty("unstableResults");
|
||||
});
|
||||
```
|
||||
|
||||
In `packages/core/test/ebay-core.test.ts`, add:
|
||||
|
||||
```ts
|
||||
test("fetchEbayItems returns stable and unstable buckets when unstable mode is enabled", async () => {
|
||||
const result = await fetchEbayItems("bike", 1, {
|
||||
keywords: ["bike"],
|
||||
exclusions: [],
|
||||
strictMode: false,
|
||||
buyItNowOnly: true,
|
||||
canadaOnly: true,
|
||||
}, {
|
||||
hideUnstableResults: true,
|
||||
});
|
||||
|
||||
expect(result).toHaveProperty("results");
|
||||
expect(result).toHaveProperty("unstableResults");
|
||||
});
|
||||
```
|
||||
|
||||
In `packages/core/test/kijiji-core.test.ts`, add:
|
||||
|
||||
```ts
|
||||
test("fetchKijijiItems returns stable and unstable buckets when unstable mode is enabled", async () => {
|
||||
const result = await fetchKijijiItems(
|
||||
"bike",
|
||||
1,
|
||||
"https://www.kijiji.ca",
|
||||
{ maxPages: 1 },
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
|
||||
expect(result).toHaveProperty("results");
|
||||
expect(result).toHaveProperty("unstableResults");
|
||||
});
|
||||
```
|
||||
|
||||
Also add one default-mode assertion in one existing scraper test file, for example in
|
||||
`packages/core/test/facebook-core.test.ts`:
|
||||
|
||||
```ts
|
||||
test("fetchFacebookItems keeps returning an array by default", async () => {
|
||||
process.env.FACEBOOK_COOKIE = "c_user=123; xs=abc";
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(facebookSearchHtmlFixture),
|
||||
headers: { get: () => null },
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await fetchFacebookItems("bike");
|
||||
expect(Array.isArray(result)).toBe(true);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run tests to verify they fail**
|
||||
|
||||
Run:
|
||||
`bun test packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
|
||||
Expected: the three opt-in tests FAIL because the scraper signatures do not yet accept the
|
||||
new option and still always return arrays; the default-array test should already pass.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Add the shared classifier and its type imports to each scraper:
|
||||
|
||||
```ts
|
||||
import {
|
||||
classifyUnstableListings,
|
||||
type UnstableListingBuckets,
|
||||
type UnstableListingModeOptions,
|
||||
} from "../index";
|
||||
```
|
||||
|
||||
In `packages/core/src/scrapers/facebook.ts`, extend the default export signature and
|
||||
branch at the end:
|
||||
|
||||
```ts
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
LOCATION = "toronto",
|
||||
MAX_ITEMS = 25,
|
||||
unstableOptions: UnstableListingModeOptions = {},
|
||||
): Promise<FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails>> {
|
||||
// existing fetch/parsing logic
|
||||
|
||||
const limitedItems = pricedItems.slice(0, MAX_ITEMS);
|
||||
if (!unstableOptions.hideUnstableResults) {
|
||||
return limitedItems;
|
||||
}
|
||||
|
||||
const classified = classifyUnstableListings(pricedItems);
|
||||
return {
|
||||
results: classified.results.slice(0, MAX_ITEMS),
|
||||
unstableResults: classified.unstableResults,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
In `packages/core/src/scrapers/ebay.ts`, extend the entrypoint the same way:
|
||||
|
||||
```ts
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
options: EbaySearchOptions = {},
|
||||
unstableOptions: UnstableListingModeOptions = {},
|
||||
): Promise<EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails>> {
|
||||
// existing fetch/parsing logic
|
||||
|
||||
const limitedResults = maxItems ? listings.slice(0, maxItems) : listings;
|
||||
if (!unstableOptions.hideUnstableResults) {
|
||||
return limitedResults;
|
||||
}
|
||||
|
||||
const classified = classifyUnstableListings(listings);
|
||||
return {
|
||||
results: maxItems ? classified.results.slice(0, maxItems) : classified.results,
|
||||
unstableResults: classified.unstableResults,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
In `packages/core/src/scrapers/kijiji.ts`, add the same final argument after
|
||||
`listingOptions`:
|
||||
|
||||
```ts
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
BASE_URL = "https://www.kijiji.ca",
|
||||
searchOptions: SearchOptions = {},
|
||||
listingOptions: ListingFetchOptions = {},
|
||||
unstableOptions: UnstableListingModeOptions = {},
|
||||
): Promise<DetailedListing[] | UnstableListingBuckets<DetailedListing>> {
|
||||
// existing fetch/parsing logic
|
||||
|
||||
if (!unstableOptions.hideUnstableResults) {
|
||||
return allListings;
|
||||
}
|
||||
|
||||
return classifyUnstableListings(allListings);
|
||||
}
|
||||
```
|
||||
|
||||
Keep the default branch untouched in all three files so existing callers still receive
|
||||
arrays.
|
||||
|
||||
- [ ] **Step 4: Run tests to verify they pass**
|
||||
|
||||
Run:
|
||||
`bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts`
|
||||
Expected: PASS, including the new opt-in bucket assertions and the default-array
|
||||
regression assertion.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/src/scrapers/ebay.ts packages/core/src/scrapers/kijiji.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts
|
||||
git commit -m "feat: add unstable mode to scraper results"
|
||||
```
|
||||
|
||||
### Task 3: Expose unstable mode in API routes
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/api-server/src/routes/facebook.ts`
|
||||
|
||||
- Modify: `packages/api-server/src/routes/ebay.ts`
|
||||
|
||||
- Modify: `packages/api-server/src/routes/kijiji.ts`
|
||||
|
||||
- Modify: `packages/api-server/test/routes.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Extend `packages/api-server/test/routes.test.ts` with route-forwarding coverage for the
|
||||
new query parameter:
|
||||
|
||||
```ts
|
||||
test("facebookRoute forwards unstableFilter=true to core", async () => {
|
||||
const { facebookRoute } = await import("../src/routes/facebook");
|
||||
|
||||
await facebookRoute(
|
||||
new Request(
|
||||
"http://localhost/api/facebook?q=laptop&location=toronto&maxItems=3&unstableFilter=true",
|
||||
),
|
||||
);
|
||||
|
||||
expect(fetchFacebookItems).toHaveBeenCalledWith(
|
||||
"laptop",
|
||||
1,
|
||||
"toronto",
|
||||
3,
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
});
|
||||
|
||||
test("ebayRoute forwards unstableFilter=true to core", async () => {
|
||||
const { ebayRoute } = await import("../src/routes/ebay");
|
||||
|
||||
await ebayRoute(
|
||||
new Request("http://localhost/api/ebay?q=laptop&unstableFilter=true"),
|
||||
);
|
||||
|
||||
expect(fetchEbayItems).toHaveBeenCalledWith(
|
||||
"laptop",
|
||||
1,
|
||||
{
|
||||
minPrice: undefined,
|
||||
maxPrice: undefined,
|
||||
strictMode: false,
|
||||
exclusions: [],
|
||||
keywords: ["laptop"],
|
||||
buyItNowOnly: true,
|
||||
canadaOnly: true,
|
||||
},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
});
|
||||
|
||||
test("kijijiRoute forwards unstableFilter=true to core", async () => {
|
||||
const { kijijiRoute } = await import("../src/routes/kijiji");
|
||||
|
||||
await kijijiRoute(
|
||||
new Request("http://localhost/api/kijiji?q=laptop&unstableFilter=true"),
|
||||
);
|
||||
|
||||
expect(fetchKijijiItems).toHaveBeenCalledWith(
|
||||
"laptop",
|
||||
4,
|
||||
"https://www.kijiji.ca",
|
||||
expect.any(Object),
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run tests to verify they fail**
|
||||
|
||||
Run: `bun test packages/api-server/test/routes.test.ts`. Expected: FAIL because the
|
||||
routes do not yet parse or forward `unstableFilter`.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
In each route, parse the `unstableFilter` query parameter once into the shared `hideUnstableResults` flag:
|
||||
|
||||
```ts
|
||||
const hideUnstableResults = reqUrl.searchParams.get("unstableFilter") === "true";
|
||||
```
|
||||
|
||||
Update the core calls to forward the shared option.
|
||||
|
||||
In `packages/api-server/src/routes/facebook.ts`:
|
||||
|
||||
```ts
|
||||
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems, {
|
||||
hideUnstableResults,
|
||||
});
|
||||
```
|
||||
|
||||
In `packages/api-server/src/routes/ebay.ts`:
|
||||
|
||||
```ts
|
||||
const items = await fetchEbayItems(
|
||||
SEARCH_QUERY,
|
||||
1,
|
||||
{
|
||||
minPrice,
|
||||
maxPrice,
|
||||
strictMode,
|
||||
exclusions,
|
||||
keywords,
|
||||
buyItNowOnly,
|
||||
canadaOnly,
|
||||
},
|
||||
{ hideUnstableResults },
|
||||
);
|
||||
```
|
||||
|
||||
In `packages/api-server/src/routes/kijiji.ts`:
|
||||
|
||||
```ts
|
||||
const items = await fetchKijijiItems(
|
||||
SEARCH_QUERY,
|
||||
4,
|
||||
"https://www.kijiji.ca",
|
||||
searchOptions,
|
||||
{},
|
||||
{ hideUnstableResults },
|
||||
);
|
||||
```
|
||||
|
||||
Do not add any response wrapper logic in the routes; simply return whatever the core
|
||||
scraper returns so the default array path remains unchanged.
|
||||
|
||||
- [ ] **Step 4: Run tests to verify they pass**
|
||||
|
||||
Run: `bun test packages/api-server/test/routes.test.ts` Expected: PASS, including
|
||||
existing cookie-parameter regression tests and the new unstable-mode forwarding
|
||||
assertions.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/api-server/src/routes/facebook.ts packages/api-server/src/routes/ebay.ts packages/api-server/src/routes/kijiji.ts packages/api-server/test/routes.test.ts
|
||||
git commit -m "feat: expose unstable mode in api routes"
|
||||
```
|
||||
|
||||
### Task 4: Document and forward unstable mode in MCP tools
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/mcp-server/src/protocol/tools.ts`
|
||||
|
||||
- Modify: `packages/mcp-server/src/protocol/handler.ts`
|
||||
|
||||
- Modify: `packages/mcp-server/test/protocol.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Extend `packages/mcp-server/test/protocol.test.ts` with metadata and forwarding
|
||||
coverage:
|
||||
|
||||
```ts
|
||||
test("search tools document unstable listing mode", () => {
|
||||
for (const toolName of ["search_kijiji", "search_facebook", "search_ebay"]) {
|
||||
const tool = tools.find((entry) => entry.name === toolName);
|
||||
|
||||
expect(tool?.inputSchema.properties).toHaveProperty("unstableFilter");
|
||||
expect(tool?.inputSchema.properties.unstableFilter.description).toContain(
|
||||
"20% below the median",
|
||||
);
|
||||
expect(tool?.inputSchema.properties.unstableFilter.description).toContain(
|
||||
"unstableResults",
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("search_facebook forwards unstableFilter to the API", async () => {
|
||||
await handleMcpRequest(
|
||||
new Request("http://localhost", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: 1,
|
||||
method: "tools/call",
|
||||
params: {
|
||||
name: "search_facebook",
|
||||
arguments: {
|
||||
query: "laptop",
|
||||
unstableFilter: true,
|
||||
},
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock.calls[0]?.[0];
|
||||
expect(String(calledUrl)).toContain("unstableFilter=true");
|
||||
});
|
||||
```
|
||||
|
||||
Mirror the forwarding assertion for `search_kijiji` and `search_ebay` in the same file.
|
||||
|
||||
- [ ] **Step 2: Run tests to verify they fail**
|
||||
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: FAIL because the
|
||||
tools do not yet describe `unstableFilter` and the handler does not append it to API
|
||||
URLs.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
In `packages/mcp-server/src/protocol/tools.ts`, add the same optional property to all
|
||||
three tools:
|
||||
|
||||
```ts
|
||||
unstableFilter: {
|
||||
type: "boolean",
|
||||
description:
|
||||
"Optional: move listings priced more than 20% below the median into unstableResults instead of the main results. When enabled, the response shape changes from a plain list to an object with results and unstableResults.",
|
||||
default: false,
|
||||
},
|
||||
```
|
||||
|
||||
In `packages/mcp-server/src/protocol/handler.ts`, append the shared flag in each search
|
||||
branch:
|
||||
|
||||
```ts
|
||||
if (args.unstableFilter !== undefined) {
|
||||
params.append("unstableFilter", args.unstableFilter.toString());
|
||||
}
|
||||
```
|
||||
|
||||
Add that snippet to the `search_kijiji`, `search_facebook`, and `search_ebay` branches.
|
||||
|
||||
- [ ] **Step 4: Run tests to verify they pass**
|
||||
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts` Expected: PASS, including the
|
||||
new tool-schema assertions and URL-forwarding assertions.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/mcp-server/src/protocol/tools.ts packages/mcp-server/src/protocol/handler.ts packages/mcp-server/test/protocol.test.ts
|
||||
git commit -m "docs: expose unstable mode in mcp tools"
|
||||
```
|
||||
|
||||
### Task 5: Verify the full cross-package feature end to end
|
||||
|
||||
**Files:**
|
||||
|
||||
- No code changes expected.
|
||||
|
||||
- [ ] **Step 1: Run the focused package tests**
|
||||
|
||||
Run:
|
||||
`bun test packages/core/test/unstable-listing-mode.test.ts packages/core/test/facebook-core.test.ts packages/core/test/ebay-core.test.ts packages/core/test/kijiji-core.test.ts packages/api-server/test/routes.test.ts packages/mcp-server/test/protocol.test.ts`
|
||||
Expected: PASS with zero failing tests.
|
||||
|
||||
- [ ] **Step 2: Run the broader workspace verification**
|
||||
|
||||
Run: `bun run ci` Expected: PASS with clean workspace validation.
|
||||
|
||||
- [ ] **Step 3: Commit verification-only follow-ups if needed**
|
||||
|
||||
If verification forced any tiny fixes, commit them immediately after the fix with a
|
||||
focused message, for example:
|
||||
|
||||
```bash
|
||||
git add <exact files changed>
|
||||
git commit -m "fix: align unstable mode verification"
|
||||
```
|
||||
|
||||
If no files changed during verification, skip this commit step.
|
||||
|
||||
## Self-Review
|
||||
|
||||
- Spec coverage: shared classifier, all three scrapers, API exposure, MCP documentation,
|
||||
and tests are each mapped to a task.
|
||||
- Placeholder scan: no `TODO`, `TBD`, or “write tests later” placeholders remain.
|
||||
- Type consistency: the plan uses one shared flag name, `unstableFilter`, and one shared
|
||||
core option, `hideUnstableResults`, across all tasks.
|
||||
1061
docs/superpowers/plans/2026-04-28-code-smell-cleanup.md
Normal file
1061
docs/superpowers/plans/2026-04-28-code-smell-cleanup.md
Normal file
File diff suppressed because it is too large
Load Diff
110
docs/superpowers/plans/2026-04-30-ebay-dollar-price-inputs.md
Normal file
110
docs/superpowers/plans/2026-04-30-ebay-dollar-price-inputs.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# Marketplace Dollar Price Inputs Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to
|
||||
> implement this plan task-by-task.
|
||||
> Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Make public marketplace price inputs use dollars while preserving core scraper
|
||||
cent-based filtering.
|
||||
|
||||
**Architecture:** API server owns HTTP query parsing and converts dollar amounts to
|
||||
cents before calling core.
|
||||
MCP server keeps forwarding numeric dollar values as query params.
|
||||
Core scraper internals remain unchanged because parsed listing prices already use cents.
|
||||
This applies to eBay `minPrice`/`maxPrice` and Kijiji `priceMin`/`priceMax`; Facebook
|
||||
exposes no price filter inputs.
|
||||
|
||||
**Tech Stack:** Bun, TypeScript, `bun:test`, MCP JSON-RPC adapter, framework-free Bun
|
||||
HTTP routes.
|
||||
|
||||
* * *
|
||||
|
||||
### Task 1: API Dollar Parsing
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/api-server/src/routes/helpers.ts`
|
||||
|
||||
- Modify: `packages/api-server/src/routes/ebay.ts`
|
||||
|
||||
- Modify: `packages/api-server/src/routes/kijiji.ts`
|
||||
|
||||
- Test: `packages/api-server/test/routes.test.ts`
|
||||
|
||||
- [ ] **Step 1: Add failing API route tests**
|
||||
|
||||
Add tests proving eBay `minPrice=999.99` / `maxPrice=1000` and Kijiji `priceMin=999.99`
|
||||
/ `priceMax=1000` are forwarded to core as `99999` and `100000` cents.
|
||||
Add validation tests for empty, whitespace, negative, hex, mixed text, and malformed
|
||||
decimal price values.
|
||||
|
||||
Run: `bun test packages/api-server/test/routes.test.ts`
|
||||
|
||||
Expected: new forwarding tests fail because route currently rejects decimals and
|
||||
forwards integer dollars unchanged.
|
||||
|
||||
- [ ] **Step 2: Implement dollar parser helper**
|
||||
|
||||
Add `parseDollarPriceParam(searchParams, name)` in
|
||||
`packages/api-server/src/routes/helpers.ts`. Accept `0`, `1000`, `999.99`, and `0.99`.
|
||||
Reject values that do not match `^\d+(?:\.\d{1,2})?$`. Convert to cents with
|
||||
`Math.round(Number(rawValue) * 100)`.
|
||||
|
||||
- [ ] **Step 3: Use dollar parser in eBay and Kijiji routes**
|
||||
|
||||
Replace `parseNonNegativeIntegerParam` calls for eBay `minPrice`/`maxPrice` and Kijiji
|
||||
`priceMin`/`priceMax` with `parseDollarPriceParam`. Keep pagination/count params on
|
||||
integer parsing.
|
||||
|
||||
- [ ] **Step 4: Verify API tests**
|
||||
|
||||
Run: `bun test packages/api-server/test/routes.test.ts`
|
||||
|
||||
Expected: all API route tests pass.
|
||||
|
||||
### Task 2: MCP Schema Contract
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/mcp-server/src/protocol/tools.ts`
|
||||
|
||||
- Test: `packages/mcp-server/test/protocol.test.ts`
|
||||
|
||||
- [ ] **Step 1: Add MCP schema/forwarding tests**
|
||||
|
||||
Add tests that each price-filtering tool (`search_ebay` with `minPrice`/`maxPrice`, `search_kijiji` with `priceMin`/`priceMax`) describes its price inputs as dollar filters and
|
||||
forwards numeric dollar values unchanged in API query params.
|
||||
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts`
|
||||
|
||||
Expected: description test fails until schema text changes; forwarding behavior should
|
||||
already pass or reveal mapping gaps.
|
||||
|
||||
- [ ] **Step 2: Update tool descriptions**
|
||||
|
||||
Change eBay `minPrice` and Kijiji `priceMin` descriptions to `Minimum price in dollars`.
|
||||
Change eBay `maxPrice` and Kijiji `priceMax` descriptions to `Maximum price in dollars`.
|
||||
|
||||
- [ ] **Step 3: Verify MCP tests**
|
||||
|
||||
Run: `bun test packages/mcp-server/test/protocol.test.ts`
|
||||
|
||||
Expected: all MCP protocol tests pass.
|
||||
|
||||
### Task 3: Cross-Package Verification
|
||||
|
||||
**Files:**
|
||||
|
||||
- No additional edits expected.
|
||||
|
||||
- [ ] **Step 1: Run relevant package tests**
|
||||
|
||||
Run: `bun test packages/api-server/test packages/mcp-server/test`
|
||||
|
||||
Expected: all tests pass.
|
||||
|
||||
- [ ] **Step 2: Run CI**
|
||||
|
||||
Run: `bun run ci`
|
||||
|
||||
Expected: typecheck and Biome pass without changing lint config.
|
||||
187
docs/superpowers/plans/2026-04-30-live-parser-tests.md
Normal file
187
docs/superpowers/plans/2026-04-30-live-parser-tests.md
Normal file
@@ -0,0 +1,187 @@
|
||||
# Live Parser Tests Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or superpowers:executing-plans
|
||||
> to implement this plan task-by-task.
|
||||
> Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Add explicit live endpoint test suites for each core marketplace scraper,
|
||||
excluded from default tests and runnable through one script.
|
||||
|
||||
**Architecture:** Live tests live under `packages/core/test/live/` and import public
|
||||
scraper entry points directly.
|
||||
Normal package tests remain offline because the new files are outside current explicit
|
||||
test commands and run only through `bun run test:live`.
|
||||
|
||||
**Tech Stack:** Bun `1.3.13`, `bun:test`, TypeScript, existing core scraper APIs.
|
||||
|
||||
* * *
|
||||
|
||||
## File Structure
|
||||
|
||||
- Create `packages/core/test/live/ebay.live.test.ts`: live eBay search smoke test
|
||||
against `fetchEbayItems`.
|
||||
- Create `packages/core/test/live/kijiji.live.test.ts`: live Kijiji search smoke test
|
||||
against `fetchKijijiItems`.
|
||||
- Create `packages/core/test/live/facebook.live.test.ts`: strict live Facebook search
|
||||
smoke test against `fetchFacebookItems` and `FACEBOOK_COOKIE`.
|
||||
- Modify `package.json`: add root script `test:live` running all files under
|
||||
`packages/core/test/live`.
|
||||
|
||||
### Task 1: Add eBay Live Suite
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `packages/core/test/live/ebay.live.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the live test file**
|
||||
|
||||
```ts
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import fetchEbayItems from "../../src/scrapers/ebay";
|
||||
|
||||
describe("eBay live parser", () => {
|
||||
test("scrapes live search results into listing details", async () => {
|
||||
const results = await fetchEbayItems("iphone", 1, { maxItems: 3 });
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
for (const listing of results) {
|
||||
expect(listing.url).toStartWith("https://");
|
||||
expect(listing.title.length).toBeGreaterThan(0);
|
||||
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
|
||||
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
|
||||
}
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run eBay live test**
|
||||
|
||||
Run: `bun test packages/core/test/live/ebay.live.test.ts` Expected: PASS when eBay
|
||||
returns parseable search results; FAIL on endpoint/rate-limit/parser breakage.
|
||||
|
||||
### Task 2: Add Kijiji Live Suite
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `packages/core/test/live/kijiji.live.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the live test file**
|
||||
|
||||
```ts
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import fetchKijijiItems from "../../src/scrapers/kijiji";
|
||||
|
||||
describe("Kijiji live parser", () => {
|
||||
test("scrapes live search results into detailed listings", async () => {
|
||||
const results = await fetchKijijiItems(
|
||||
"iphone",
|
||||
1,
|
||||
"https://www.kijiji.ca",
|
||||
{ maxPages: 1 },
|
||||
{ includeImages: false, sellerDataDepth: "basic" },
|
||||
);
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
for (const listing of results) {
|
||||
expect(listing.url).toStartWith("https://www.kijiji.ca/");
|
||||
expect(listing.title.length).toBeGreaterThan(0);
|
||||
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
|
||||
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
|
||||
}
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run Kijiji live test**
|
||||
|
||||
Run: `bun test packages/core/test/live/kijiji.live.test.ts` Expected: PASS when Kijiji
|
||||
returns parseable search and detail pages; FAIL on endpoint/parser breakage.
|
||||
|
||||
### Task 3: Add Facebook Live Suite
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `packages/core/test/live/facebook.live.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the live test file**
|
||||
|
||||
```ts
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import fetchFacebookItems from "../../src/scrapers/facebook";
|
||||
|
||||
describe("Facebook live parser", () => {
|
||||
test("requires FACEBOOK_COOKIE for strict live testing", () => {
|
||||
expect(process.env.FACEBOOK_COOKIE?.trim().length ?? 0).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("scrapes live marketplace search results into listing details", async () => {
|
||||
const results = await fetchFacebookItems("iphone", 1, "toronto", 3);
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
for (const listing of results) {
|
||||
expect(listing.url).toStartWith("https://www.facebook.com/marketplace/item/");
|
||||
expect(listing.title.length).toBeGreaterThan(0);
|
||||
expect(listing.listingPrice.cents).toBeGreaterThanOrEqual(0);
|
||||
expect(listing.listingPrice.currency.length).toBeGreaterThan(0);
|
||||
}
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run Facebook live test**
|
||||
|
||||
Run: `bun test packages/core/test/live/facebook.live.test.ts` Expected: PASS with valid
|
||||
`FACEBOOK_COOKIE`; FAIL when `FACEBOOK_COOKIE` is missing, expired, or parser output is
|
||||
empty.
|
||||
|
||||
### Task 4: Add Root Live Test Script
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `package.json`
|
||||
|
||||
- [ ] **Step 1: Add script**
|
||||
|
||||
Change root `scripts` to include:
|
||||
|
||||
```json
|
||||
{
|
||||
"test:live": "bun test packages/core/test/live"
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run all live tests through script**
|
||||
|
||||
Run: `bun run test:live` Expected: runs eBay, Kijiji, and Facebook live suites.
|
||||
Facebook fails if `FACEBOOK_COOKIE` is unset.
|
||||
|
||||
### Task 5: Verify Default Suite Exclusion
|
||||
|
||||
**Files:**
|
||||
|
||||
- No code files modified.
|
||||
|
||||
- [ ] **Step 1: Run existing core tests**
|
||||
|
||||
Run: `bun test packages/core/test` Expected: existing mocked tests run.
|
||||
If Bun discovers `packages/core/test/live`, change normal verification command to
|
||||
explicit glob `bun test packages/core/test/*.test.ts` and document that in final notes.
|
||||
|
||||
- [ ] **Step 2: Run static checks**
|
||||
|
||||
Run: `bun run ci` Expected: typecheck and Biome pass.
|
||||
Fix code issues without changing lint or TypeScript rules.
|
||||
|
||||
## Commit Note
|
||||
|
||||
Do not commit during execution unless user explicitly requests a commit.
|
||||
This repo session policy overrides generic plan commit steps.
|
||||
|
||||
## Self-Review
|
||||
|
||||
- Spec coverage: eBay, Kijiji, Facebook live suites; explicit script; strict Facebook
|
||||
auth; excluded from default flow.
|
||||
- Placeholder scan: no `TBD`, `TODO`, or underspecified implementation steps.
|
||||
- Type consistency: tests use current exported scraper signatures and shared listing
|
||||
fields from `ListingDetails`.
|
||||
@@ -0,0 +1,140 @@
|
||||
# Design: Adopt opencode Monorepo Config
|
||||
|
||||
**Date:** 2025-07-14\
|
||||
**Status:** Approved\
|
||||
**Approach:** Full adoption (A)
|
||||
|
||||
## Context
|
||||
|
||||
The current repo (`marketplace-scrapers-monorepo`) has basic Bun workspaces with 3 packages
|
||||
(`core`, `api-server`, `mcp-server`). Reference: `anomalyco/opencode` monorepo patterns.
|
||||
|
||||
**Gaps vs opencode:**
|
||||
- No Turbo (task orchestration, caching, dep graph)
|
||||
- No workspace catalog (shared dep versions duplicated across packages)
|
||||
- No root tsconfig (identical tsconfigs duplicated in all 3 packages)
|
||||
- No `bunfig.toml` (no exact installs, no root test guard)
|
||||
- `main`/`module` fields instead of `exports` field
|
||||
|
||||
## Changes
|
||||
|
||||
### 1. Root `package.json`
|
||||
|
||||
- Add `workspaces.catalog` block with shared deps:
|
||||
- `@typescript/native-preview`, `@types/bun`, `@types/unidecode`,
|
||||
`@types/cli-progress`
|
||||
- Add `turbo` to `devDependencies`
|
||||
- Add `@tsconfig/bun` to `devDependencies` + catalog
|
||||
- Update root scripts: `typecheck` and `build` delegate to `turbo run`
|
||||
- Keep `build:api`, `build:mcp`, `build:all`, `start` as-is (deployment-specific)
|
||||
- Rename `type:check` → `typecheck` in all packages (Turbo convention)
|
||||
|
||||
### 2. `turbo.json` (new file)
|
||||
|
||||
Tasks:
|
||||
```json
|
||||
{
|
||||
"tasks": {
|
||||
"typecheck": {},
|
||||
"build": { "dependsOn": ["^build"], "outputs": ["dist/**"] },
|
||||
"test": { "dependsOn": ["^build"], "outputs": [] }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`core` builds before `api-server`/`mcp-server` due to `^build` dep.
|
||||
|
||||
### 3. Root `tsconfig.json` (new file)
|
||||
|
||||
```json
|
||||
{
|
||||
"extends": "@tsconfig/bun/tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"lib": ["dom", "ESNext"],
|
||||
"target": "ESNext",
|
||||
"module": "preserve",
|
||||
"moduleResolution": "bundler",
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"moduleDetection": "force",
|
||||
"jsx": "react-jsx",
|
||||
"allowJs": true,
|
||||
"allowImportingTsExtensions": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"skipLibCheck": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"noImplicitOverride": true,
|
||||
"noUnusedLocals": false,
|
||||
"noUnusedParameters": false,
|
||||
"noPropertyAccessFromIndexSignature": false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Per-package `tsconfig.json` (slim)
|
||||
|
||||
All 3 packages slim to:
|
||||
```json
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"paths": { "@/*": ["./src/*"] }
|
||||
},
|
||||
"include": ["./src", "./test"]
|
||||
}
|
||||
```
|
||||
|
||||
### 5. `bunfig.toml` (new file)
|
||||
|
||||
```toml
|
||||
[install]
|
||||
exact = true
|
||||
|
||||
[test]
|
||||
root = "./do-not-run-tests-from-root"
|
||||
```
|
||||
|
||||
Exact installs (`exact = true`) record exact dependency versions instead of ranges, keeping installs reproducible.
|
||||
Root test guard prevents accidental root-level test runs.
|
||||
|
||||
### 6. Package `exports` field
|
||||
|
||||
Replace `main`/`module` with `exports` in all 3 packages:
|
||||
```json
|
||||
"exports": { ".": "./src/index.ts" }
|
||||
```
|
||||
|
||||
Remove `main` and `module` fields.
|
||||
Bun resolves `.ts` directly.
|
||||
|
||||
### 7. Catalog references in per-package `package.json`
|
||||
|
||||
Replace pinned versions with `"catalog:"` for shared deps:
|
||||
- `@typescript/native-preview: "catalog:"`
|
||||
- `@types/bun: "catalog:"`
|
||||
- `@types/unidecode: "catalog:"` (core only)
|
||||
- `@types/cli-progress: "catalog:"` (core only)
|
||||
|
||||
## Files Changed
|
||||
|
||||
| File | Action |
|
||||
| --- | --- |
|
||||
| `package.json` | Update (catalog, turbo dep, scripts) |
|
||||
| `turbo.json` | Create |
|
||||
| `tsconfig.json` | Create |
|
||||
| `bunfig.toml` | Create |
|
||||
| `packages/core/package.json` | Update (exports, catalog refs, script rename) |
|
||||
| `packages/api-server/package.json` | Update (exports, catalog refs, script rename) |
|
||||
| `packages/mcp-server/package.json` | Update (exports, catalog refs, script rename) |
|
||||
| `packages/core/tsconfig.json` | Update (slim, extends root) |
|
||||
| `packages/api-server/tsconfig.json` | Update (slim, extends root) |
|
||||
| `packages/mcp-server/tsconfig.json` | Update (slim, extends root) |
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- No Husky/git hooks (not needed yet)
|
||||
- No SST/cloud infra (not applicable)
|
||||
- No prettier (keep biome as formatter)
|
||||
- No patches mechanism
|
||||
- No `postinstall` scripts
|
||||
@@ -3,7 +3,9 @@
|
||||
## Summary
|
||||
|
||||
Remove all file-based and request-provided cookie inputs across the repo.
|
||||
The only supported authentication input becomes a raw `Cookie` header string supplied through scraper-specific environment variables such as `FACEBOOK_COOKIE` and `EBAY_COOKIE`.
|
||||
The only supported authentication input becomes a raw `Cookie` header string supplied
|
||||
through scraper-specific environment variables such as `FACEBOOK_COOKIE` and
|
||||
`EBAY_COOKIE`.
|
||||
|
||||
## Goals
|
||||
|
||||
@@ -17,7 +19,8 @@ The only supported authentication input becomes a raw `Cookie` header string sup
|
||||
|
||||
- Changing scraper behavior unrelated to authentication input.
|
||||
- Adding new cookie formats or migration helpers.
|
||||
- Preserving backward compatibility for cookie files, JSON cookie arrays, or request overrides.
|
||||
- Preserving backward compatibility for cookie files, JSON cookie arrays, or request
|
||||
overrides.
|
||||
|
||||
## Current State
|
||||
|
||||
@@ -27,27 +30,33 @@ The current shared cookie utilities support three sources in priority order:
|
||||
2. Environment variable
|
||||
3. Cookie file
|
||||
|
||||
`packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and auto-detection between JSON and header-string formats.
|
||||
Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading logic.
|
||||
Docs in `cookies/AGENTS.md` still describe file-based setup and request-level overrides.
|
||||
`packages/core/src/utils/cookies.ts` includes file loading, JSON array parsing, and
|
||||
auto-detection between JSON and header-string formats.
|
||||
Facebook also exposes deprecated `cookiePath` arguments that still reach shared loading
|
||||
logic. Docs in `cookies/AGENTS.md` still describe file-based setup and request-level
|
||||
overrides.
|
||||
|
||||
## Chosen Approach
|
||||
|
||||
Use the hard-reset approach.
|
||||
Delete the shared multi-source cookie-loading model and reduce the cookie surface to env-header parsing only.
|
||||
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions that imply unsupported inputs still exist.
|
||||
Delete the shared multi-source cookie-loading model and reduce the cookie surface to
|
||||
env-header parsing only.
|
||||
This is a larger diff than a surgical removal, but it avoids leaving behind abstractions
|
||||
that imply unsupported inputs still exist.
|
||||
|
||||
## Design
|
||||
|
||||
### Shared Cookie Utilities
|
||||
|
||||
`packages/core/src/utils/cookies.ts` will keep only the pieces needed for env-header-based auth:
|
||||
`packages/core/src/utils/cookies.ts` will keep only the pieces needed for
|
||||
env-header-based auth:
|
||||
|
||||
- `Cookie` type
|
||||
- A reduced cookie config shape containing only `name`, `domain`, and `envVar`
|
||||
- `parseCookieString()` for raw `Cookie` header strings
|
||||
- `formatCookiesForHeader()` for domain filtering and request formatting
|
||||
- An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a targeted error when missing or invalid
|
||||
- An env-only loader that reads `process.env[config.envVar]`, parses it, and throws a
|
||||
targeted error when missing or invalid
|
||||
|
||||
The following shared utilities will be removed:
|
||||
|
||||
@@ -68,15 +77,18 @@ For Facebook this means:
|
||||
|
||||
For eBay this means:
|
||||
|
||||
- Remove any remaining fallback/file-oriented behavior from shared calls and error strings
|
||||
- Remove any remaining fallback/file-oriented behavior from shared calls and error
|
||||
strings
|
||||
- Keep the existing env-var auth path, but make it the only path
|
||||
|
||||
### Public API Surface
|
||||
|
||||
Exports from `packages/core/src/index.ts` should reflect the new contract.
|
||||
If exported functions currently advertise cookie-source or cookie-path arguments, their signatures will be tightened so callers cannot pass unsupported inputs.
|
||||
If exported functions currently advertise cookie-source or cookie-path arguments, their
|
||||
signatures will be tightened so callers cannot pass unsupported inputs.
|
||||
|
||||
Downstream adapter packages should continue calling core through the simplified signatures without adding their own cookie-loading behavior.
|
||||
Downstream adapter packages should continue calling core through the simplified
|
||||
signatures without adding their own cookie-loading behavior.
|
||||
|
||||
### Error Handling
|
||||
|
||||
@@ -93,8 +105,8 @@ Errors should be blunt and specific:
|
||||
|
||||
### Testing Strategy
|
||||
|
||||
Follow TDD.
|
||||
Start by changing or adding core tests so the old file/request behavior is no longer accepted.
|
||||
Follow TDD. Start by changing or adding core tests so the old file/request behavior is
|
||||
no longer accepted.
|
||||
|
||||
Coverage targets:
|
||||
|
||||
@@ -102,7 +114,8 @@ Coverage targets:
|
||||
2. Missing env vars fail with the new env-only error.
|
||||
3. Invalid env strings fail without falling back to files or request data.
|
||||
4. Facebook APIs no longer expose or honor cookie-path/request-cookie behavior.
|
||||
5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to the env-only contract.
|
||||
5. Existing tests that depended on missing files or JSON cookie arrays are rewritten to
|
||||
the env-only contract.
|
||||
|
||||
Verification target after implementation:
|
||||
|
||||
@@ -121,11 +134,15 @@ Update cookie-related docs to match the new contract:
|
||||
|
||||
## Risks
|
||||
|
||||
- External callers using request cookie overrides will break at compile time or runtime, depending on how they consume the package.
|
||||
- Recent work added support for custom Facebook cookie paths, so removing that path intentionally reverses a newly introduced behavior.
|
||||
- Tests that currently model missing-file behavior must be rewritten rather than preserved.
|
||||
- External callers using request cookie overrides will break at compile time or runtime,
|
||||
depending on how they consume the package.
|
||||
- Recent work added support for custom Facebook cookie paths, so removing that path
|
||||
intentionally reverses a newly introduced behavior.
|
||||
- Tests that currently model missing-file behavior must be rewritten rather than
|
||||
preserved.
|
||||
|
||||
## Rollout Notes
|
||||
|
||||
This is an intentional contract break.
|
||||
The code, tests, and docs should all land together so there is no mixed messaging about supported cookie sources.
|
||||
The code, tests, and docs should all land together so there is no mixed messaging about
|
||||
supported cookie sources.
|
||||
|
||||
@@ -2,35 +2,46 @@
|
||||
|
||||
## Summary
|
||||
|
||||
Replace the legacy Facebook Marketplace scraper with a route-aware implementation built around current Comet bootstrap markers and route-specific extraction.
|
||||
The new scraper will keep authenticated direct HTTP fetches as the primary transport, but it will stop treating legacy `require`, `__bbox`, and `marketplace_product_details_page` structures as the main parsing contract.
|
||||
Replace the legacy Facebook Marketplace scraper with a route-aware implementation built
|
||||
around current Comet bootstrap markers and route-specific extraction.
|
||||
The new scraper will keep authenticated direct HTTP fetches as the primary transport,
|
||||
but it will stop treating legacy `require`, `__bbox`, and
|
||||
`marketplace_product_details_page` structures as the main parsing contract.
|
||||
|
||||
## Goals
|
||||
|
||||
- Replace both Facebook search and item-detail extraction with a current-shape parser.
|
||||
- Keep authenticated direct HTTP requests as the primary fetch strategy.
|
||||
- Parse route-specific Comet bootstrap/state payloads before falling back to rendered-HTML extraction.
|
||||
- Parse route-specific Comet bootstrap/state payloads before falling back to
|
||||
rendered-HTML extraction.
|
||||
- Detect auth-gated, unavailable, and unknown responses explicitly.
|
||||
- Update tests so they model current route markers and failure modes instead of legacy page objects.
|
||||
- Update tests so they model current route markers and failure modes instead of legacy
|
||||
page objects.
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Reworking non-Facebook scrapers.
|
||||
- Converting the scraper to browser-only automation.
|
||||
- Preserving old parser behavior for `marketplace_product_details_page` or `__bbox`-driven item extraction.
|
||||
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively before implementation.
|
||||
- Preserving old parser behavior for `marketplace_product_details_page` or
|
||||
`__bbox`-driven item extraction.
|
||||
- Reverse-engineering every internal Facebook bootstrap payload shape exhaustively
|
||||
before implementation.
|
||||
|
||||
## Current State
|
||||
|
||||
The current implementation in `packages/core/src/scrapers/facebook.ts` still uses authenticated HTTP requests, which remains correct.
|
||||
The search path parses embedded script JSON and looks for `marketplace_search.feed_units.edges`.
|
||||
The item-detail path is centered on legacy extraction paths such as:
|
||||
The current implementation in `packages/core/src/scrapers/facebook.ts` still uses
|
||||
authenticated HTTP requests, which remains correct.
|
||||
The search path parses embedded script JSON and looks for
|
||||
`marketplace_search.feed_units.edges`. The item-detail path is centered on legacy
|
||||
extraction paths such as:
|
||||
|
||||
- `parsed.require[0][3].__bbox.result.data.viewer.marketplace_product_details_page.target`
|
||||
- nested `__bbox.require[...]` variations
|
||||
- recursive search through `parsed.require`
|
||||
|
||||
Live evidence gathered earlier in this session and by the isolated research subagent shows that current Facebook Marketplace pages are Comet route-driven and expose markers such as:
|
||||
Live evidence gathered earlier in this session and by the isolated research subagent
|
||||
shows that current Facebook Marketplace pages are Comet route-driven and expose markers
|
||||
such as:
|
||||
|
||||
- `XCometMarketplaceSearchController`
|
||||
- `XCometMarketplacePermalinkController`
|
||||
@@ -41,7 +52,9 @@ Live evidence gathered earlier in this session and by the isolated research suba
|
||||
- `data-sjs`
|
||||
- `data-btmanifest`
|
||||
|
||||
The same live investigation also showed that authenticated item pages no longer expose the old `marketplace_product_details_page` marker reliably, while live search still returns usable results.
|
||||
The same live investigation also showed that authenticated item pages no longer expose
|
||||
the old `marketplace_product_details_page` marker reliably, while live search still
|
||||
returns usable results.
|
||||
|
||||
## Chosen Approach
|
||||
|
||||
@@ -52,9 +65,11 @@ The scraper will:
|
||||
1. Fetch authenticated HTML directly.
|
||||
2. Classify the response using current route and auth markers.
|
||||
3. Parse inline bootstrap/state payloads using route-specific probes.
|
||||
4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the payload cannot be decoded into the expected search or item shape.
|
||||
4. Fall back to rendered-HTML extraction only when bootstrap markers are present but the
|
||||
payload cannot be decoded into the expected search or item shape.
|
||||
|
||||
This keeps the cheaper direct-HTTP transport while shifting the parser contract from legacy page-object names to current Comet route structure.
|
||||
This keeps the cheaper direct-HTTP transport while shifting the parser contract from
|
||||
legacy page-object names to current Comet route structure.
|
||||
|
||||
## Design
|
||||
|
||||
@@ -88,7 +103,8 @@ Primary behavior:
|
||||
- fetch the Marketplace search HTML with auth cookies
|
||||
- confirm the response class is `search`
|
||||
- extract inline bootstrap/state blobs from script tags and page attributes
|
||||
- probe for route-specific search payloads associated with `XCometMarketplaceSearchController`
|
||||
- probe for route-specific search payloads associated with
|
||||
`XCometMarketplaceSearchController`
|
||||
- map decoded search results into summary listing records
|
||||
|
||||
Search summary fields should remain aligned with the current public output shape:
|
||||
@@ -102,7 +118,8 @@ Search summary fields should remain aligned with the current public output shape
|
||||
|
||||
Fallback behavior:
|
||||
|
||||
- if search route markers are present but structured payload decoding fails, extract listing summaries from rendered HTML anchors and text patterns
|
||||
- if search route markers are present but structured payload decoding fails, extract
|
||||
listing summaries from rendered HTML anchors and text patterns
|
||||
- use item links matching `/marketplace/item/<id>` as the anchor for fallback extraction
|
||||
- treat fallback results as summary-only data, not rich detail data
|
||||
|
||||
@@ -132,9 +149,12 @@ Priority item fields:
|
||||
|
||||
Fallback behavior:
|
||||
|
||||
- if permalink route markers are present but no stable payload object is decodable, extract data from rendered HTML text structure
|
||||
- prioritize title, price, condition, description, location text, and seller module content
|
||||
- return partial item data when core user-facing fields are present rather than failing solely because deeper commerce metadata is missing
|
||||
- if permalink route markers are present but no stable payload object is decodable,
|
||||
extract data from rendered HTML text structure
|
||||
- prioritize title, price, condition, description, location text, and seller module
|
||||
content
|
||||
- return partial item data when core user-facing fields are present rather than failing
|
||||
solely because deeper commerce metadata is missing
|
||||
|
||||
### Bootstrap Parsing Strategy
|
||||
|
||||
@@ -151,11 +171,14 @@ Candidate discovery inputs:
|
||||
- `ServerJS` / `Bootloader` inline blobs
|
||||
- route controller names
|
||||
|
||||
Candidate scoring for search should favor objects that contain repeated result-card semantics, item IDs, listing links, titles, prices, or location summaries.
|
||||
Candidate scoring for item pages should favor objects that contain singular listing semantics, title, price, condition, description, location, seller, or permalink context.
|
||||
Candidate scoring for search should favor objects that contain repeated result-card
|
||||
semantics, item IDs, listing links, titles, prices, or location summaries.
|
||||
Candidate scoring for item pages should favor objects that contain singular listing
|
||||
semantics, title, price, condition, description, location, seller, or permalink context.
|
||||
|
||||
The parser should not depend on one hard-coded object name surviving forever.
|
||||
Instead, it should look for route-specific semantic clusters and choose the strongest candidate.
|
||||
Instead, it should look for route-specific semantic clusters and choose the strongest
|
||||
candidate.
|
||||
|
||||
### Legacy Removal
|
||||
|
||||
@@ -166,7 +189,9 @@ Specifically:
|
||||
- delete legacy-first `require` / `__bbox` navigation tables
|
||||
- delete tests whose only purpose is to preserve those legacy paths
|
||||
|
||||
If a minimal legacy compatibility branch remains, it must be a last-resort fallback behind the new route-aware parser and should not shape test fixtures or design decisions.
|
||||
If a minimal legacy compatibility branch remains, it must be a last-resort fallback
|
||||
behind the new route-aware parser and should not shape test fixtures or design
|
||||
decisions.
|
||||
|
||||
### Error Handling
|
||||
|
||||
@@ -178,7 +203,8 @@ Facebook responses should now fail with explicit route-aware outcomes:
|
||||
4. Search or item route detected, but no decodable data found.
|
||||
5. Unknown response shape.
|
||||
|
||||
Error messages should name the actual class of failure instead of implying that every parse miss is caused by expired cookies.
|
||||
Error messages should name the actual class of failure instead of implying that every
|
||||
parse miss is caused by expired cookies.
|
||||
|
||||
### Testing Strategy
|
||||
|
||||
@@ -190,11 +216,15 @@ Coverage targets:
|
||||
1. Search responses classify correctly from current Comet controller markers.
|
||||
2. Item responses classify correctly from current Comet controller markers.
|
||||
3. Login-gated and unavailable responses are detected before parsing.
|
||||
4. Search bootstrap parsing produces summary listing results from current-shape fixtures.
|
||||
4. Search bootstrap parsing produces summary listing results from current-shape
|
||||
fixtures.
|
||||
5. Item bootstrap parsing produces rich listing details from current-shape fixtures.
|
||||
6. Search fallback extraction works when route markers exist but structured payload decoding fails.
|
||||
7. Item fallback extraction works when route markers exist but structured payload decoding fails.
|
||||
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the contract.
|
||||
6. Search fallback extraction works when route markers exist but structured payload
|
||||
decoding fails.
|
||||
7. Item fallback extraction works when route markers exist but structured payload
|
||||
decoding fails.
|
||||
8. Old legacy-only item fixtures are removed or rewritten so they no longer define the
|
||||
contract.
|
||||
|
||||
Verification target after implementation:
|
||||
|
||||
@@ -204,23 +234,30 @@ Verification target after implementation:
|
||||
|
||||
## Public API Surface
|
||||
|
||||
Keep the current public function names unless the rewrite proves that a signature change is required:
|
||||
Keep the current public function names unless the rewrite proves that a signature change
|
||||
is required:
|
||||
|
||||
- `fetchFacebookItems(...)`
|
||||
- `fetchFacebookItem(...)`
|
||||
- `extractFacebookMarketplaceData(...)`
|
||||
- `extractFacebookItemData(...)`
|
||||
|
||||
The internals should change substantially, but callers should not need a new integration surface for this rewrite.
|
||||
The internals should change substantially, but callers should not need a new integration
|
||||
surface for this rewrite.
|
||||
|
||||
## Risks
|
||||
|
||||
- Facebook may change bootstrap payload naming again, so route/controller markers are more stable than exact nested object paths but still not guaranteed.
|
||||
- Search and item pages may each contain multiple partial payloads, making candidate ranking important.
|
||||
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs clear precedence rules.
|
||||
- Live fixtures can drift from production quickly, so tests must model route semantics rather than exact one-off payloads where possible.
|
||||
- Facebook may change bootstrap payload naming again, so route/controller markers are
|
||||
more stable than exact nested object paths but still not guaranteed.
|
||||
- Search and item pages may each contain multiple partial payloads, making candidate
|
||||
ranking important.
|
||||
- Fallback rendered-HTML extraction may be noisier than bootstrap decoding and needs
|
||||
clear precedence rules.
|
||||
- Live fixtures can drift from production quickly, so tests must model route semantics
|
||||
rather than exact one-off payloads where possible.
|
||||
|
||||
## Rollout Notes
|
||||
|
||||
The code, fixtures, and tests should change together.
|
||||
There should be no mixed state where the implementation is Comet-aware but the tests still encode `marketplace_product_details_page` as the primary contract.
|
||||
There should be no mixed state where the implementation is Comet-aware but the tests
|
||||
still encode `marketplace_product_details_page` as the primary contract.
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
# Unstable Listing Mode Design
|
||||
|
||||
## Summary
|
||||
|
||||
Add an optional shared result mode across Facebook, eBay, and Kijiji that moves
|
||||
suspiciously cheap listings out of the main results into a separate `unstableResults`
|
||||
bucket. Listings are considered unstable when their price is more than 20% below the
|
||||
median price of the scraper’s priced search results.
|
||||
|
||||
## Goals
|
||||
|
||||
- Support the same optional unstable-listing mode across all scrapers.
|
||||
- Keep current default scraper and route behavior unchanged unless the mode is enabled.
|
||||
- Hide unstable listings from the main results while still returning them separately.
|
||||
- Implement the rule once in shared core code instead of duplicating
|
||||
marketplace-specific logic.
|
||||
- Document the option in MCP tool descriptions so callers can discover it.
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Adding marketplace-specific thresholds or heuristics.
|
||||
- Re-ranking results beyond splitting stable and unstable buckets.
|
||||
- Classifying free, missing-price, or invalid-price listings as unstable.
|
||||
- Changing unrelated scraper parsing behavior.
|
||||
|
||||
## Current State
|
||||
|
||||
`packages/core` currently returns plain arrays from scraper search functions.
|
||||
`packages/api-server` forwards those scraper results directly from marketplace routes.
|
||||
`packages/mcp-server` documents search tools per marketplace, but does not expose or
|
||||
describe any result-stability mode.
|
||||
|
||||
There is no shared result-classification utility today.
|
||||
Price filtering exists in some scrapers, but not a cross-marketplace median-based split.
|
||||
|
||||
## Chosen Approach
|
||||
|
||||
Use a shared core utility plus per-route and per-tool opt-in.
|
||||
|
||||
The shared utility will accept parsed listings, compute the median from valid positive
|
||||
prices, and split the data into `results` and `unstableResults`. Each scraper will opt
|
||||
into that utility when the caller enables unstable-listing mode.
|
||||
API routes and MCP tools will expose the same optional mode so the feature is
|
||||
consistently available everywhere scraper search is surfaced.
|
||||
|
||||
This keeps the heuristic centralized, minimizes duplicated logic, and preserves existing
|
||||
consumers by leaving the default path unchanged.
|
||||
|
||||
## Design
|
||||
|
||||
### Shared Core Classification
|
||||
|
||||
Add a shared utility in `packages/core` for listing stability classification.
|
||||
|
||||
Responsibilities:
|
||||
|
||||
- accept parsed listing arrays with `listingPrice.cents`
|
||||
- ignore listings whose price is missing, non-numeric, or non-positive when computing
|
||||
the median
|
||||
- compute the median price from valid priced listings
|
||||
- classify listings as unstable when `listingPrice.cents < median * 0.8`
|
||||
- return an object with:
|
||||
- `results`: listings that remain in the main bucket
|
||||
- `unstableResults`: listings moved out of the main bucket
|
||||
|
||||
Listings excluded from median computation because their price is missing or non-positive
|
||||
remain in `results` unchanged.
|
||||
|
||||
### Scraper Integration
|
||||
|
||||
Facebook, eBay, and Kijiji search entrypoints will gain the same optional mode flag.
|
||||
|
||||
Default behavior:
|
||||
|
||||
- return the current plain array result shape
|
||||
|
||||
Opt-in behavior:
|
||||
|
||||
- run the shared classification utility after parsing search results
|
||||
- classify before final result limiting so unstable items do not consume main-result
|
||||
slots
|
||||
- return an object shaped like:
|
||||
|
||||
```ts
|
||||
{
|
||||
results: ListingDetails[];
|
||||
unstableResults: ListingDetails[];
|
||||
}
|
||||
```
|
||||
|
||||
Each scraper will use its existing concrete listing subtype for these arrays.
|
||||
|
||||
### API Surface
|
||||
|
||||
Marketplace API routes will expose an optional query parameter for unstable-listing
|
||||
mode.
|
||||
|
||||
Requirements:
|
||||
|
||||
- keep existing route responses unchanged when the parameter is absent or false
|
||||
- when enabled, return the object payload with `results` and `unstableResults`
|
||||
- use the same semantics across Facebook, eBay, and Kijiji routes
|
||||
|
||||
The exact parameter name should be consistent across routes and intentionally describe
|
||||
the behavior, for example `unstableFilter=true`.
|
||||
|
||||
### MCP Surface
|
||||
|
||||
Marketplace MCP tools will expose the same optional mode as an input field.
|
||||
|
||||
Tool descriptions should explicitly document:
|
||||
|
||||
- that the option is optional
|
||||
- that it moves listings priced more than 20% below the median into `unstableResults`
|
||||
- that enabling it changes the response shape from a plain list to an object with
|
||||
`results` and `unstableResults`
|
||||
- that the behavior is available for Facebook, eBay, and Kijiji search tools
|
||||
|
||||
The wording should be aligned across all three tools so the feature reads as one shared
|
||||
capability.
|
||||
|
||||
### Error Handling
|
||||
|
||||
The unstable-listing mode should be best-effort and non-failing.
|
||||
|
||||
- If there are no valid positive prices, return all listings in `results` and an empty
|
||||
`unstableResults` array.
|
||||
- If there is only one valid priced listing, do not classify it as unstable.
|
||||
- Parsing failures remain governed by existing scraper behavior; the classification
|
||||
layer should not introduce new scraper-specific errors.
|
||||
|
||||
### Testing Strategy
|
||||
|
||||
Follow TDD. Start with shared utility tests, then wire the option through scraper and
|
||||
route tests.
|
||||
|
||||
Coverage targets:
|
||||
|
||||
1. Median calculation for odd-sized valid price sets.
|
||||
2. Median calculation for even-sized valid price sets.
|
||||
3. Strict cutoff behavior where only listings with `price < median * 0.8` move to
|
||||
`unstableResults`.
|
||||
4. Missing, invalid, zero, or negative prices are excluded from median computation and
|
||||
remain in `results`.
|
||||
5. Default scraper behavior still returns plain arrays when the option is disabled.
|
||||
6. Enabled scraper behavior returns `{ results, unstableResults }` for Facebook, eBay,
|
||||
and Kijiji.
|
||||
7. API routes preserve existing response shapes by default and switch to the object
|
||||
payload only when enabled.
|
||||
8. MCP tool metadata documents the new optional mode for all three marketplace search
|
||||
tools.
|
||||
|
||||
Verification target after implementation:
|
||||
|
||||
- `bun test packages/core/test`
|
||||
- `bun test packages/api-server/test`
|
||||
- `bun test packages/mcp-server/test` if MCP metadata tests exist or are added
|
||||
- `bun run ci`
|
||||
|
||||
## Risks
|
||||
|
||||
- The optional mode introduces a union return shape for scraper callers, which can
|
||||
ripple into downstream TypeScript signatures.
|
||||
- Applying classification before final limiting changes which items appear in the main
|
||||
bucket compared with a naive post-limit split.
|
||||
- Kijiji and eBay may have different mixes of priced and unpriced results, so excluding
|
||||
non-positive prices from the median must remain explicit and tested.
|
||||
|
||||
## Rollout Notes
|
||||
|
||||
Land the shared classifier, scraper wiring, route wiring, tests, and MCP description
|
||||
updates together. That avoids a partial rollout where the feature exists in one surface
|
||||
but is undocumented or inconsistent elsewhere.
|
||||
@@ -0,0 +1,44 @@
|
||||
# Live Parser Tests Design
|
||||
|
||||
## Summary
|
||||
|
||||
Add explicit live endpoint tests for each core scraper parser path.
|
||||
These tests are excluded from normal deterministic test commands and run only through a
|
||||
dedicated package script.
|
||||
|
||||
## Scope
|
||||
|
||||
- Add one live suite per parser: eBay, Kijiji, Facebook.
|
||||
- Place suites under `packages/core/test/live/` so normal
|
||||
`bun test packages/core/test/*.test.ts` patterns do not include them accidentally.
|
||||
- Add a root `test:live` script that runs all live suites together.
|
||||
- Keep existing mocked tests unchanged.
|
||||
|
||||
## Behavior
|
||||
|
||||
- Each suite calls the public scraper entry point for that marketplace with a narrow
|
||||
query and low max item count.
|
||||
- Assertions verify scrape output shape and parser viability, not exact listing
|
||||
identity.
|
||||
- eBay and Kijiji require live network access and fail on endpoint/parser breakage.
|
||||
- Facebook is strict: missing or expired `FACEBOOK_COOKIE` fails the live suite instead
|
||||
of skipping.
|
||||
|
||||
## Test Data
|
||||
|
||||
- Use stable broad Canadian queries such as `iphone` or `laptop` to reduce empty-result
|
||||
risk.
|
||||
- Use low limits to avoid unnecessary load and rate-limit pressure.
|
||||
- Avoid exact prices, titles, listing IDs, or ordering assumptions.
|
||||
|
||||
## Failure Meaning
|
||||
|
||||
- Empty result arrays fail because live parser logic did not produce usable listings.
|
||||
- Missing required fields fail because adapter contracts depend on those fields.
|
||||
- Authentication failures fail the Facebook suite because the selected scope is strict.
|
||||
|
||||
## Verification
|
||||
|
||||
- Normal suite remains offline: `bun test packages/core/test`.
|
||||
- Live suite runs by explicit script: `bun run test:live`.
|
||||
- Full static checks remain via `bun run ci`.
|
||||
@@ -0,0 +1,173 @@
|
||||
# Facebook Marketplace Anti-Bot Challenge Solver Design
|
||||
|
||||
## Summary
|
||||
|
||||
Add a challenge-detection and challenge-solving layer to the Facebook Marketplace
|
||||
scraper so it can handle anti-bot gates (checkpoint pages, token rotation, cookie
|
||||
requirements) programmatically.
|
||||
Build the solver in pure Bun — no browser automation in production.
|
||||
Use `agent-browser` only for one-time debug reconnaissance.
|
||||
|
||||
## Goals
|
||||
|
||||
- Identify which anti-bot challenge(s) Facebook Marketplace triggers against
|
||||
programmatic HTTP requests.
|
||||
- Implement detection + solving for each discovered challenge type.
|
||||
- Wire the solver into `fetchFacebookItems` and `fetchFacebookItem` so challenges are
|
||||
handled transparently.
|
||||
- Follow the same pattern as the existing `ebay-challenge.ts` (detect → solve → retry
|
||||
with clearance).
|
||||
- Zero browser automation at runtime.
|
||||
Pure `fetch` + `Bun` APIs + npm packages only.
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Solving login/auth-wall challenges (those require fresh cookies — not solvable
|
||||
programmatically).
|
||||
- Full account login automation (cookies must be provided by the user).
|
||||
- Browser-based scraping or Puppeteer/Playwright integration.
|
||||
- Solving challenges for non-Marketplace Facebook endpoints.
|
||||
|
||||
## Current State
|
||||
|
||||
The Facebook scraper (`packages/core/src/scrapers/facebook.ts`) fetches Marketplace
|
||||
search and item pages via authenticated `fetch` with cookies from `FACEBOOK_COOKIE` env
|
||||
var. It:
|
||||
|
||||
- Sends a browser-like header set (`sec-ch-ua`, `user-agent`, etc.)
|
||||
- Parses SSR HTML for embedded JSON in script tags
|
||||
- Has no challenge detection — if Facebook returns a challenge page, the scraper
|
||||
silently fails (no listings parsed, classifies as “unknown”)
|
||||
- Depends entirely on cookie freshness
|
||||
|
||||
The eBay scraper already follows the challenge-solver pattern in this codebase:
|
||||
`ebay.ts` uses `warmEbaySession()`, `isChallengeRedirect()`, `isChallengeHtml()`, and
|
||||
`solveEbayChallenge()` from `ebay-challenge.ts`.
|
||||
|
||||
## Chosen Approach
|
||||
|
||||
**Reconnaissance-first development:**
|
||||
|
||||
1. Use `agent-browser` (debug only) to capture a real Facebook Marketplace browsing
|
||||
session via HAR.
|
||||
2. Probe programmatic `fetch` to see what Facebook returns without a browser.
|
||||
3. Diff the two to identify the gap (missing headers,
|
||||
missing cookies, or missing JS execution).
|
||||
4. Build a modular solver in `packages/core/src/utils/facebook-challenge.ts` that
|
||||
detects each challenge type and applies the appropriate fix.
|
||||
5. Wire it into `facebook.ts` following the eBay pattern.
|
||||
|
||||
## Design
|
||||
|
||||
### File Plan
|
||||
|
||||
| File | Purpose |
|
||||
| --- | --- |
|
||||
| `packages/core/src/utils/facebook-challenge.ts` | Challenge detection, solving, and cookie/session utilities |
|
||||
| `packages/core/src/scrapers/facebook.ts` | Modified: warmup, challenge detection before parsing, retry loop |
|
||||
| `packages/core/test/facebook-challenge.test.ts` | Unit tests with mock challenge HTML fixtures |
|
||||
|
||||
### Flow
|
||||
|
||||
```
|
||||
fetchFacebookItems(searchUrl)
|
||||
├── warmFacebookSession() → GET facebook.com/ (collect datr + Akamai cookies)
|
||||
├── fetchHtml(searchUrl) → receives response
|
||||
├── detectFacebookChallenge(response)
|
||||
│ ├── checkpoint/challenge HTML → solveCheckpointChallenge()
|
||||
│ ├── redirect to /login → fail (cookies expired)
|
||||
│ ├── missing required cookies → regenerate session
|
||||
│ ├── 429 rate limit → backoff + retry (existing http.ts handles this)
|
||||
│ └── no challenge → proceed to parsing
|
||||
├── if solveCheckpointChallenge succeeds → retry fetchHtml with clearance cookie
|
||||
└── parse results
|
||||
```
|
||||
|
||||
### Challenge Types (to be confirmed by reconnaissance)
|
||||
|
||||
| Type | Expected Signal | Solving Strategy |
|
||||
| --- | --- | --- |
|
||||
| Login wall | Redirect to `/login` or HTML `"You must log in"` | Fail — user must provide fresh cookies |
|
||||
| Checkpoint page | HTML contains `checkpoint` or `challenge` path | Parse hidden form fields, compute proof-of-work if present, then submit to the answer endpoint |
|
||||
| `datr` cookie missing | No `datr` in cookie jar → request fails | Fetch homepage first to obtain `datr` (session warmup) |
|
||||
| DTSG token needed | Form submissions fail with CSRF error | Extract `fb_dtsg` from page HTML, include in request body |
|
||||
| GraphQL header check | Request blocked without internal headers | Extract `x-fb-friendly-name` from browser HAR, replicate |
|
||||
| Akamai/bot-manager | Redirect loops or blank pages without Akamai cookies | Homepage warmup to collect `bm_sv`, `bm_mi`, etc. |
|
||||
|
||||
### Key Modules
|
||||
|
||||
**`facebook-challenge.ts`:**
|
||||
|
||||
```
|
||||
// Session warmup — fetch homepage to prime cookies
|
||||
warmFacebookSession(): Promise<Record<string, string>>
|
||||
|
||||
// Challenge detection
|
||||
detectFacebookChallenge(html, status, url, headers): ChallengeType | null
|
||||
|
||||
// Checkpoint solver
|
||||
solveCheckpointChallenge(html, cookies): Promise<ChallengeResult>
|
||||
|
||||
// DTSG token extraction
|
||||
extractDtsg(html): string | null
|
||||
|
||||
// Cookie jar management (shared with ebay.ts pattern)
|
||||
mergeCookies(...): Record<string, string>
|
||||
```
|
||||
|
||||
**`ChallengeResult` type:**
|
||||
```ts
|
||||
interface ChallengeResult {
|
||||
solved: boolean;
|
||||
cookies?: Record<string, string>; // clearance cookies to replay
|
||||
token?: string; // challenge response token
|
||||
error?: string; // why it failed
|
||||
}
|
||||
```
|
||||
|
||||
### Error Handling
|
||||
|
||||
- Solver failure → return `ChallengeResult { solved: false, error: "..." }`, scraper
|
||||
logs warning and returns empty results (never throws).
|
||||
- Unrecognized challenge → log the response URL and HTML snippet for future analysis.
|
||||
- Rate limits → handled by existing `http.ts` exponential backoff (no change needed).
|
||||
- Solver timeout → 30s cap on any challenge computation, fall back to `solved: false`.
|
||||
|
||||
### Testing
|
||||
|
||||
| Test | What It Verifies |
|
||||
| --- | --- |
|
||||
| `detectFacebookChallenge` with sample checkpoint HTML | Correctly identifies checkpoint challenge |
|
||||
| `detectFacebookChallenge` with normal search HTML | Returns null (no false positives) |
|
||||
| `detectFacebookChallenge` with login redirect | Identifies auth-gated |
|
||||
| `solveCheckpointChallenge` with known PoW params | Produces correct answer |
|
||||
| `warmFacebookSession` with mocked fetch | Collects expected cookies |
|
||||
| `extractDtsg` with sample page HTML | Extracts the DTSG token |
|
||||
| Integration: fetch → challenge → solve → retry → results | End-to-end mock flow |
|
||||
| Solver throws → scraper returns empty, no crash | Graceful fallback |
|
||||
| Unrecognized challenge → scraper logs warning, returns empty | No unhandled challenge crashes |
|
||||
|
||||
Test data will use anonymized HTML fixtures (no real user data).
|
||||
|
||||
## Reconnaissance Steps (debug-only, one-time)
|
||||
|
||||
1. **Probe programmatically:** `fetch` Marketplace search with/without cookies, record
|
||||
status code and HTML.
|
||||
2. **Browser session:** `agent-browser` → log into Facebook → navigate Marketplace →
|
||||
record HAR.
|
||||
3. **Diff analysis:** Compare browser request headers against
|
||||
our programmatic headers.
|
||||
4. **Cookie inventory:** List all cookies from browser session, identify which are
|
||||
essential.
|
||||
5. **Challenge trigger:** Identify what change in request signature triggers a
|
||||
challenge.
|
||||
6. **Replay test:** Replay browser’s exact request via `fetch` to confirm
|
||||
headers/cookies are the differentiator.
|
||||
|
||||
All reconnaissance artifacts are saved under `docs/facebook-challenge/`.
|
||||
|
||||
## Decisions Deferred to Post-Reconnaissance
|
||||
|
||||
- Exact challenge types and solving strategies (depends on what Facebook actually uses).
|
||||
- Whether a PoW solver, CAPTCHA solver, or token-extraction approach is needed.
|
||||
- npm package dependencies (only add what the reconnaissance proves necessary).
|
||||
36
package.json
36
package.json
@@ -1,21 +1,39 @@
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/package.json",
|
||||
"name": "marketplace-scrapers-monorepo",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"packageManager": "bun@1.3.13",
|
||||
"scripts": {
|
||||
"ci": "biome ci",
|
||||
"clean": "rm -rf dist",
|
||||
"typecheck": "turbo run typecheck",
|
||||
"build": "bun run clean && turbo run build",
|
||||
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
|
||||
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
|
||||
"build:all": "bun run build:api && bun run build:mcp",
|
||||
"build": "bun run clean && bun run build:all",
|
||||
"ci": "bun run typecheck && biome check --write",
|
||||
"test:live": "bun test --cwd packages/core test/live",
|
||||
"clean": "rm -rf dist",
|
||||
"start": "./scripts/start.sh"
|
||||
},
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"workspaces": [
|
||||
"packages/*"
|
||||
],
|
||||
"workspaces": {
|
||||
"packages": [
|
||||
"packages/*"
|
||||
],
|
||||
"catalog": {
|
||||
"@tsconfig/bun": "1.0.9",
|
||||
"@typescript/native-preview": "7.0.0-dev.20260428.1",
|
||||
"@types/bun": "1.3.13",
|
||||
"@types/cli-progress": "3.11.6",
|
||||
"@types/unidecode": "1.1.0"
|
||||
}
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "2.3.11"
|
||||
"@biomejs/biome": "2.3.11",
|
||||
"@tsconfig/bun": "catalog:",
|
||||
"turbo": "2.5.4"
|
||||
},
|
||||
"dependencies": {
|
||||
"@types/bun": "1.3.13"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,5 +19,6 @@
|
||||
|
||||
## Verify
|
||||
|
||||
- `bun test packages/api-server/test`
|
||||
- `bun run --cwd packages/api-server build`
|
||||
- `bun run ci`
|
||||
|
||||
@@ -2,18 +2,22 @@
|
||||
"name": "@marketplace-scrapers/api-server",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"module": "./src/index.ts",
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"start": "bun ./src/index.ts",
|
||||
"dev": "bun --watch ./src/index.ts",
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api"
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/api",
|
||||
"typecheck": "bun tsgo"
|
||||
},
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*"
|
||||
"@marketplace-scrapers/core": "workspace:*",
|
||||
"@typescript/native-preview": "catalog:"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest"
|
||||
"@types/bun": "catalog:"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { logger } from "./logger";
|
||||
import { ebayRoute } from "./routes/ebay";
|
||||
import { facebookRoute } from "./routes/facebook";
|
||||
import { kijijiRoute } from "./routes/kijiji";
|
||||
@@ -27,4 +28,4 @@ const server = Bun.serve({
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`API Server running on ${server.hostname}:${server.port}`);
|
||||
logger.log(`API Server running on ${server.hostname}:${server.port}`);
|
||||
|
||||
10
packages/api-server/src/logger.ts
Normal file
10
packages/api-server/src/logger.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
// Returns true when NODE_ENV is exactly "test"; read from process.env on every call rather than cached.
const isTest = () => process.env.NODE_ENV === "test";
|
||||
|
||||
// Thin console wrapper: both methods forward their arguments unchanged to the
// matching console method, and do nothing when isTest() reports a test run.
export const logger = {
|
||||
// Same parameter list as console.log.
log: (...args: Parameters<typeof console.log>) => {
|
||||
if (!isTest()) console.log(...args);
|
||||
},
|
||||
// Same parameter list as console.error.
error: (...args: Parameters<typeof console.error>) => {
|
||||
if (!isTest()) console.error(...args);
|
||||
},
|
||||
};
|
||||
@@ -1,62 +1,84 @@
|
||||
import { fetchEbayItems } from "@marketplace-scrapers/core";
|
||||
import { logger } from "../logger";
|
||||
import {
|
||||
emptySearchResponse,
|
||||
getRequiredSearchQuery,
|
||||
parseDollarPriceParam,
|
||||
parseNonNegativeIntegerParam,
|
||||
} from "./helpers";
|
||||
|
||||
/**
|
||||
* GET /api/ebay?q={query}&minPrice={minPrice}&maxPrice={maxPrice}&strictMode={strictMode}&exclusions={exclusions}&keywords={keywords}&buyItNowOnly={buyItNowOnly}&canadaOnly={canadaOnly}&maxItems={maxItems}&unstableFilter={unstableFilter}
|
||||
* Search eBay for listings (default: Buy It Now only, Canada only)
|
||||
*/
|
||||
export async function ebayRoute(req: Request): Promise<Response> {
|
||||
const reqUrl = new URL(req.url);
|
||||
|
||||
const SEARCH_QUERY = getRequiredSearchQuery(req);
|
||||
if (SEARCH_QUERY instanceof Response) {
|
||||
return SEARCH_QUERY;
|
||||
}
|
||||
|
||||
const minPrice = parseDollarPriceParam(reqUrl.searchParams, "minPrice");
|
||||
if (minPrice instanceof Response) {
|
||||
return minPrice;
|
||||
}
|
||||
const maxPrice = parseDollarPriceParam(reqUrl.searchParams, "maxPrice");
|
||||
if (maxPrice instanceof Response) {
|
||||
return maxPrice;
|
||||
}
|
||||
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
|
||||
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
|
||||
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
|
||||
const exclusionsParam = reqUrl.searchParams.get("exclusions");
|
||||
const exclusions = exclusionsParam
|
||||
? exclusionsParam.split(",").map((s) => s.trim())
|
||||
: [];
|
||||
const keywordsParam = reqUrl.searchParams.get("keywords");
|
||||
const keywords = keywordsParam
|
||||
? keywordsParam.split(",").map((s) => s.trim())
|
||||
: [SEARCH_QUERY];
|
||||
|
||||
const maxItems = parseNonNegativeIntegerParam(
|
||||
reqUrl.searchParams,
|
||||
"maxItems",
|
||||
);
|
||||
if (maxItems instanceof Response) {
|
||||
return maxItems;
|
||||
}
|
||||
const hideUnstableResults =
|
||||
reqUrl.searchParams.get("unstableFilter") === "true";
|
||||
const opts = {
|
||||
minPrice,
|
||||
maxPrice,
|
||||
strictMode,
|
||||
exclusions,
|
||||
keywords,
|
||||
buyItNowOnly,
|
||||
canadaOnly,
|
||||
maxItems,
|
||||
};
|
||||
|
||||
try {
|
||||
const reqUrl = new URL(req.url);
|
||||
if (hideUnstableResults) {
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, opts, {
|
||||
hideUnstableResults: true,
|
||||
});
|
||||
if (items.results.length === 0 && items.unstableResults.length === 0) {
|
||||
return emptySearchResponse();
|
||||
}
|
||||
return Response.json(items, { status: 200 });
|
||||
}
|
||||
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message:
|
||||
"Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, opts);
|
||||
const isEmpty = !items || items.length === 0;
|
||||
|
||||
const minPriceParam = reqUrl.searchParams.get("minPrice");
|
||||
const minPrice = minPriceParam ? parseInt(minPriceParam, 10) : undefined;
|
||||
const maxPriceParam = reqUrl.searchParams.get("maxPrice");
|
||||
const maxPrice = maxPriceParam ? parseInt(maxPriceParam, 10) : undefined;
|
||||
const strictMode = reqUrl.searchParams.get("strictMode") === "true";
|
||||
const buyItNowOnly = reqUrl.searchParams.get("buyItNowOnly") !== "false";
|
||||
const canadaOnly = reqUrl.searchParams.get("canadaOnly") !== "false";
|
||||
const exclusionsParam = reqUrl.searchParams.get("exclusions");
|
||||
const exclusions = exclusionsParam
|
||||
? exclusionsParam.split(",").map((s) => s.trim())
|
||||
: [];
|
||||
const keywordsParam = reqUrl.searchParams.get("keywords");
|
||||
const keywords = keywordsParam
|
||||
? keywordsParam.split(",").map((s) => s.trim())
|
||||
: [SEARCH_QUERY];
|
||||
|
||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : undefined;
|
||||
const items = await fetchEbayItems(SEARCH_QUERY, 1, {
|
||||
minPrice,
|
||||
maxPrice,
|
||||
strictMode,
|
||||
exclusions,
|
||||
keywords,
|
||||
buyItNowOnly,
|
||||
canadaOnly,
|
||||
});
|
||||
|
||||
const results = maxItems ? items.slice(0, maxItems) : items;
|
||||
|
||||
if (!results || results.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
);
|
||||
return Response.json(results, { status: 200 });
|
||||
if (isEmpty) {
|
||||
return emptySearchResponse();
|
||||
}
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("eBay scraping error:", error);
|
||||
logger.error("eBay scraping error:", error);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
|
||||
@@ -1,4 +1,10 @@
|
||||
import { fetchFacebookItems } from "@marketplace-scrapers/core";
|
||||
import { logger } from "../logger";
|
||||
import {
|
||||
emptySearchResponse,
|
||||
getRequiredSearchQuery,
|
||||
parseNonNegativeIntegerParam,
|
||||
} from "./helpers";
|
||||
|
||||
/**
|
||||
* GET /api/facebook?q={query}&location={location}
|
||||
@@ -7,30 +13,47 @@ import { fetchFacebookItems } from "@marketplace-scrapers/core";
|
||||
export async function facebookRoute(req: Request): Promise<Response> {
|
||||
const reqUrl = new URL(req.url);
|
||||
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
const SEARCH_QUERY = getRequiredSearchQuery(req);
|
||||
if (SEARCH_QUERY instanceof Response) {
|
||||
return SEARCH_QUERY;
|
||||
}
|
||||
|
||||
const LOCATION = reqUrl.searchParams.get("location") || "toronto";
|
||||
const maxItemsParam = reqUrl.searchParams.get("maxItems");
|
||||
const maxItems = maxItemsParam ? parseInt(maxItemsParam, 10) : 25;
|
||||
const maxItems = parseNonNegativeIntegerParam(
|
||||
reqUrl.searchParams,
|
||||
"maxItems",
|
||||
25,
|
||||
);
|
||||
if (maxItems instanceof Response) {
|
||||
return maxItems;
|
||||
}
|
||||
const hideUnstableResults =
|
||||
reqUrl.searchParams.get("unstableFilter") === "true";
|
||||
|
||||
try {
|
||||
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems);
|
||||
if (!items || items.length === 0)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
if (hideUnstableResults) {
|
||||
const items = await fetchFacebookItems(
|
||||
SEARCH_QUERY,
|
||||
1,
|
||||
LOCATION,
|
||||
maxItems,
|
||||
{
|
||||
hideUnstableResults: true,
|
||||
},
|
||||
);
|
||||
if (items.results.length === 0 && items.unstableResults.length === 0) {
|
||||
return emptySearchResponse();
|
||||
}
|
||||
return Response.json(items, { status: 200 });
|
||||
}
|
||||
|
||||
const items = await fetchFacebookItems(SEARCH_QUERY, 1, LOCATION, maxItems);
|
||||
if (!items || items.length === 0) {
|
||||
return emptySearchResponse();
|
||||
}
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Facebook scraping error:", error);
|
||||
logger.error("Facebook scraping error:", error);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
|
||||
64
packages/api-server/src/routes/helpers.ts
Normal file
64
packages/api-server/src/routes/helpers.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
/**
 * Reads the mandatory search term from a request.
 *
 * The `query` header is consulted first; when it is missing or empty the `q`
 * URL search parameter is used instead.
 *
 * @param req - Incoming request.
 * @returns The search term, or a 400 JSON `Response` when neither source
 *   supplies a non-empty value.
 */
export function getRequiredSearchQuery(req: Request): string | Response {
  const { searchParams } = new URL(req.url);

  const fromHeader = req.headers.get("query");
  if (fromHeader) return fromHeader;

  const fromParam = searchParams.get("q");
  if (fromParam) return fromParam;

  return Response.json(
    {
      message: "Request didn't have 'query' header or 'q' search parameter!",
    },
    { status: 400 },
  );
}
|
||||
|
||||
/**
 * Parses an optional search parameter that must be a non-negative integer.
 *
 * @param searchParams - Parameters to read from.
 * @param name - Parameter name; also used in the error message.
 * @param defaultValue - Returned as-is when the parameter is absent.
 * @returns The parsed number, `defaultValue` (possibly `undefined`) when the
 *   parameter is absent, or a 400 JSON `Response` when the value is not a
 *   plain run of ASCII digits or is too large to be represented exactly.
 */
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue: number,
): number | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response;
export function parseNonNegativeIntegerParam(
  searchParams: URLSearchParams,
  name: string,
  defaultValue?: number,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return defaultValue;
  }

  // Digits-only keeps signs, decimals, exponents and whitespace out.
  const parsed = /^\d+$/.test(rawValue) ? Number(rawValue) : Number.NaN;

  // A long enough digit string converts to an imprecise value or Infinity;
  // reject those instead of handing a bogus limit to the scrapers.
  if (!Number.isSafeInteger(parsed)) {
    return Response.json(
      { message: `Invalid ${name} parameter` },
      { status: 400 },
    );
  }
  return parsed;
}
|
||||
|
||||
/**
 * Parses an optional dollar amount search parameter into integer cents.
 *
 * Accepted form: one or more digits, optionally followed by a dot and one or
 * two digits (e.g. `5`, `12.3`, `12.34`).
 *
 * @param searchParams - Parameters to read from.
 * @param name - Parameter name; also used in the error message.
 * @returns The amount in cents, `undefined` when the parameter is absent, or
 *   a 400 JSON `Response` when the value is malformed or too large to be
 *   represented exactly.
 */
export function parseDollarPriceParam(
  searchParams: URLSearchParams,
  name: string,
): number | undefined | Response {
  const rawValue = searchParams.get(name);
  if (rawValue === null) {
    return undefined;
  }

  // Math.round absorbs binary floating point error such as 0.29 * 100.
  const cents = /^\d+(?:\.\d{1,2})?$/.test(rawValue)
    ? Math.round(Number(rawValue) * 100)
    : Number.NaN;

  // Very long digit strings overflow to Infinity or lose precision; treat
  // them as invalid rather than passing a meaningless bound downstream.
  if (!Number.isSafeInteger(cents)) {
    return Response.json(
      { message: `Invalid ${name} parameter` },
      { status: 400 },
    );
  }
  return cents;
}
|
||||
|
||||
/**
 * Builds the 404 JSON response used when a search yields nothing.
 *
 * @param hint - Optional extra guidance appended after the standard message;
 *   an empty string is treated the same as no hint.
 * @returns A `Response` with status 404 and a `{ message }` JSON body.
 */
export function emptySearchResponse(hint?: string): Response {
  let message = "Search didn't return any results!";
  if (hint) {
    message = `Search didn't return any results! ${hint}`;
  }
  return Response.json({ message }, { status: 404 });
}
|
||||
@@ -1,4 +1,11 @@
|
||||
import { fetchKijijiItems } from "@marketplace-scrapers/core";
|
||||
import { logger } from "../logger";
|
||||
import {
|
||||
emptySearchResponse,
|
||||
getRequiredSearchQuery,
|
||||
parseDollarPriceParam,
|
||||
parseNonNegativeIntegerParam,
|
||||
} from "./helpers";
|
||||
|
||||
/**
|
||||
* GET /api/kijiji?q={query}
|
||||
@@ -7,44 +14,68 @@ import { fetchKijijiItems } from "@marketplace-scrapers/core";
|
||||
export async function kijijiRoute(req: Request): Promise<Response> {
|
||||
const reqUrl = new URL(req.url);
|
||||
|
||||
const SEARCH_QUERY =
|
||||
req.headers.get("query") || reqUrl.searchParams.get("q") || null;
|
||||
if (!SEARCH_QUERY)
|
||||
return Response.json(
|
||||
{
|
||||
message: "Request didn't have 'query' header or 'q' search parameter!",
|
||||
},
|
||||
{ status: 400 },
|
||||
);
|
||||
const SEARCH_QUERY = getRequiredSearchQuery(req);
|
||||
if (SEARCH_QUERY instanceof Response) {
|
||||
return SEARCH_QUERY;
|
||||
}
|
||||
|
||||
const maxPagesParam = reqUrl.searchParams.get("maxPages");
|
||||
const maxPages = maxPagesParam ? parseInt(maxPagesParam, 10) : 5;
|
||||
const priceMinParam = reqUrl.searchParams.get("priceMin");
|
||||
const priceMin = priceMinParam ? parseInt(priceMinParam, 10) : undefined;
|
||||
const priceMaxParam = reqUrl.searchParams.get("priceMax");
|
||||
const priceMax = priceMaxParam ? parseInt(priceMaxParam, 10) : undefined;
|
||||
const maxPages = parseNonNegativeIntegerParam(
|
||||
reqUrl.searchParams,
|
||||
"maxPages",
|
||||
5,
|
||||
);
|
||||
if (maxPages instanceof Response) {
|
||||
return maxPages;
|
||||
}
|
||||
const priceMin = parseDollarPriceParam(reqUrl.searchParams, "priceMin");
|
||||
if (priceMin instanceof Response) {
|
||||
return priceMin;
|
||||
}
|
||||
const priceMax = parseDollarPriceParam(reqUrl.searchParams, "priceMax");
|
||||
if (priceMax instanceof Response) {
|
||||
return priceMax;
|
||||
}
|
||||
const hideUnstableResults =
|
||||
reqUrl.searchParams.get("unstableFilter") === "true";
|
||||
|
||||
const searchOptions = {
|
||||
location: reqUrl.searchParams.get("location") || undefined,
|
||||
category: reqUrl.searchParams.get("category") || undefined,
|
||||
keywords: reqUrl.searchParams.get("keywords") || undefined,
|
||||
sortBy: reqUrl.searchParams.get("sortBy") as
|
||||
| "relevancy"
|
||||
| "date"
|
||||
| "price"
|
||||
| "distance"
|
||||
| undefined,
|
||||
sortOrder: reqUrl.searchParams.get("sortOrder") as
|
||||
| "desc"
|
||||
| "asc"
|
||||
| undefined,
|
||||
sortBy:
|
||||
(reqUrl.searchParams.get("sortBy") as
|
||||
| "relevancy"
|
||||
| "date"
|
||||
| "price"
|
||||
| "distance"
|
||||
| undefined) || undefined,
|
||||
sortOrder:
|
||||
(reqUrl.searchParams.get("sortOrder") as "desc" | "asc" | undefined) ||
|
||||
undefined,
|
||||
maxPages,
|
||||
priceMin,
|
||||
priceMax,
|
||||
cookies: reqUrl.searchParams.get("cookies") || undefined,
|
||||
};
|
||||
|
||||
try {
|
||||
if (hideUnstableResults) {
|
||||
const items = await fetchKijijiItems(
|
||||
SEARCH_QUERY,
|
||||
4, // 4 requests per second for faster scraping
|
||||
"https://www.kijiji.ca",
|
||||
searchOptions,
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
if (items.results.length === 0 && items.unstableResults.length === 0) {
|
||||
return emptySearchResponse(
|
||||
`Kijiji matches ALL words in the query against listing titles. ` +
|
||||
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
|
||||
);
|
||||
}
|
||||
return Response.json(items, { status: 200 });
|
||||
}
|
||||
|
||||
const items = await fetchKijijiItems(
|
||||
SEARCH_QUERY,
|
||||
4, // 4 requests per second for faster scraping
|
||||
@@ -52,14 +83,15 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
||||
searchOptions,
|
||||
{},
|
||||
);
|
||||
if (!items)
|
||||
return Response.json(
|
||||
{ message: "Search didn't return any results!" },
|
||||
{ status: 404 },
|
||||
if (!items || items.length === 0) {
|
||||
return emptySearchResponse(
|
||||
`Kijiji matches ALL words in the query against listing titles. ` +
|
||||
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
|
||||
);
|
||||
}
|
||||
return Response.json(items, { status: 200 });
|
||||
} catch (error) {
|
||||
console.error("Kijiji scraping error:", error);
|
||||
logger.error("Kijiji scraping error:", error);
|
||||
const errorMessage =
|
||||
error instanceof Error ? error.message : "Unknown error occurred";
|
||||
return Response.json({ message: errorMessage }, { status: 400 });
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,13 +1,9 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"lib": ["dom"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
},
|
||||
"strict": true,
|
||||
"noEmit": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"include": ["./src", "./test", "../../types/**/*.d.ts"]
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
- Isolate marketplace-specific hacks/selectors inside the owning scraper file unless they are genuinely shared.
|
||||
- If a new helper is scraper-local, keep it local. Do not promote it into `utils` early.
|
||||
- If you change shared types or exports, check downstream imports in both adapter packages.
|
||||
- eBay SplashUI challenge handling needs raw `fetch` for manual redirects and `getSetCookie()`; use `fetchHtml` only once the flow only needs final HTML.
|
||||
|
||||
## Tests
|
||||
|
||||
|
||||
@@ -2,18 +2,24 @@
|
||||
"name": "@marketplace-scrapers/core",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"main": "./src/index.ts",
|
||||
"module": "./src/index.ts",
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"typecheck": "bun tsgo"
|
||||
},
|
||||
"dependencies": {
|
||||
"@typescript/native-preview": "catalog:",
|
||||
"argon2-wasm-pro": "1.1.0",
|
||||
"cli-progress": "^3.12.0",
|
||||
"linkedom": "^0.18.12",
|
||||
"unidecode": "^1.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest",
|
||||
"@types/unidecode": "^1.1.0",
|
||||
"@types/cli-progress": "^3.11.6"
|
||||
"@types/bun": "catalog:",
|
||||
"@types/cli-progress": "catalog:",
|
||||
"@types/unidecode": "catalog:"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
|
||||
@@ -39,5 +39,7 @@ export * from "./types/common";
|
||||
// Export shared utilities
|
||||
export * from "./utils/cookies";
|
||||
export * from "./utils/delay";
|
||||
export * from "./utils/ebay-challenge";
|
||||
export * from "./utils/format";
|
||||
export * from "./utils/http";
|
||||
export * from "./utils/unstable";
|
||||
|
||||
@@ -1,10 +1,19 @@
|
||||
import { parseHTML } from "linkedom";
|
||||
import type {
|
||||
HTMLString,
|
||||
UnstableListingBuckets,
|
||||
UnstableListingModeOptions,
|
||||
} from "../types/common";
|
||||
import {
|
||||
type CookieConfig,
|
||||
ensureCookies,
|
||||
formatCookiesForHeader,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { solveEbayChallenge } from "../utils/ebay-challenge";
|
||||
import { fetchHtml, HttpError, RateLimitError } from "../utils/http";
|
||||
import { logger } from "../utils/logger";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
|
||||
// eBay cookie configuration
|
||||
const EBAY_COOKIE_CONFIG: CookieConfig = {
|
||||
@@ -32,6 +41,243 @@ export interface EbayListingDetails {
|
||||
address?: string | null;
|
||||
}
|
||||
|
||||
const EBAY_PRICE_TEXT_RE = /^(?:\s*(?:CA|C|US)\s*\$|\s*[$£€¥])/u;
|
||||
const EBAY_ITEM_URL_RE = /^https?:\/\/(?:www\.)?ebay\.(?:ca|com)\/itm\//u;
|
||||
|
||||
/** Entity references handled by `decodeHtmlEntities`, keyed by their source text. */
const HTML_ENTITY_REPLACEMENTS: Readonly<Record<string, string>> = {
  "&amp;": "&",
  "&quot;": '"',
  "&#39;": "'",
  "&#x27;": "'",
  "&apos;": "'",
  "&lt;": "<",
  "&gt;": ">",
};

/**
 * Decodes the handful of HTML entity references that appear in scraped
 * attribute values and text, then trims surrounding whitespace.
 *
 * All references are replaced in a single pass, so already-escaped text is
 * only unescaped one level (`&amp;lt;` becomes `&lt;`, not `<`). Anything
 * that is not one of the known references is left untouched.
 *
 * @param value - Raw text that may contain entity references.
 * @returns The decoded, trimmed text.
 */
function decodeHtmlEntities(value: string): string {
  return value
    .replace(
      /&(?:amp|quot|apos|lt|gt|#39|#x27);/g,
      (entity) => HTML_ENTITY_REPLACEMENTS[entity] ?? entity,
    )
    .trim();
}
|
||||
|
||||
/**
 * Reduces an HTML fragment to plain text: every tag becomes a space, runs of
 * whitespace collapse to a single space, and the result is passed through
 * `decodeHtmlEntities`.
 *
 * @param value - HTML fragment.
 * @returns The text as returned by `decodeHtmlEntities`.
 */
function stripHtml(value: string): string {
  const withoutTags = value.replace(/<[^>]*>/g, " ");
  const singleSpaced = withoutTags.replace(/\s+/g, " ");
  return decodeHtmlEntities(singleSpaced);
}
|
||||
|
||||
/**
 * Extracts the value of one attribute from the raw text of an HTML tag.
 *
 * Double-quoted, single-quoted and unquoted values are supported, and the
 * attribute name is matched case-insensitively. The name is escaped before it
 * is embedded in the pattern, so regex metacharacters in `attrName` are
 * matched literally instead of altering the pattern or throwing.
 *
 * @param tag - Raw tag text, e.g. `<a class="x" href="/itm/1">`.
 * @param attrName - Attribute name to look up.
 * @returns The attribute value exactly as written (it may be empty and is
 *   not entity-decoded), or `null` when the attribute is not present.
 */
function getHtmlAttr(tag: string, attrName: string): string | null {
  // Only regex syntax characters are escaped; with the `u` flag any other
  // identity escape would itself be a syntax error.
  const escapedName = attrName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const attrMatch = tag.match(
    new RegExp(`\\s${escapedName}=(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`, "iu"),
  );
  return attrMatch?.[1] ?? attrMatch?.[2] ?? attrMatch?.[3] ?? null;
}
|
||||
|
||||
/**
 * Resolves a scraped link against `https://www.ebay.ca` and accepts it only
 * if the absolute form matches `EBAY_ITEM_URL_RE`.
 *
 * @param url - Link text, first passed through `decodeHtmlEntities`.
 * @returns The absolute URL, or `null` when it cannot be parsed or does not
 *   match the item URL pattern.
 */
function normalizeEbayUrl(url: string): string | null {
  const decoded = decodeHtmlEntities(url);

  let absolute: string;
  try {
    absolute = new URL(decoded, "https://www.ebay.ca").href;
  } catch {
    return null;
  }

  return EBAY_ITEM_URL_RE.test(absolute) ? absolute : null;
}
|
||||
|
||||
/**
 * Assembles an `EbayListingDetails` record from raw scraped fields.
 *
 * The URL goes through `normalizeEbayUrl`, the title and price text through
 * `stripHtml`, and the cleaned price through `parseEbayPrice`.
 *
 * @param url - Raw listing link.
 * @param title - Raw title markup or text.
 * @param priceText - Raw price markup or text.
 * @returns The listing, or `null` when the URL is rejected, the title is
 *   empty or the "Shop on eBay" placeholder, or the price cannot be parsed.
 */
function toEbayListing(
  url: string,
  title: string,
  priceText: string,
): EbayListingDetails | null {
  const listingUrl = normalizeEbayUrl(url);
  const listingTitle = stripHtml(title);
  const amountFormatted = stripHtml(priceText);
  const parsedPrice = parseEbayPrice(amountFormatted);

  const titleUsable = Boolean(listingTitle) && listingTitle !== "Shop on eBay";
  if (!listingUrl || !titleUsable || !parsedPrice) {
    return null;
  }

  const { cents, currency } = parsedPrice;
  return {
    url: listingUrl,
    title: listingTitle,
    listingPrice: { amountFormatted, cents, currency },
    listingType: "OFFER",
    listingStatus: "ACTIVE",
    address: null,
  };
}
|
||||
|
||||
/**
 * Looks up the first of several candidate keys whose value is a string with
 * non-whitespace content.
 *
 * @param value - Object to inspect.
 * @param keys - Candidate property names, in priority order.
 * @returns The trimmed string for the first matching key, or `null` when no
 *   key holds a non-blank string.
 */
function readObjectString(
  value: Record<string, unknown>,
  keys: string[],
): string | null {
  for (const key of keys) {
    const raw = value[key];
    if (typeof raw !== "string") continue;

    const trimmed = raw.trim();
    if (trimmed.length > 0) return trimmed;
  }
  return null;
}
|
||||
|
||||
/**
 * Finds a displayable price inside a payload object.
 *
 * Lookup order:
 * 1. a non-blank string under `price`, `currentPrice` or `displayPrice`;
 * 2. for each of `price`, `currentPrice`, `displayPrice`, `priceInfo` that
 *    holds a non-array object: a non-blank string under `amount`,
 *    `formatted` or `text`, otherwise its `value` (non-blank string or any
 *    number) prefixed with `currency` / `currencyCode` when one is present.
 *
 * @param value - Payload object to inspect.
 * @returns The price text, or `null` when nothing usable is found.
 */
function readPayloadPrice(value: Record<string, unknown>): string | null {
  const flatPrice = readObjectString(value, [
    "price",
    "currentPrice",
    "displayPrice",
  ]);
  if (flatPrice) return flatPrice;

  const nestedKeys = ["price", "currentPrice", "displayPrice", "priceInfo"];
  for (const key of nestedKeys) {
    const nested = value[key];
    const isPlainObject =
      typeof nested === "object" && nested !== null && !Array.isArray(nested);
    if (!isPlainObject) continue;

    const priceObject = nested as Record<string, unknown>;

    const label = readObjectString(priceObject, ["amount", "formatted", "text"]);
    if (label) return label;

    const currency = readObjectString(priceObject, ["currency", "currencyCode"]);
    const amount = priceObject.value;
    const amountUsable =
      typeof amount === "number" ||
      (typeof amount === "string" && amount.trim() !== "");
    if (amountUsable) {
      return currency ? `${currency} ${amount}` : String(amount);
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Walks an arbitrary parsed payload depth-first and appends every listing it
 * can build to `results`.
 *
 * An object is treated as a listing when it yields a URL (`itemWebUrl`,
 * `itemUrl`, `url`, `webUrl`), a title (`title`, `itemTitle`, `name`) and a
 * price via `readPayloadPrice`, and `toEbayListing` accepts them. Such an
 * object is not searched further; every other object has its property values
 * visited, and arrays have each element visited.
 *
 * @param value - Any parsed value; non-objects are ignored.
 * @param results - Output array, mutated in place.
 */
function collectPayloadListings(
  value: unknown,
  results: EbayListingDetails[],
): void {
  if (typeof value !== "object" || value === null) return;

  if (Array.isArray(value)) {
    value.forEach((item) => collectPayloadListings(item, results));
    return;
  }

  const node = value as Record<string, unknown>;
  const url = readObjectString(node, [
    "itemWebUrl",
    "itemUrl",
    "url",
    "webUrl",
  ]);
  const title = readObjectString(node, ["title", "itemTitle", "name"]);
  const priceText = readPayloadPrice(node);

  const listing =
    url && title && priceText ? toEbayListing(url, title, priceText) : null;
  if (listing) {
    results.push(listing);
    return;
  }

  Object.values(node).forEach((child) => collectPayloadListings(child, results));
}
|
||||
|
||||
/**
 * Extracts listings from `data-inlinepayload` attributes embedded in a page.
 *
 * Each attribute value (double-quoted, single-quoted or bare) is passed
 * through `decodeHtmlEntities`, URI-decoded, parsed as JSON and handed to
 * `collectPayloadListings`. Attribute values that fail any of those steps
 * are skipped.
 *
 * @param htmlString - Page HTML.
 * @returns Listings in the order they were found; duplicates are not removed.
 */
function parseEmbeddedEbayListings(
  htmlString: HTMLString,
): EbayListingDetails[] {
  const listings: EbayListingDetails[] = [];
  const payloadAttrRe =
    /data-inlinepayload=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/giu;

  for (const [, doubleQuoted, singleQuoted, bare] of htmlString.matchAll(
    payloadAttrRe,
  )) {
    const rawPayload = doubleQuoted ?? singleQuoted ?? bare;
    if (!rawPayload) continue;

    try {
      const json = decodeURIComponent(decodeHtmlEntities(rawPayload));
      collectPayloadListings(JSON.parse(json), listings);
    } catch {
      // eBay inline payloads vary by module; non-JSON payloads are ignored.
    }
  }

  return listings;
}
|
||||
|
||||
/**
 * Regex-based extraction of listings from `s-card` markup.
 *
 * The page is cut into chunks that start at a `<div>` whose class contains
 * `s-card` and run up to (not including) the next such `<div>`, `</body>` or
 * `</html>`. From each chunk the first `/itm/` anchor, the first
 * `s-card__title` element and the first `s-card__price` element are taken
 * and combined through `toEbayListing`. Chunks missing any of the three, or
 * rejected by `toEbayListing`, are skipped.
 *
 * @param htmlString - Page HTML.
 * @returns Listings in document order; duplicates are not removed.
 */
function parseSCardHtmlListings(htmlString: HTMLString): EbayListingDetails[] {
  const cardRe =
    /<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)[\s\S]*?(?=<div\b[^>]*class=(?:"[^"]*\bs-card\b[^"]*"|'[^']*\bs-card\b[^']*'|[^\s>]*\bs-card\b[^\s>]*)|<\/body>|<\/html>)/giu;
  const itemLinkRe =
    /<a\b[^>]*\bhref=(?:"[^"]*\/itm\/[^"]*"|'[^']*\/itm\/[^']*'|[^\s>]*\/itm\/[^\s>]*)[^>]*>/iu;
  const titleRe =
    /<[^>]*\bclass=(?:"[^"]*\bs-card__title\b[^"]*"|'[^']*\bs-card__title\b[^']*'|[^\s>]*\bs-card__title\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu;
  const priceRe =
    /<[^>]*\bclass=(?:"[^"]*\bs-card__price\b[^"]*"|'[^']*\bs-card__price\b[^']*'|[^\s>]*\bs-card__price\b[^\s>]*)[^>]*>([\s\S]*?)<\/[^>]+>/iu;

  const listings: EbayListingDetails[] = [];

  for (const [cardHtml] of htmlString.matchAll(cardRe)) {
    const anchorTag = itemLinkRe.exec(cardHtml)?.[0];
    const titleHtml = titleRe.exec(cardHtml)?.[1];
    const priceHtml = priceRe.exec(cardHtml)?.[1];
    if (!anchorTag || !titleHtml || !priceHtml) continue;

    const href = getHtmlAttr(anchorTag, "href");
    if (!href) continue;

    const listing = toEbayListing(href, titleHtml, priceHtml);
    if (listing) listings.push(listing);
  }

  return listings;
}
|
||||
|
||||
/**
 * Removes listings that point at the same item, keeping the first occurrence
 * and the original order. Two listings are considered the same when
 * `canonicalizeEbayItemUrl` returns the same string for their URLs.
 *
 * @param listings - Listings to filter; not mutated.
 * @returns A new array without duplicates.
 */
function dedupeEbayListings(
  listings: EbayListingDetails[],
): EbayListingDetails[] {
  const seen = new Set<string>();

  return listings.filter((listing) => {
    const key = canonicalizeEbayItemUrl(listing.url);
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}
|
||||
|
||||
/**
 * Produces a comparison key for an item URL by dropping the query string,
 * fragment and anything after the numeric item id.
 *
 * The URL is resolved against `https://www.ebay.ca`. When the path contains
 * `/itm/<digits>` or `/itm/<segment>/<digits>`, the key is the origin plus
 * that part of the path; otherwise it is the origin plus the whole path.
 *
 * @param url - Absolute or relative URL.
 * @returns The key, or `url` unchanged when it cannot be parsed.
 */
function canonicalizeEbayItemUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url, "https://www.ebay.ca");
  } catch {
    return url;
  }

  const itemPath = /\/itm\/(?:[^/?#]+\/)?\d+/.exec(parsed.pathname)?.[0];
  return `${parsed.origin}${itemPath ?? parsed.pathname}`;
}
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
/**
|
||||
@@ -56,7 +302,7 @@ function parseEbayPrice(
|
||||
const cents = Math.round(dollars * 100);
|
||||
|
||||
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
|
||||
let currency = "USD"; // Default
|
||||
let currency = "CAD"; // Default for ebay.ca
|
||||
|
||||
if (
|
||||
cleaned.toUpperCase().includes("CAD") ||
|
||||
@@ -64,24 +310,23 @@ function parseEbayPrice(
|
||||
cleaned.includes("C $")
|
||||
) {
|
||||
currency = "CAD";
|
||||
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
|
||||
} else if (
|
||||
cleaned.toUpperCase().includes("USD") ||
|
||||
cleaned.toUpperCase().includes("US $") ||
|
||||
cleaned.toUpperCase().includes("US$")
|
||||
) {
|
||||
currency = "USD";
|
||||
} else if (cleaned.includes("£")) {
|
||||
currency = "GBP";
|
||||
} else if (cleaned.includes("€")) {
|
||||
currency = "EUR";
|
||||
} else if (cleaned.includes("¥")) {
|
||||
currency = "JPY";
|
||||
}
|
||||
|
||||
return { cents, currency };
|
||||
}
|
||||
|
||||
class HttpError extends Error {
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly status: number,
|
||||
public readonly url: string,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "HttpError";
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
|
||||
/**
|
||||
@@ -93,8 +338,14 @@ function parseEbayListings(
|
||||
exclusions: string[],
|
||||
strictMode: boolean,
|
||||
): EbayListingDetails[] {
|
||||
const embeddedListings = parseEmbeddedEbayListings(htmlString);
|
||||
if (embeddedListings.length > 0) {
|
||||
return dedupeEbayListings(embeddedListings);
|
||||
}
|
||||
|
||||
const { document } = parseHTML(htmlString);
|
||||
const results: EbayListingDetails[] = [];
|
||||
const seenUrls = new Set<string>();
|
||||
|
||||
// Find all listing links by looking for eBay item URLs (/itm/)
|
||||
const linkElements = document.querySelectorAll('a[href*="itm/"]');
|
||||
@@ -109,9 +360,12 @@ function parseEbayListings(
|
||||
if (!href.startsWith("http")) {
|
||||
href = href.startsWith("//")
|
||||
? `https:${href}`
|
||||
: `https://www.ebay.com${href}`;
|
||||
: `https://www.ebay.ca${href}`;
|
||||
}
|
||||
|
||||
const canonicalUrl = canonicalizeEbayItemUrl(href);
|
||||
if (seenUrls.has(canonicalUrl)) continue;
|
||||
|
||||
// Find the container - go up several levels to find the item container
|
||||
// Modern eBay uses complex nested structures (often 5-10 levels deep)
|
||||
let container: Element | null = linkElement;
|
||||
@@ -173,16 +427,18 @@ function parseEbayListings(
|
||||
"opens in a new window or tab",
|
||||
];
|
||||
|
||||
let shortened = false;
|
||||
for (const uiString of uiStrings) {
|
||||
const uiIndex = title.indexOf(uiString);
|
||||
if (uiIndex !== -1) {
|
||||
title = title.substring(0, uiIndex).trim();
|
||||
shortened = true;
|
||||
break; // Only remove one UI string per title
|
||||
}
|
||||
}
|
||||
|
||||
// If the title became empty or too short after cleaning, skip this item
|
||||
if (title.length < 10) {
|
||||
// If the title was shortened by UI cleaning and became too short, skip this item
|
||||
if (shortened && title.length < 10) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -215,7 +471,6 @@ function parseEbayListings(
|
||||
!text.includes("core") &&
|
||||
!text.includes("ram") &&
|
||||
!text.includes("ssd") &&
|
||||
!/\d{4}/.test(text) && // Avoid years like "2024"
|
||||
!text.includes('"') // Avoid measurements
|
||||
) {
|
||||
priceElement = el;
|
||||
@@ -239,15 +494,10 @@ function parseEbayListings(
|
||||
);
|
||||
|
||||
// Filter to only elements that actually contain prices (not labels)
|
||||
const actualPrices: HTMLElement[] = [];
|
||||
const actualPrices: Element[] = [];
|
||||
for (const el of allPriceElements) {
|
||||
const text = el.textContent?.trim();
|
||||
if (
|
||||
text &&
|
||||
/^\s*[$£€¥]/u.test(text) &&
|
||||
text.length < 50 &&
|
||||
!/\d{4}/.test(text)
|
||||
) {
|
||||
if (text && EBAY_PRICE_TEXT_RE.test(text) && text.length < 50) {
|
||||
actualPrices.push(el);
|
||||
}
|
||||
}
|
||||
@@ -271,11 +521,10 @@ function parseEbayListings(
|
||||
|
||||
if (nonStrikethroughPrices.length > 0) {
|
||||
// Use the first non-strikethrough price (sale price)
|
||||
priceElement = nonStrikethroughPrices[0];
|
||||
priceElement = nonStrikethroughPrices[0] ?? null;
|
||||
} else {
|
||||
// Fallback: use the last price (likely the most current)
|
||||
const lastPrice = actualPrices[actualPrices.length - 1];
|
||||
priceElement = lastPrice;
|
||||
priceElement = actualPrices[actualPrices.length - 1] ?? null;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -323,33 +572,173 @@ function parseEbayListings(
|
||||
};
|
||||
|
||||
results.push(listing);
|
||||
seenUrls.add(canonicalUrl);
|
||||
} catch (err) {
|
||||
console.warn(`Error parsing eBay listing: ${err}`);
|
||||
logger.warn(`Error parsing eBay listing: ${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
if (results.length > 0) {
|
||||
return results;
|
||||
}
|
||||
|
||||
return dedupeEbayListings(
|
||||
parseSCardHtmlListings(htmlString).filter((listing) => {
|
||||
if (
|
||||
exclusions.some((exclusion) =>
|
||||
listing.title.toLowerCase().includes(exclusion.toLowerCase()),
|
||||
)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (
|
||||
!strictMode ||
|
||||
keywords.some((keyword) =>
|
||||
listing.title.toLowerCase().includes(keyword.toLowerCase()),
|
||||
)
|
||||
);
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// ----------------------------- Cookie Loading -----------------------------
|
||||
// ----------------------------- Session & Challenge -----------------------------
|
||||
|
||||
/**
|
||||
* Load eBay cookies from EBAY_COOKIE
|
||||
* Load eBay cookies from EBAY_COOKIE env var
|
||||
*/
|
||||
async function loadEbayCookies(): Promise<string | undefined> {
|
||||
try {
|
||||
const cookies = await ensureCookies(EBAY_COOKIE_CONFIG);
|
||||
return formatCookiesForHeader(cookies, "www.ebay.ca");
|
||||
} catch {
|
||||
console.warn(
|
||||
logger.warn(
|
||||
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
|
||||
);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
const EBAY_UA =
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

/**
 * Requests the eBay homepage to pick up the Akamai fingerprinting cookies
 * that the edge layer expects before any search request.
 *
 * Redirects are not followed. Every `Set-Cookie` header on the response
 * contributes its leading `name=value` pair; a later header with the same
 * name overrides an earlier one.
 *
 * @returns A `Cookie` header string, or `undefined` when the request fails,
 *   the response is not OK, or no cookies were set.
 */
async function warmEbaySession(): Promise<string | undefined> {
  try {
    const homepage = await fetch("https://www.ebay.ca", {
      redirect: "manual",
      headers: {
        "User-Agent": EBAY_UA,
        Accept:
          "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
      },
    });
    if (!homepage.ok) return undefined;

    const jar: Record<string, string> = {};
    // getSetCookie may be missing on older runtimes, hence the optional call.
    (homepage.headers.getSetCookie?.() ?? []).forEach((line) => {
      const [, name, value] = /^([^=]+)=([^;]+)/.exec(line) ?? [];
      if (name && value) jar[name] = value;
    });

    const pairs = Object.entries(jar).map(([name, value]) => `${name}=${value}`);
    return pairs.length > 0 ? pairs.join("; ") : undefined;
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Merge several raw Cookie header strings (`a=1; b=2`) into one.
 *
 * Sources are applied left to right, so a cookie that appears in a later
 * source overrides the value from an earlier one while keeping its original
 * position in the output. Empty or `undefined` sources are ignored.
 *
 * Pairs without an `=` or with an empty name (after trimming) are skipped.
 * Only the first `=` splits name from value, so values may contain `=`.
 *
 * @param base - The starting Cookie header string (may be empty).
 * @param additions - Further Cookie header strings to layer on top.
 * @returns The merged `name=value` pairs joined with `"; "`.
 */
function mergeCookies(
  base: string,
  ...additions: (string | undefined)[]
): string {
  // A Map (rather than a plain object) keeps insertion order for every name,
  // including integer-like ones, and treats names such as `__proto__` as
  // ordinary keys.
  const jar = new Map<string, string>();

  for (const source of [base, ...additions]) {
    if (!source) continue;

    for (const pair of source.split(";")) {
      const eq = pair.indexOf("=");
      if (eq < 0) continue;

      const name = pair.slice(0, eq).trim();
      if (name === "") continue;

      jar.set(name, pair.slice(eq + 1).trim());
    }
  }

  return Array.from(jar, ([name, value]) => `${name}=${value}`).join("; ");
}
|
||||
|
||||
/**
 * Copy the cookies set by a response into an existing cookie jar.
 *
 * Only the leading `name=value` part of each Set-Cookie header is used;
 * attributes after the first `;` are dropped. Headers whose name or value is
 * empty are ignored, and an existing jar entry with the same name is
 * overwritten. The jar is mutated in place.
 *
 * @param res - Response whose Set-Cookie headers should be read.
 * @param jar - Name-to-value map that receives the cookies.
 */
function collectResponseCookies(res: Response, jar: Record<string, string>) {
  const setCookieHeaders = res.headers.getSetCookie?.() ?? [];
  setCookieHeaders.forEach((raw) => {
    const parsed = /^([^=]+)=([^;]+)/.exec(raw);
    if (!parsed) return;

    const [, name, value] = parsed;
    if (name && value) jar[name] = value;
  });
}
|
||||
|
||||
/**
 * Serialize a cookie jar into a Cookie request-header value.
 *
 * @param jar - Name-to-value map of cookies.
 * @returns `name=value` pairs joined with `"; "`, or an empty string for an
 *   empty jar.
 */
function cookiesToString(jar: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [name, value] of Object.entries(jar)) {
    pairs.push(`${name}=${value}`);
  }
  return pairs.join("; ");
}
|
||||
|
||||
/** HTTP status that, together with the marker below, identifies a challenge redirect. */
const CHALLENGE_REDIRECT = 307;
/** Path fragment looked for in the `Location` header of a challenge redirect. */
const CHALLENGE_MARKER = "splashui/challenge";

/**
 * Tell whether a response is a redirect to the challenge page.
 *
 * @param res - Response to inspect (expected to be fetched without following
 *   redirects, so that the redirect itself is visible).
 * @returns `true` only when the status is 307 and the `Location` header
 *   contains the challenge marker.
 */
function isChallengeRedirect(res: Response): boolean {
  if (res.status !== CHALLENGE_REDIRECT) return false;

  const target = res.headers.get("location") ?? "";
  return target.includes(CHALLENGE_MARKER);
}
|
||||
|
||||
/**
 * Heuristically decide whether an HTML body is a challenge page rather than
 * real search results.
 *
 * A body counts as a challenge when it is shorter than 50000 characters and
 * contains either the `_crefId` or the `_cdetail` marker.
 * NOTE(review): the size cap presumably keeps full result pages that happen
 * to mention a marker from being misclassified — confirm.
 *
 * @param html - Response body to inspect.
 * @returns `true` when the body looks like a challenge page.
 */
function isChallengeHtml(html: string): boolean {
  if (html.length >= 50000) return false;

  return ["_crefId", "_cdetail"].some((marker) => html.includes(marker));
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
opts:
|
||||
| {
|
||||
minPrice?: number;
|
||||
maxPrice?: number;
|
||||
strictMode?: boolean;
|
||||
exclusions?: string[];
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
maxItems?: number;
|
||||
}
|
||||
| undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<EbayListingDetails>>;
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
opts?: {
|
||||
minPrice?: number;
|
||||
maxPrice?: number;
|
||||
strictMode?: boolean;
|
||||
exclusions?: string[];
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
maxItems?: number;
|
||||
},
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<EbayListingDetails[]>;
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
@@ -361,8 +750,12 @@ export default async function fetchEbayItems(
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
maxItems?: number;
|
||||
} = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
|
||||
|
||||
const {
|
||||
minPrice = 0,
|
||||
maxPrice = Number.MAX_SAFE_INTEGER,
|
||||
@@ -371,9 +764,26 @@ export default async function fetchEbayItems(
|
||||
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
|
||||
buyItNowOnly = true,
|
||||
canadaOnly = true,
|
||||
maxItems,
|
||||
} = opts;
|
||||
|
||||
const cookies = await loadEbayCookies();
|
||||
const finalizeResults = (
|
||||
listings: EbayListingDetails[],
|
||||
): EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails> => {
|
||||
const limitedListings =
|
||||
maxItems !== undefined ? listings.slice(0, maxItems) : listings;
|
||||
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
return limitedListings;
|
||||
}
|
||||
|
||||
return classifyUnstableListings(limitedListings);
|
||||
};
|
||||
|
||||
// Collect cookies from env var + warm-up session
|
||||
const envCookies = await loadEbayCookies();
|
||||
const warmCookies = await warmEbaySession();
|
||||
const baseCookies = mergeCookies(envCookies ?? "", warmCookies);
|
||||
|
||||
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
||||
const urlParams = new URLSearchParams({
|
||||
@@ -392,38 +802,109 @@ export default async function fetchEbayItems(
|
||||
|
||||
const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`;
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
|
||||
|
||||
console.log(`Fetching eBay search: ${searchUrl}`);
|
||||
logger.log(`Fetching eBay search: ${searchUrl}`);
|
||||
|
||||
try {
|
||||
// Use custom headers modeled after real browser requests to bypass bot detection
|
||||
const headers: Record<string, string> = {
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
|
||||
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate, br, zstd",
|
||||
const searchHeaders: Record<string, string> = {
|
||||
"User-Agent": EBAY_UA,
|
||||
Accept:
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
|
||||
Referer: "https://www.ebay.ca/",
|
||||
Connection: "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
"Sec-Fetch-User": "?1",
|
||||
Priority: "u=0, i",
|
||||
};
|
||||
|
||||
// Add cookies if available (helps bypass bot detection)
|
||||
if (cookies) {
|
||||
headers.Cookie = cookies;
|
||||
if (baseCookies) {
|
||||
searchHeaders.Cookie = baseCookies;
|
||||
}
|
||||
|
||||
const res = await fetch(searchUrl, {
|
||||
// Step 1: Make search request (follow redirects for challenge flow)
|
||||
let res = await fetch(searchUrl, {
|
||||
method: "GET",
|
||||
headers,
|
||||
headers: searchHeaders,
|
||||
redirect: "manual",
|
||||
});
|
||||
|
||||
const cookieJar: Record<string, string> = {};
|
||||
|
||||
// Collect cookies from homepage warm-up
|
||||
if (baseCookies) {
|
||||
for (const pair of baseCookies.split(";")) {
|
||||
const eq = pair.indexOf("=");
|
||||
if (eq > 0) {
|
||||
cookieJar[pair.substring(0, eq).trim()] = pair
|
||||
.substring(eq + 1)
|
||||
.trim();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Follow challenge redirect if present
|
||||
if (isChallengeRedirect(res)) {
|
||||
const chalUrl = res.headers.get("location") ?? "";
|
||||
collectResponseCookies(res, cookieJar);
|
||||
|
||||
logger.log("Challenge detected, fetching challenge page...");
|
||||
res = await fetch(chalUrl, {
|
||||
headers: { ...searchHeaders, Cookie: cookiesToString(cookieJar) },
|
||||
redirect: "manual",
|
||||
});
|
||||
collectResponseCookies(res, cookieJar);
|
||||
}
|
||||
|
||||
// Step 3: If response is challenge HTML, solve and submit
|
||||
const responseHtml = await res.text();
|
||||
|
||||
if (isChallengeHtml(responseHtml)) {
|
||||
logger.log("Solving challenge...");
|
||||
const result = await solveEbayChallenge(
|
||||
responseHtml,
|
||||
cookiesToString(cookieJar),
|
||||
);
|
||||
|
||||
if (result) {
|
||||
// Merge answer cookies into jar
|
||||
if (baseCookies) {
|
||||
searchHeaders.Cookie = mergeCookies(baseCookies, result.cookies);
|
||||
} else {
|
||||
searchHeaders.Cookie = result.cookies;
|
||||
}
|
||||
|
||||
logger.log("Challenge solved, retrying search...");
|
||||
|
||||
// Delay briefly before retry
|
||||
await delay(DELAY_MS);
|
||||
|
||||
const retryHtml = await fetchHtml(searchUrl, DELAY_MS, {
|
||||
headers: searchHeaders,
|
||||
});
|
||||
|
||||
const listings = parseEbayListings(
|
||||
retryHtml,
|
||||
keywords,
|
||||
exclusions,
|
||||
strictMode,
|
||||
);
|
||||
|
||||
const filteredListings = listings.filter((listing) => {
|
||||
const cents = listing.listingPrice?.cents;
|
||||
return (
|
||||
typeof cents === "number" && cents >= minPrice && cents <= maxPrice
|
||||
);
|
||||
});
|
||||
|
||||
logger.log(
|
||||
`Parsed ${filteredListings.length} eBay listings (after challenge).`,
|
||||
);
|
||||
return finalizeResults(filteredListings);
|
||||
}
|
||||
|
||||
logger.warn("Challenge solve failed, returning empty results.");
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
// Step 4: Normal flow — no challenge
|
||||
if (!res.ok) {
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
@@ -432,33 +913,32 @@ export default async function fetchEbayItems(
|
||||
);
|
||||
}
|
||||
|
||||
const searchHtml = await res.text();
|
||||
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
|
||||
await delay(DELAY_MS);
|
||||
|
||||
console.log(`\nParsing eBay listings...`);
|
||||
logger.log(`\nParsing eBay listings...`);
|
||||
|
||||
const listings = parseEbayListings(
|
||||
searchHtml,
|
||||
responseHtml,
|
||||
keywords,
|
||||
exclusions,
|
||||
strictMode,
|
||||
);
|
||||
|
||||
// Filter by price range (additional safety check)
|
||||
const filteredListings = listings.filter((listing) => {
|
||||
const cents = listing.listingPrice?.cents;
|
||||
return cents && cents >= minPrice && cents <= maxPrice;
|
||||
return (
|
||||
typeof cents === "number" && cents >= minPrice && cents <= maxPrice
|
||||
);
|
||||
});
|
||||
|
||||
console.log(`Parsed ${filteredListings.length} eBay listings.`);
|
||||
return filteredListings;
|
||||
logger.log(`Parsed ${filteredListings.length} eBay listings.`);
|
||||
return finalizeResults(filteredListings);
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
`Failed to fetch eBay search (${err.status}): ${err.message}`,
|
||||
if (err instanceof HttpError || err instanceof RateLimitError) {
|
||||
logger.warn(
|
||||
`Failed to fetch eBay search (${err instanceof HttpError ? err.statusCode : 429}): ${err.message}`,
|
||||
);
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
@@ -1,23 +1,35 @@
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import type {
|
||||
HTMLString,
|
||||
UnstableListingBuckets,
|
||||
UnstableListingModeOptions,
|
||||
} from "../types/common";
|
||||
import {
|
||||
type Cookie,
|
||||
type CookieConfig,
|
||||
ensureCookies,
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
parseCookieString,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import {
|
||||
buildFacebookHeaders,
|
||||
detectFacebookChallenge,
|
||||
warmFacebookSession,
|
||||
} from "../utils/facebook-challenge";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import { isRecord } from "../utils/http";
|
||||
import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http";
|
||||
import { logger } from "../utils/logger";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
|
||||
/**
|
||||
* Facebook Marketplace Scraper
|
||||
*
|
||||
* Note: Facebook Marketplace requires authentication cookies for full access.
|
||||
* This implementation will return limited or no results without proper authentication.
|
||||
* This is by design to respect Facebook's authentication requirements.
|
||||
* Facebook Marketplace returns search results without authentication when
|
||||
* proper browser headers are sent. Prices and seller details are hidden on
|
||||
* search results but are available on individual item pages even without
|
||||
* auth cookies. For full-price search results, provide FACEBOOK_COOKIE.
|
||||
*/
|
||||
|
||||
// Facebook cookie configuration
|
||||
@@ -81,7 +93,7 @@ interface FacebookMarketplaceItem {
|
||||
__typename: "GroupCommerceProductItem";
|
||||
|
||||
// Listing content
|
||||
marketplace_listing_title: string;
|
||||
marketplace_listing_title?: string;
|
||||
redacted_description?: {
|
||||
text: string;
|
||||
};
|
||||
@@ -94,7 +106,7 @@ interface FacebookMarketplaceItem {
|
||||
listing_price?: {
|
||||
amount: string;
|
||||
currency: string;
|
||||
amount_with_offset: string;
|
||||
amount_with_offset?: string;
|
||||
};
|
||||
|
||||
// Location
|
||||
@@ -122,9 +134,9 @@ interface FacebookMarketplaceItem {
|
||||
|
||||
// Seller information
|
||||
marketplace_listing_seller?: {
|
||||
__typename: "User";
|
||||
id: string;
|
||||
name: string;
|
||||
__typename?: "User";
|
||||
id?: string;
|
||||
name?: string;
|
||||
profile_picture?: {
|
||||
uri: string;
|
||||
};
|
||||
@@ -213,17 +225,6 @@ export async function ensureFacebookCookies(): Promise<Cookie[]> {
|
||||
return ensureCookies(FACEBOOK_COOKIE_CONFIG);
|
||||
}
|
||||
|
||||
/**
 * Error raised for a failed HTTP request, carrying the response status and
 * the URL that was requested alongside the message.
 */
class HttpError extends Error {
  /** HTTP status code of the failed response. */
  public readonly status: number;
  /** URL of the request that failed. */
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
|
||||
|
||||
// ----------------------------- Extraction Metrics -----------------------------
|
||||
|
||||
/**
|
||||
@@ -255,113 +256,28 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
|
||||
successRate < 0.8 &&
|
||||
!extractionStats.lastApiChangeDetected
|
||||
) {
|
||||
console.warn(
|
||||
logger.warn(
|
||||
"Facebook Marketplace API extraction success rate dropped below 80%. This may indicate API changes.",
|
||||
);
|
||||
extractionStats.lastApiChangeDetected = new Date();
|
||||
}
|
||||
|
||||
if (!success && itemId) {
|
||||
console.warn(`Facebook API extraction failed for item ${itemId}`);
|
||||
logger.warn(`Facebook API extraction failed for item ${itemId}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------- HTTP Client -----------------------------
|
||||
|
||||
/**
|
||||
Fetch HTML with a basic retry strategy and simple rate-limit delay between calls.
|
||||
- Retries on 429 and 5xx
|
||||
- Respects X-RateLimit-Reset when present (seconds)
|
||||
- Supports custom cookies for Facebook authentication
|
||||
*/
|
||||
async function fetchHtml(
|
||||
url: string,
|
||||
DELAY_MS: number,
|
||||
opts?: {
|
||||
maxRetries?: number;
|
||||
retryBaseMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
cookies?: string;
|
||||
},
|
||||
): Promise<{ html: HTMLString; responseUrl: string }> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const headers: Record<string, string> = {
|
||||
accept:
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"cache-control": "no-cache",
|
||||
"upgrade-insecure-requests": "1",
|
||||
"sec-fetch-dest": "document",
|
||||
"sec-fetch-mode": "navigate",
|
||||
"sec-fetch-site": "none",
|
||||
"sec-fetch-user": "?1",
|
||||
"user-agent":
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
};
|
||||
|
||||
// Add cookies if provided
|
||||
if (opts?.cookies) {
|
||||
headers.cookie = opts.cookies;
|
||||
}
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "GET",
|
||||
headers,
|
||||
});
|
||||
|
||||
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
||||
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
||||
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
|
||||
|
||||
if (!res.ok) {
|
||||
// Respect 429 reset if provided
|
||||
if (res.status === 429) {
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: (attempt + 1) * retryBaseMs;
|
||||
await delay(waitMs);
|
||||
continue;
|
||||
}
|
||||
// For Facebook, 400 often means authentication required
|
||||
// Don't retry 4xx client errors except 429
|
||||
if (res.status >= 400 && res.status < 500 && res.status !== 429) {
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
|
||||
res.status,
|
||||
url,
|
||||
);
|
||||
}
|
||||
// Retry on 5xx
|
||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||
await delay((attempt + 1) * retryBaseMs);
|
||||
continue;
|
||||
}
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url,
|
||||
);
|
||||
}
|
||||
|
||||
const html = await res.text();
|
||||
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
|
||||
await delay(DELAY_MS);
|
||||
return { html, responseUrl: res.url || url };
|
||||
} catch (err) {
|
||||
if (attempt >= maxRetries) throw err;
|
||||
await delay((attempt + 1) * retryBaseMs);
|
||||
function createFacebookHeaders(cookies: string): Record<string, string> {
|
||||
const jar: Record<string, string> = {};
|
||||
if (cookies) {
|
||||
for (const pair of cookies.split(";")) {
|
||||
const [name, ...rest] = pair.trim().split("=");
|
||||
if (name && rest.length > 0) jar[name.trim()] = rest.join("=").trim();
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error("Exhausted retries without response");
|
||||
return buildFacebookHeaders(jar);
|
||||
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
@@ -371,13 +287,29 @@ export type FacebookResponseKind =
|
||||
| "item"
|
||||
| "auth_gated"
|
||||
| "unavailable"
|
||||
| "checkpoint"
|
||||
| "unknown";
|
||||
|
||||
export function classifyFacebookResponse(
|
||||
htmlString: HTMLString,
|
||||
responseUrl: string,
|
||||
status = 200,
|
||||
) {
|
||||
const challengeType = detectFacebookChallenge(
|
||||
status,
|
||||
htmlString,
|
||||
responseUrl,
|
||||
);
|
||||
if (challengeType === "checkpoint") {
|
||||
return {
|
||||
kind: "checkpoint" as const,
|
||||
authGated: false,
|
||||
unavailable: false,
|
||||
};
|
||||
}
|
||||
|
||||
const authGated =
|
||||
challengeType === "login_wall" ||
|
||||
responseUrl.includes("/login/") ||
|
||||
htmlString.includes("You must log in") ||
|
||||
htmlString.includes("log in to continue");
|
||||
@@ -391,7 +323,11 @@ export function classifyFacebookResponse(
|
||||
htmlString.includes("This listing is no longer available") ||
|
||||
htmlString.includes("listing has been removed");
|
||||
if (unavailable) {
|
||||
return { kind: "unavailable" as const, authGated: false, unavailable: true };
|
||||
return {
|
||||
kind: "unavailable" as const,
|
||||
authGated: false,
|
||||
unavailable: true,
|
||||
};
|
||||
}
|
||||
|
||||
if (responseUrl.includes("/marketplace/item/")) {
|
||||
@@ -438,7 +374,8 @@ function isFacebookSearchEdgeArray(value: unknown): value is FacebookEdge[] {
|
||||
Array.isArray(value) &&
|
||||
value.length > 0 &&
|
||||
value.every(
|
||||
(edge) => isRecord(edge) && isRecord(edge.node) && isRecord(edge.node.listing),
|
||||
(edge) =>
|
||||
isRecord(edge) && isRecord(edge.node) && isRecord(edge.node.listing),
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -535,8 +472,7 @@ function scoreMarketplaceItemPath(path: string[]): number {
|
||||
|
||||
if (
|
||||
path.some(
|
||||
(segment) =>
|
||||
segment.includes("recommend") || segment.includes("related"),
|
||||
(segment) => segment.includes("recommend") || segment.includes("related"),
|
||||
)
|
||||
) {
|
||||
score -= 10;
|
||||
@@ -550,7 +486,9 @@ function collectMarketplaceItemCandidates(
|
||||
path: string[] = [],
|
||||
): FacebookMarketplaceItemMatch[] {
|
||||
if (Array.isArray(candidate)) {
|
||||
return candidate.flatMap((item) => collectMarketplaceItemCandidates(item, path));
|
||||
return candidate.flatMap((item) =>
|
||||
collectMarketplaceItemCandidates(item, path),
|
||||
);
|
||||
}
|
||||
|
||||
if (!isRecord(candidate)) {
|
||||
@@ -611,7 +549,9 @@ function extractRenderedText(node: ParentNode, selector: string): string[] {
|
||||
.filter((text): text is string => Boolean(text));
|
||||
}
|
||||
|
||||
/**
 * Extract a marketplace item id from an element's `href` attribute.
 *
 * @param element - Element to read (typically an anchor); may be `null`.
 * @returns The first capture group of `FACEBOOK_ITEM_HREF_RE` applied to the
 *   `href`, or `null` when the element is `null`, has no `href`, or the
 *   `href` does not match.
 */
function extractMarketplaceItemIdFromElement(
  element: Element | null,
): string | null {
  // A missing element or attribute is treated as an empty href, which
  // simply fails to match.
  const href = element?.getAttribute("href") || "";
  return href.match(FACEBOOK_ITEM_HREF_RE)?.[1] ?? null;
}
|
||||
@@ -649,7 +589,9 @@ function extractFacebookPermalinkItemId(document: Document): string | null {
|
||||
return extractMarketplaceItemIdFromElement(itemLinks.at(-1) ?? null);
|
||||
}
|
||||
|
||||
function extractFacebookDescriptionText(document: Document): string | undefined {
|
||||
function extractFacebookDescriptionText(
|
||||
document: Document,
|
||||
): string | undefined {
|
||||
const labels = Array.from(document.querySelectorAll("div, span, h2, h3, p"));
|
||||
|
||||
for (const label of labels) {
|
||||
@@ -742,7 +684,10 @@ function extractFacebookItemHtmlFallback(
|
||||
const priceText = texts.find((text) => FACEBOOK_PRICE_TEXT_RE.test(text));
|
||||
const parsedPrice = priceText ? parseFacebookRenderedPrice(priceText) : null;
|
||||
const location = texts.find(
|
||||
(text) => text !== title && text !== priceText && FACEBOOK_LOCATION_TEXT_RE.test(text),
|
||||
(text) =>
|
||||
text !== title &&
|
||||
text !== priceText &&
|
||||
FACEBOOK_LOCATION_TEXT_RE.test(text),
|
||||
);
|
||||
const description = extractFacebookDescriptionText(document);
|
||||
|
||||
@@ -790,18 +735,18 @@ export function extractFacebookMarketplaceData(
|
||||
if (htmlString.includes("XCometMarketplaceSearchController")) {
|
||||
const htmlFallback = extractFacebookMarketplaceHtmlFallback(htmlString);
|
||||
if (htmlFallback?.length) {
|
||||
console.log(
|
||||
logger.log(
|
||||
`Successfully parsed ${htmlFallback.length} Facebook marketplace listings from rendered HTML fallback`,
|
||||
);
|
||||
return htmlFallback;
|
||||
}
|
||||
}
|
||||
|
||||
console.warn("No marketplace data found in HTML response");
|
||||
logger.warn("No marketplace data found in HTML response");
|
||||
return null;
|
||||
}
|
||||
|
||||
console.log(
|
||||
logger.log(
|
||||
`Successfully parsed ${bestEdges.length} Facebook marketplace listings`,
|
||||
);
|
||||
return bestEdges.map((edge) => ({ node: edge.node }));
|
||||
@@ -824,7 +769,8 @@ export function extractFacebookItemData(
|
||||
if (
|
||||
!bestMatch ||
|
||||
match.score > bestMatch.score ||
|
||||
(match.score === bestMatch.score && match.path.length < bestMatch.path.length)
|
||||
(match.score === bestMatch.score &&
|
||||
match.path.length < bestMatch.path.length)
|
||||
) {
|
||||
bestMatch = match;
|
||||
}
|
||||
@@ -835,6 +781,22 @@ export function extractFacebookItemData(
|
||||
return bestMatch.item;
|
||||
}
|
||||
|
||||
// Try marketplace_product_details_page.target path (current item page structure)
|
||||
for (const candidate of candidates) {
|
||||
const detailsPage = findKeyInObject(
|
||||
candidate,
|
||||
"marketplace_product_details_page",
|
||||
) as Record<string, unknown> | undefined;
|
||||
const target = detailsPage?.target as Record<string, unknown> | undefined;
|
||||
if (
|
||||
target &&
|
||||
typeof target.id === "string" &&
|
||||
typeof target.marketplace_listing_title === "string"
|
||||
) {
|
||||
return target as unknown as FacebookMarketplaceItem;
|
||||
}
|
||||
}
|
||||
|
||||
if (htmlString.includes("XCometMarketplacePermalinkController")) {
|
||||
return extractFacebookItemHtmlFallback(htmlString);
|
||||
}
|
||||
@@ -842,6 +804,25 @@ export function extractFacebookItemData(
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Depth-first search of a nested value for the first occurrence of a key.
 *
 * Arrays are searched element by element in order. For plain objects, an own
 * property named `targetKey` is returned immediately (even if its value is
 * `undefined`); otherwise each property value is searched in turn.
 * Non-object values never match.
 *
 * Only own properties are considered, so inherited members such as
 * `constructor` or `toString` are never mistaken for data.
 *
 * @param obj - Value to search (typically parsed JSON).
 * @param targetKey - Property name to look for.
 * @returns The value stored under the first matching key, or `undefined`
 *   when no match is found.
 */
function findKeyInObject(obj: unknown, targetKey: string): unknown {
  if (obj == null || typeof obj !== "object") return undefined;

  if (Array.isArray(obj)) {
    for (const item of obj) {
      const found = findKeyInObject(item, targetKey);
      if (found !== undefined) return found;
    }
    return undefined;
  }

  const record = obj as Record<string, unknown>;
  if (Object.prototype.hasOwnProperty.call(record, targetKey)) {
    return record[targetKey];
  }

  for (const value of Object.values(record)) {
    const found = findKeyInObject(value, targetKey);
    if (found !== undefined) return found;
  }
  return undefined;
}
|
||||
|
||||
/**
|
||||
Parse Facebook marketplace search results into ListingDetails[]
|
||||
*/
|
||||
@@ -873,35 +854,25 @@ export function parseFacebookAds(
|
||||
: priceObj.amount;
|
||||
cents = Math.round(dollars * 100);
|
||||
} else if (priceObj.amount_with_offset_in_currency != null) {
|
||||
// Fallback: try to extract cents from amount_with_offset_in_currency
|
||||
// This appears to use some exchange rate/multiplier format
|
||||
const encodedAmount = Number(priceObj.amount_with_offset_in_currency);
|
||||
if (!Number.isNaN(encodedAmount) && encodedAmount > 0) {
|
||||
// Estimate roughly - this field doesn't contain real cents
|
||||
// Use formatted_amount to get the actual dollar amount
|
||||
if (priceObj.formatted_amount) {
|
||||
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
|
||||
if (match) {
|
||||
const dollars = Number.parseFloat(match[0].replace(",", ""));
|
||||
if (!Number.isNaN(dollars)) {
|
||||
cents = Math.round(dollars * 100);
|
||||
} else {
|
||||
cents = encodedAmount; // fallback
|
||||
}
|
||||
} else {
|
||||
cents = encodedAmount; // fallback
|
||||
}
|
||||
} else {
|
||||
cents = encodedAmount; // fallback
|
||||
}
|
||||
} else {
|
||||
continue; // Invalid price
|
||||
}
|
||||
if (!priceObj.formatted_amount) continue;
|
||||
|
||||
const match = priceObj.formatted_amount.match(/[\d,]+\.?\d*/);
|
||||
if (!match) continue;
|
||||
|
||||
const dollars = Number.parseFloat(match[0].replace(/,/g, ""));
|
||||
if (Number.isNaN(dollars)) continue;
|
||||
|
||||
cents = Math.round(dollars * 100);
|
||||
} else if (
|
||||
typeof priceObj.formatted_amount === "string" &&
|
||||
priceObj.formatted_amount.toUpperCase() === "FREE"
|
||||
) {
|
||||
cents = 0;
|
||||
} else {
|
||||
continue; // No price available
|
||||
}
|
||||
|
||||
if (!Number.isFinite(cents) || cents <= 0) continue;
|
||||
if (!Number.isFinite(cents) || cents < 0) continue;
|
||||
|
||||
// Extract address from location data if available
|
||||
const cityName =
|
||||
@@ -960,7 +931,9 @@ export function parseFacebookAds(
|
||||
};
|
||||
|
||||
results.push(listingDetails);
|
||||
} catch {}
|
||||
} catch (error) {
|
||||
logger.warn("Failed to parse Facebook ad:", error);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
@@ -980,13 +953,13 @@ export function parseFacebookItem(
|
||||
const url = `https://www.facebook.com/marketplace/item/${item.id}`;
|
||||
|
||||
// Extract price information
|
||||
let cents = 0;
|
||||
let cents: number | undefined;
|
||||
let currency = "CAD"; // Default
|
||||
let amountFormatted = item.formatted_price?.text || "FREE";
|
||||
let amountFormatted = item.formatted_price?.text;
|
||||
|
||||
if (item.listing_price) {
|
||||
currency = item.listing_price.currency || "CAD";
|
||||
if (item.listing_price.amount && item.listing_price.amount !== "0.00") {
|
||||
if (item.listing_price.amount != null) {
|
||||
const amount = Number.parseFloat(item.listing_price.amount);
|
||||
if (!Number.isNaN(amount)) {
|
||||
cents = Math.round(amount * 100);
|
||||
@@ -1033,6 +1006,13 @@ export function parseFacebookItem(
|
||||
listingType = "vehicle";
|
||||
}
|
||||
|
||||
if (cents == null || !amountFormatted) {
|
||||
if (!listingStatus || listingStatus === "ACTIVE") return null;
|
||||
|
||||
cents = 0;
|
||||
amountFormatted = item.formatted_price?.text || "PRICE_UNAVAILABLE";
|
||||
}
|
||||
|
||||
const listingDetails: FacebookListingDetails = {
|
||||
url,
|
||||
title,
|
||||
@@ -1053,31 +1033,66 @@ export function parseFacebookItem(
|
||||
|
||||
return listingDetails;
|
||||
} catch (error) {
|
||||
console.warn(`Failed to parse Facebook item ${item.id}:`, error);
|
||||
logger.warn(`Failed to parse Facebook item ${item.id}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
LOCATION: string | undefined,
|
||||
MAX_ITEMS: number | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
LOCATION?: string,
|
||||
MAX_ITEMS?: number,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<FacebookListingDetails[]>;
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
LOCATION = "toronto",
|
||||
MAX_ITEMS = 25,
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const cookies = await ensureFacebookCookies();
|
||||
const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
|
||||
|
||||
const finalizeResults = (
|
||||
listings: FacebookListingDetails[],
|
||||
):
|
||||
| FacebookListingDetails[]
|
||||
| UnstableListingBuckets<FacebookListingDetails> => {
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
return listings.slice(0, MAX_ITEMS);
|
||||
}
|
||||
|
||||
const classified = classifyUnstableListings(listings);
|
||||
return {
|
||||
results: classified.results.slice(0, MAX_ITEMS),
|
||||
unstableResults: classified.unstableResults,
|
||||
};
|
||||
};
|
||||
|
||||
const warmupCookies = await warmFacebookSession();
|
||||
const warmupHeader = Object.entries(warmupCookies)
|
||||
.map(([k, v]) => `${k}=${v}`)
|
||||
.join("; ");
|
||||
|
||||
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const domain = "www.facebook.com";
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
);
|
||||
}
|
||||
const userCookiesHeader = formatCookiesForHeader(userCookies, domain);
|
||||
const cookiesHeader = [warmupHeader, userCookiesHeader]
|
||||
.filter(Boolean)
|
||||
.join("; ");
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
|
||||
|
||||
// Encode search query for URL
|
||||
const encodedQuery = encodeURIComponent(SEARCH_QUERY);
|
||||
@@ -1085,86 +1100,110 @@ export default async function fetchFacebookItems(
|
||||
// Facebook marketplace URL structure
|
||||
const searchUrl = `https://www.facebook.com/marketplace/${LOCATION}/search?query=${encodedQuery}&sortBy=creation_time_descend&exact=false`;
|
||||
|
||||
console.log(`Fetching Facebook marketplace: ${searchUrl}`);
|
||||
console.log(`Using ${cookies.length} cookies for authentication`);
|
||||
logger.log(`Fetching Facebook marketplace: ${searchUrl}`);
|
||||
if (userCookies.length > 0) {
|
||||
logger.log(`Using ${userCookies.length} cookies for authentication`);
|
||||
}
|
||||
|
||||
let searchHtml: string;
|
||||
let searchResponseUrl = searchUrl;
|
||||
try {
|
||||
const response = await fetchHtml(searchUrl, DELAY_MS, {
|
||||
maxRetries: 3,
|
||||
includeResponseUrl: true,
|
||||
headers: createFacebookHeaders(cookiesHeader),
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
logger.log(
|
||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
cookies: cookiesHeader,
|
||||
});
|
||||
searchHtml = response.html;
|
||||
searchResponseUrl = response.responseUrl;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.warn(
|
||||
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
|
||||
logger.warn(
|
||||
`\nFacebook marketplace access failed (${err.statusCode}): ${err.message}`,
|
||||
);
|
||||
if (err.status === 400 || err.status === 401 || err.status === 403) {
|
||||
console.warn(
|
||||
if (
|
||||
err.statusCode === 400 ||
|
||||
err.statusCode === 401 ||
|
||||
err.statusCode === 403
|
||||
) {
|
||||
logger.warn(
|
||||
"This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
|
||||
);
|
||||
}
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
if (err instanceof RateLimitError) {
|
||||
logger.warn(`\nFacebook marketplace access rate limited: ${err.message}`);
|
||||
return finalizeResults([]);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
const classification = classifyFacebookResponse(searchHtml, searchResponseUrl);
|
||||
const classification = classifyFacebookResponse(
|
||||
searchHtml,
|
||||
searchResponseUrl,
|
||||
);
|
||||
if (classification.authGated) {
|
||||
console.warn("Facebook marketplace search redirected to login. Cookies may be expired.");
|
||||
return [];
|
||||
logger.warn(
|
||||
"Facebook marketplace search redirected to login. Cookies may be expired.",
|
||||
);
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
if (classification.kind === "checkpoint") {
|
||||
logger.warn(
|
||||
"Facebook marketplace returned a checkpoint challenge. This may require manual verification.",
|
||||
);
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
if (classification.unavailable) {
|
||||
console.warn("Facebook marketplace search returned an unavailable route.");
|
||||
return [];
|
||||
logger.warn("Facebook marketplace search returned an unavailable route.");
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
if (classification.kind !== "search") {
|
||||
console.warn(
|
||||
logger.warn(
|
||||
`Facebook marketplace search returned unexpected route kind: ${classification.kind}.`,
|
||||
);
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
const ads = extractFacebookMarketplaceData(searchHtml);
|
||||
if (!ads || ads.length === 0) {
|
||||
console.warn("No ads parsed from Facebook marketplace page.");
|
||||
return [];
|
||||
logger.warn("No ads parsed from Facebook marketplace page.");
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
console.log(`\nFound ${ads.length} raw ads. Processing...`);
|
||||
logger.log(`\nFound ${ads.length} raw ads. Processing...`);
|
||||
|
||||
const progressBar = new cliProgress.SingleBar(
|
||||
{},
|
||||
cliProgress.Presets.shades_classic,
|
||||
);
|
||||
const isTTY = process.stdout?.isTTY ?? false;
|
||||
const progressBar = isTTY
|
||||
? new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
|
||||
: null;
|
||||
const totalProgress = ads.length;
|
||||
const currentProgress = 0;
|
||||
progressBar.start(totalProgress, currentProgress);
|
||||
progressBar?.start(totalProgress, 0);
|
||||
|
||||
const items = parseFacebookAds(ads);
|
||||
|
||||
// Filter to only priced items (already done in parseFacebookAds)
|
||||
const pricedItems = items.filter(
|
||||
(item) => item.listingPrice?.cents && item.listingPrice.cents > 0,
|
||||
(item) =>
|
||||
typeof item.listingPrice?.cents === "number" &&
|
||||
item.listingPrice.cents >= 0,
|
||||
);
|
||||
|
||||
progressBar.update(totalProgress);
|
||||
progressBar.stop();
|
||||
progressBar?.update(totalProgress);
|
||||
progressBar?.stop();
|
||||
|
||||
console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
|
||||
return pricedItems.slice(0, MAX_ITEMS); // Limit results
|
||||
logger.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
|
||||
return finalizeResults(pricedItems);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1173,127 +1212,158 @@ export default async function fetchFacebookItems(
|
||||
export async function fetchFacebookItem(
|
||||
itemId: string,
|
||||
): Promise<FacebookListingDetails | null> {
|
||||
const cookies = await ensureFacebookCookies();
|
||||
|
||||
// Format cookies for HTTP header
|
||||
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||
if (!cookiesHeader) {
|
||||
throw new Error(
|
||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||
);
|
||||
}
|
||||
const userCookies = await loadCookiesOptional(FACEBOOK_COOKIE_CONFIG);
|
||||
const cookiesHeader = formatCookiesForHeader(userCookies, "www.facebook.com");
|
||||
|
||||
const itemUrl = `https://www.facebook.com/marketplace/item/${itemId}/`;
|
||||
|
||||
console.log(`Fetching Facebook marketplace item: ${itemUrl}`);
|
||||
logger.log(`Fetching Facebook marketplace item: ${itemUrl}`);
|
||||
|
||||
let itemHtml: string;
|
||||
let itemResponseUrl = itemUrl;
|
||||
try {
|
||||
const response = await fetchHtml(itemUrl, 1000, {
|
||||
includeResponseUrl: true,
|
||||
headers: createFacebookHeaders(cookiesHeader),
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
logger.log(
|
||||
`\nFacebook - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
cookies: cookiesHeader,
|
||||
});
|
||||
itemHtml = response.html;
|
||||
itemResponseUrl = response.responseUrl;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.warn(
|
||||
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
|
||||
logger.warn(
|
||||
`\nFacebook marketplace item access failed (${err.statusCode}): ${err.message}`,
|
||||
);
|
||||
|
||||
// Enhanced error handling based on status codes
|
||||
switch (err.status) {
|
||||
switch (err.statusCode) {
|
||||
case 400:
|
||||
case 401:
|
||||
case 403:
|
||||
console.warn(
|
||||
logger.warn(
|
||||
"Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
|
||||
);
|
||||
break;
|
||||
case 404:
|
||||
console.warn(
|
||||
logger.warn(
|
||||
"Listing not found: The marketplace item may have been removed, sold, or the URL is invalid.",
|
||||
);
|
||||
break;
|
||||
case 429:
|
||||
console.warn(
|
||||
logger.warn(
|
||||
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
|
||||
);
|
||||
break;
|
||||
case 500:
|
||||
case 502:
|
||||
case 503:
|
||||
console.warn(
|
||||
logger.warn(
|
||||
"Facebook server error: Marketplace may be temporarily unavailable.",
|
||||
);
|
||||
break;
|
||||
default:
|
||||
console.warn(`Unexpected error status: ${err.status}`);
|
||||
logger.warn(`Unexpected error status: ${err.statusCode}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (err instanceof RateLimitError) {
|
||||
logger.warn(
|
||||
`\nFacebook marketplace item rate limited for item ${itemId}: ${err.message}`,
|
||||
);
|
||||
logger.warn(
|
||||
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
|
||||
);
|
||||
return null;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
const classification = classifyFacebookResponse(itemHtml, itemResponseUrl);
|
||||
|
||||
if (classification.kind === "checkpoint") {
|
||||
logExtractionMetrics(false, itemId);
|
||||
logger.warn(
|
||||
`Checkpoint challenge detected for item ${itemId}. Facebook may be limiting access.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.authGated) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`);
|
||||
logger.warn(
|
||||
`Authentication failed for item ${itemId}. Cookies may be expired.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.unavailable || itemHtml.includes("This item has been sold")) {
|
||||
if (itemResponseUrl.includes("unavailable_product=1")) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(`Item ${itemId} appears to be sold or removed from marketplace.`);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.kind !== "item") {
|
||||
logExtractionMetrics(false, itemId);
|
||||
console.warn(
|
||||
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
|
||||
logger.warn(
|
||||
`Item ${itemId} appears to be sold or removed from marketplace.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
const itemData = extractFacebookItemData(itemHtml);
|
||||
|
||||
if (classification.unavailable && !itemData) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
logger.warn(
|
||||
`Item ${itemId} appears to be sold or removed from marketplace.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (classification.kind !== "item" && !itemData) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
logger.warn(
|
||||
`Item ${itemId} returned unexpected route kind: ${classification.kind}.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!itemData) {
|
||||
logExtractionMetrics(false, itemId);
|
||||
|
||||
console.warn(
|
||||
if (itemHtml.includes("This item has been sold")) {
|
||||
logger.warn(
|
||||
`Item ${itemId} appears to be sold or removed from marketplace.`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
logger.warn(
|
||||
`No item data found in Facebook marketplace page for item ${itemId}. This may indicate:`,
|
||||
);
|
||||
console.warn(" - The listing was removed or sold");
|
||||
console.warn(" - Authentication issues");
|
||||
console.warn(" - Facebook changed their API structure");
|
||||
console.warn(" - Network or parsing issues");
|
||||
logger.warn(" - The listing was removed or sold");
|
||||
logger.warn(" - Authentication issues");
|
||||
logger.warn(" - Facebook changed their API structure");
|
||||
logger.warn(" - Network or parsing issues");
|
||||
return null;
|
||||
}
|
||||
|
||||
logExtractionMetrics(true, itemId);
|
||||
console.log(`Successfully extracted data for item ${itemId}`);
|
||||
logger.log(`Successfully extracted data for item ${itemId}`);
|
||||
|
||||
const parsedItem = parseFacebookItem(itemData);
|
||||
if (!parsedItem) {
|
||||
console.warn(`Failed to parse item ${itemId}: Invalid data structure`);
|
||||
logger.warn(`Failed to parse item ${itemId}: Invalid data structure`);
|
||||
return null;
|
||||
}
|
||||
|
||||
// Check for sold/removed status in the parsed data with proper precedence
|
||||
if (itemData.is_sold) {
|
||||
console.warn(`Item ${itemId} is marked as sold in the marketplace.`);
|
||||
logger.warn(`Item ${itemId} is marked as sold in the marketplace.`);
|
||||
// Still return the data but mark it as sold
|
||||
parsedItem.listingStatus = "SOLD";
|
||||
} else if (!itemData.is_live) {
|
||||
console.warn(`Item ${itemId} is not live/active in the marketplace.`);
|
||||
logger.warn(`Item ${itemId} is not live/active in the marketplace.`);
|
||||
parsedItem.listingStatus = itemData.is_hidden
|
||||
? "HIDDEN"
|
||||
: itemData.is_pending
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import type {
|
||||
HTMLString,
|
||||
UnstableListingBuckets,
|
||||
UnstableListingModeOptions,
|
||||
} from "../types/common";
|
||||
import {
|
||||
type CookieConfig,
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import {
|
||||
fetchHtml,
|
||||
@@ -17,13 +22,14 @@ import {
|
||||
RateLimitError,
|
||||
ValidationError,
|
||||
} from "../utils/http";
|
||||
import { logger } from "../utils/logger";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
|
||||
/**
 * Kijiji cookie configuration.
 *
 * Describes where Kijiji cookies come from and which domain they belong to.
 * NOTE(review): presumably consumed by `loadCookiesOptional`, with `envVar`
 * naming an environment variable and `filePath` a JSON cookie file — confirm
 * against the cookie utilities.
 */
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
  name: "Kijiji",
  domain: ".kijiji.ca",
  envVar: "KIJIJI_COOKIE",
  filePath: "./cookies/kijiji.json",
};
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
@@ -41,6 +47,17 @@ interface ApolloSearchItem {
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
/** One listing attribute: a canonical name and the values recorded for it. */
type ListingAttribute = {
  canonicalName?: string;
  canonicalValues?: string[];
};

/**
 * Attribute container on a listing root. It is either a bare array of
 * attributes or an object that wraps the array under `all`.
 */
type ListingAttributes =
  | ListingAttribute[]
  | {
      all?: ListingAttribute[];
    };
|
||||
|
||||
interface ApolloListingRoot {
|
||||
url?: string;
|
||||
title?: string;
|
||||
@@ -63,7 +80,7 @@ interface ApolloListingRoot {
|
||||
adSource?: string;
|
||||
flags?: { topAd?: boolean; priceDrop?: boolean };
|
||||
posterInfo?: { posterId?: string; rating?: number };
|
||||
attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>;
|
||||
attributes?: ListingAttributes;
|
||||
[k: string]: unknown;
|
||||
}
|
||||
|
||||
@@ -197,18 +214,43 @@ const SORT_MAPPINGS: Record<string, string> = {
|
||||
distance: "DISTANCE",
|
||||
};
|
||||
|
||||
/**
 * Reverse lookup from numeric location ID to URL slug, built by inverting
 * LOCATION_MAPPINGS. Whitespace runs in a mapping key become single hyphens.
 * When several keys share one ID, the last entry wins (Object.fromEntries
 * keeps the final value written for a key).
 */
const LOCATION_SLUGS = Object.fromEntries(
  Object.entries(LOCATION_MAPPINGS).map(([slug, id]) => [
    id,
    slug.replace(/\s+/g, "-"),
  ]),
) as Record<number, string>;

/**
 * Reverse lookup from numeric category ID to URL slug, built by inverting
 * CATEGORY_MAPPINGS with the same rules as LOCATION_SLUGS.
 */
const CATEGORY_SLUGS = Object.fromEntries(
  Object.entries(CATEGORY_MAPPINGS).map(([slug, id]) => [
    id,
    slug.replace(/\s+/g, "-"),
  ]),
) as Record<number, string>;
|
||||
|
||||
// ----------------------------- Utilities -----------------------------
|
||||
|
||||
const SEPS = new Set([" ", "–", "—", "/", ":", ";", ",", ".", "-"]);
|
||||
|
||||
/**
 * Canonicalizes a location or category name for case- and
 * separator-insensitive comparison.
 *
 * Surrounding whitespace is dropped, the text is lower-cased, and every run
 * of whitespace and/or hyphens collapses to a single hyphen, so
 * "Greater  Toronto - Area" and "greater-toronto-area" compare equal.
 *
 * @param value - Raw name as supplied by a caller or stored as a mapping key.
 * @returns The normalized lookup key.
 */
function normalizeLookupKey(value: string): string {
  // Trim first: otherwise leading/trailing spaces turn into stray hyphens
  // ("-toronto-") and the key can never match a mapping entry.
  return value.trim().toLowerCase().replace(/[\s-]+/g, "-");
}
|
||||
|
||||
/**
 * Converts a price in cents into the value used for the `priceMin` /
 * `priceMax` query parameters, rounding down to the hundred.
 *
 * @param cents - Price expressed in cents.
 * @returns `cents / 100`, floored.
 */
function centsToKijijiPriceParam(cents: number): number {
  return Math.floor(cents / 100);
}
|
||||
|
||||
/**
 * Resolve location ID from name or return numeric ID.
 *
 * Numeric input is returned unchanged. String input is normalized with
 * `normalizeLookupKey` and compared against every key of LOCATION_MAPPINGS
 * (normalized the same way), so case and space/hyphen differences do not
 * matter.
 *
 * @param location - Numeric location ID or a location name.
 * @returns The matching location ID, or 0 (Canada) when the input is missing
 *   or unknown.
 */
export function resolveLocationId(location?: number | string): number {
  if (typeof location === "number") return location;
  if (typeof location === "string") {
    const normalized = normalizeLookupKey(location);
    // Normalize the stored keys too, so keys written with spaces still match.
    const mapping = Object.entries(LOCATION_MAPPINGS).find(
      ([key]) => normalizeLookupKey(key) === normalized,
    );
    return mapping?.[1] ?? 0; // Default to Canada (0)
  }
  return 0; // Default to Canada
}
|
||||
@@ -219,12 +261,38 @@ export function resolveLocationId(location?: number | string): number {
|
||||
/**
 * Resolve category ID from name or return numeric ID.
 *
 * Numeric input is returned unchanged. String input is normalized with
 * `normalizeLookupKey` and compared against every key of CATEGORY_MAPPINGS
 * (normalized the same way), so case and space/hyphen differences do not
 * matter.
 *
 * @param category - Numeric category ID or a category name.
 * @returns The matching category ID, or 0 (all categories) when the input is
 *   missing or unknown.
 */
export function resolveCategoryId(category?: number | string): number {
  if (typeof category === "number") return category;
  if (typeof category === "string") {
    const normalized = normalizeLookupKey(category);
    // Normalize the stored keys too, so keys written with spaces still match.
    const mapping = Object.entries(CATEGORY_MAPPINGS).find(
      ([key]) => normalizeLookupKey(key) === normalized,
    );
    return mapping?.[1] ?? 0; // Default to all categories
  }
  return 0; // Default to all categories
}
|
||||
|
||||
/**
 * Checks a listing's price against the optional `priceMin` / `priceMax`
 * bounds in `searchOptions`. Both bounds are inclusive and are compared
 * directly with `listingPrice.cents`.
 *
 * A listing without a finite numeric price is rejected, even when no bound
 * is set.
 * NOTE(review): that also drops unpriced listings from unfiltered searches —
 * confirm this is intended.
 *
 * @param listing - Listing whose `listingPrice.cents` is inspected.
 * @param searchOptions - Options carrying the optional price bounds.
 * @returns `true` when the listing has a finite price inside every bound
 *   that is set.
 */
function matchesPriceFilters(
  listing: DetailedListing,
  searchOptions: SearchOptions,
): boolean {
  const cents = listing.listingPrice?.cents;

  // NaN is typeof "number" but fails every comparison, so it would slip past
  // both bound checks; require a finite value.
  if (typeof cents !== "number" || !Number.isFinite(cents)) return false;

  const { priceMin, priceMax } = searchOptions;
  if (typeof priceMin === "number" && cents < priceMin) return false;
  if (typeof priceMax === "number" && cents > priceMax) return false;

  return true;
}
|
||||
|
||||
/**
|
||||
* Build search URL with enhanced parameters
|
||||
*/
|
||||
@@ -236,23 +304,53 @@ export function buildSearchUrl(
|
||||
const locationId = resolveLocationId(options.location);
|
||||
const categoryId = resolveCategoryId(options.category);
|
||||
|
||||
const categorySlug = categoryId === 0 ? "buy-sell" : "buy-sell";
|
||||
const locationSlug = locationId === 0 ? "canada" : "canada";
|
||||
const categorySlug = CATEGORY_SLUGS[categoryId] ?? "buy-sell";
|
||||
const locationSlug = LOCATION_SLUGS[locationId] ?? "canada";
|
||||
|
||||
let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`;
|
||||
|
||||
const sortParam = options.sortBy
|
||||
? `&sort=${SORT_MAPPINGS[options.sortBy]}`
|
||||
: "";
|
||||
const sortValue =
|
||||
options.sortBy && options.sortBy !== "relevancy"
|
||||
? SORT_MAPPINGS[options.sortBy]
|
||||
: "relevancyDesc";
|
||||
const sortOrder = options.sortOrder === "asc" ? "ASC" : "DESC";
|
||||
const priceMinParam =
|
||||
typeof options.priceMin === "number"
|
||||
? `&priceMin=${centsToKijijiPriceParam(options.priceMin)}`
|
||||
: "";
|
||||
const priceMaxParam =
|
||||
typeof options.priceMax === "number"
|
||||
? `&priceMax=${centsToKijijiPriceParam(options.priceMax)}`
|
||||
: "";
|
||||
const pageParam =
|
||||
options.page && options.page > 1 ? `&page=${options.page}` : "";
|
||||
|
||||
url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`;
|
||||
url += `?sort=${sortValue}&view=list&order=${sortOrder}${priceMinParam}${priceMaxParam}${pageParam}`;
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
/**
 * Finds the first Apollo state key that names a listing record and whose
 * value satisfies `predicate`.
 *
 * A key qualifies only when `isListingRecordKey` accepts it and its value
 * passes `isRecord`; the predicate is evaluated only on such values.
 *
 * @param apolloState - Apollo state extracted from the page.
 * @param predicate - Extra test applied to each candidate record.
 * @returns The first matching key, or `undefined` when none matches.
 */
function findApolloListingKey(
  apolloState: ApolloRecord,
  predicate: (value: Record<string, unknown>) => boolean,
): string | undefined {
  return Object.keys(apolloState).find((key) => {
    if (!isListingRecordKey(key)) return false;

    const value = apolloState[key];
    return isRecord(value) && predicate(value);
  });
}
|
||||
|
||||
/**
 * Tells whether an Apollo state key names a listing record, i.e. starts
 * with the `Listing:` or `StandardListing:` prefix.
 *
 * @param key - Apollo state key to test.
 * @returns `true` for keys with one of the two listing prefixes.
 */
function isListingRecordKey(key: string): boolean {
  return key.startsWith("Listing:") || key.startsWith("StandardListing:");
}
|
||||
|
||||
/**
 * Returns a listing's attributes as a flat array, whichever shape the
 * payload used: a bare array is returned as-is, an object contributes its
 * `all` array.
 *
 * @param attributes - Attribute container from a listing root, if any.
 * @returns The attribute array, or an empty array when there is none.
 */
function getListingAttributes(
  attributes: ListingAttributes | undefined,
): ListingAttribute[] {
  if (Array.isArray(attributes)) return attributes;
  return attributes?.all ?? [];
}
|
||||
|
||||
/**
|
||||
* Slugifies a string for Kijiji search URLs
|
||||
*/
|
||||
@@ -391,18 +489,16 @@ async function fetchSellerDetails(
|
||||
accountType?: string;
|
||||
}> {
|
||||
try {
|
||||
const [reviewData, profileData] = await Promise.all([
|
||||
fetchGraphQLData(
|
||||
GRAPHQL_QUERIES.getReviewSummary,
|
||||
{ userId: posterId },
|
||||
BASE_URL,
|
||||
),
|
||||
fetchGraphQLData(
|
||||
GRAPHQL_QUERIES.getProfileMetrics,
|
||||
{ profileId: posterId },
|
||||
BASE_URL,
|
||||
),
|
||||
]);
|
||||
const reviewData = await fetchGraphQLData(
|
||||
GRAPHQL_QUERIES.getReviewSummary,
|
||||
{ userId: posterId },
|
||||
BASE_URL,
|
||||
);
|
||||
const profileData = await fetchGraphQLData(
|
||||
GRAPHQL_QUERIES.getProfileMetrics,
|
||||
{ profileId: posterId },
|
||||
BASE_URL,
|
||||
);
|
||||
|
||||
const reviewResponse = reviewData as GraphQLReviewResponse;
|
||||
const profileResponse = profileData as GraphQLProfileResponse;
|
||||
@@ -415,7 +511,7 @@ async function fetchSellerDetails(
|
||||
};
|
||||
} catch (err) {
|
||||
// Silently fail for GraphQL errors - not critical for basic functionality
|
||||
console.warn(
|
||||
logger.warn(
|
||||
`Failed to fetch seller details for ${posterId}:`,
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
@@ -457,8 +553,7 @@ export function parseSearch(
|
||||
|
||||
const results: SearchListing[] = [];
|
||||
for (const [key, value] of Object.entries(apolloState)) {
|
||||
// Heuristic: Kijiji listing keys usually contain "Listing"
|
||||
if (!key.includes("Listing")) continue;
|
||||
if (!isListingRecordKey(key)) continue;
|
||||
if (!isRecord(value)) continue;
|
||||
|
||||
const item = value as ApolloSearchItem;
|
||||
@@ -474,78 +569,6 @@ export function parseSearch(
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Parse a listing page into a typed object (backward compatible).
 *
 * Extracts the Apollo state from the page, takes the first state key that
 * contains "Listing", and maps that record onto `KijijiListingDetails`.
 *
 * @param htmlString - HTML of a listing page.
 * @param BASE_URL - Origin prepended to relative listing URLs.
 * @returns The parsed listing, or `null` when the Apollo state, the listing
 *   record, its URL, or its title is missing.
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  // Find the listing root key
  const listingKey = Object.keys(apolloState).find((k) =>
    k.includes("Listing"),
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    location,
  } = root as ApolloListingRoot;

  // Validate the amount before formatting so a non-numeric value can never
  // yield a formatted price paired with `cents: undefined`.
  // NOTE(review): the amount is treated as cents, per the variable naming and
  // formatCentsToCurrency — confirm against the payload.
  const rawCents = price?.amount != null ? Number(price.amount) : undefined;
  const cents =
    rawCents !== undefined && Number.isFinite(rawCents) ? rawCents : undefined;
  const amountFormatted =
    cents !== undefined ? formatCentsToCurrency(cents, "en-CA") : undefined;

  const rawViews = metrics?.views != null ? Number(metrics.views) : undefined;
  const numberOfViews =
    rawViews !== undefined && Number.isFinite(rawViews) ? rawViews : undefined;

  // Relative URLs are resolved against BASE_URL; absolute ones are kept.
  const listingUrl =
    typeof url === "string"
      ? url.startsWith("http")
        ? url
        : `${BASE_URL}${url}`
      : "";

  if (!listingUrl || !title) return null;

  return {
    url: listingUrl,
    title,
    description,
    listingPrice: amountFormatted
      ? {
          amountFormatted,
          cents,
          currency: price?.currency,
        }
      : undefined,
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews,
    address: location?.address ?? null,
  };
}
|
||||
|
||||
/**
|
||||
* Parse a listing page into a detailed object with all available fields
|
||||
*/
|
||||
@@ -557,9 +580,12 @@ export async function parseDetailedListing(
|
||||
const apolloState = extractApolloState(htmlString);
|
||||
if (!apolloState) return null;
|
||||
|
||||
// Find the listing root key
|
||||
const listingKey = Object.keys(apolloState).find((k) =>
|
||||
k.includes("Listing"),
|
||||
const listingKey = findApolloListingKey(
|
||||
apolloState,
|
||||
(value) =>
|
||||
typeof value.url === "string" &&
|
||||
typeof value.title === "string" &&
|
||||
isRecord(value.price),
|
||||
);
|
||||
if (!listingKey) return null;
|
||||
|
||||
@@ -612,11 +638,9 @@ export async function parseDetailedListing(
|
||||
|
||||
// Extract attributes as key-value pairs
|
||||
const attributeMap: Record<string, string[]> = {};
|
||||
if (Array.isArray(attributes)) {
|
||||
for (const attr of attributes) {
|
||||
if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) {
|
||||
attributeMap[attr.canonicalName] = attr.canonicalValues;
|
||||
}
|
||||
for (const attr of getListingAttributes(attributes)) {
|
||||
if (attr.canonicalName && Array.isArray(attr.canonicalValues)) {
|
||||
attributeMap[attr.canonicalName] = attr.canonicalValues;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -647,7 +671,7 @@ export async function parseDetailedListing(
|
||||
};
|
||||
} catch {
|
||||
// Silently fail - GraphQL data is optional
|
||||
console.warn(
|
||||
logger.warn(
|
||||
`Failed to fetch additional seller data for ${posterInfo.posterId}`,
|
||||
);
|
||||
}
|
||||
@@ -696,14 +720,43 @@ export async function parseDetailedListing(
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
BASE_URL: string | undefined,
|
||||
searchOptions: SearchOptions | undefined,
|
||||
listingOptions: ListingFetchOptions | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<DetailedListing>>;
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
BASE_URL?: string,
|
||||
searchOptions?: SearchOptions,
|
||||
listingOptions?: ListingFetchOptions,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<DetailedListing[]>;
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
BASE_URL = "https://www.kijiji.ca",
|
||||
searchOptions: SearchOptions = {},
|
||||
listingOptions: ListingFetchOptions = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
|
||||
|
||||
const finalizeResults = (
|
||||
listings: DetailedListing[],
|
||||
): DetailedListing[] | UnstableListingBuckets<DetailedListing> => {
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
return listings;
|
||||
}
|
||||
|
||||
return classifyUnstableListings(listings);
|
||||
};
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
|
||||
|
||||
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||
const cookies = await loadCookiesOptional(
|
||||
@@ -716,15 +769,21 @@ export default async function fetchKijijiItems(
|
||||
: undefined;
|
||||
|
||||
// Set defaults for configuration
|
||||
const finalSearchOptions: Required<SearchOptions> = {
|
||||
const finalSearchOptions: Omit<
|
||||
Required<SearchOptions>,
|
||||
"priceMin" | "priceMax"
|
||||
> & {
|
||||
priceMin?: number;
|
||||
priceMax?: number;
|
||||
} = {
|
||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||
category: searchOptions.category ?? 0, // Default to all categories
|
||||
keywords: searchOptions.keywords ?? SEARCH_QUERY,
|
||||
sortBy: searchOptions.sortBy ?? "relevancy",
|
||||
sortOrder: searchOptions.sortOrder ?? "desc",
|
||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||
priceMin: searchOptions.priceMin as number,
|
||||
priceMax: searchOptions.priceMax as number,
|
||||
priceMin: searchOptions.priceMin,
|
||||
priceMax: searchOptions.priceMax,
|
||||
cookies: searchOptions.cookies ?? "",
|
||||
};
|
||||
|
||||
@@ -749,11 +808,11 @@ export default async function fetchKijijiItems(
|
||||
BASE_URL,
|
||||
);
|
||||
|
||||
console.log(`Fetching search page ${page}: ${searchUrl}`);
|
||||
logger.log(`Fetching search page ${page}: ${searchUrl}`);
|
||||
const searchHtml = await fetchHtml(searchUrl, DELAY_MS, {
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
logger.log(
|
||||
`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
@@ -763,9 +822,17 @@ export default async function fetchKijijiItems(
|
||||
|
||||
const searchResults = parseSearch(searchHtml, BASE_URL);
|
||||
if (searchResults.length === 0) {
|
||||
console.log(
|
||||
`No more results found on page ${page}. Stopping pagination.`,
|
||||
);
|
||||
if (page === 1) {
|
||||
logger.log(
|
||||
`No results found on page 1. The search URL was: ${searchUrl}\n` +
|
||||
`Tip: Kijiji matches ALL words in the query against listing titles. ` +
|
||||
`Try a shorter or more common query (e.g. "macbook air m1" instead of "macbook air m1 apple silicon").`,
|
||||
);
|
||||
} else {
|
||||
logger.log(
|
||||
`No more results found on page ${page}. Stopping pagination.`,
|
||||
);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -778,7 +845,7 @@ export default async function fetchKijijiItems(
|
||||
seenUrls.add(link);
|
||||
}
|
||||
|
||||
console.log(
|
||||
logger.log(
|
||||
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
|
||||
);
|
||||
|
||||
@@ -792,18 +859,22 @@ export default async function fetchKijijiItems(
|
||||
progressBar?.start(totalProgress, currentProgress);
|
||||
|
||||
// Process in batches for controlled concurrency
|
||||
const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing
|
||||
const CONCURRENT_REQUESTS = Math.max(1, Math.floor(requestsPerSecond));
|
||||
const results: (DetailedListing | null)[] = [];
|
||||
|
||||
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
||||
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
|
||||
const batchPromises = batch.map(async (link) => {
|
||||
const batchPromises = batch.map(async (link, batchIndex) => {
|
||||
try {
|
||||
if (batchIndex > 0) {
|
||||
await delay(DELAY_MS * batchIndex);
|
||||
}
|
||||
|
||||
const html = await fetchHtml(link, 0, {
|
||||
// No per-request delay, batch handles rate limit
|
||||
// Staggered starts keep request pacing within REQUESTS_PER_SECOND.
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
logger.log(
|
||||
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
@@ -818,11 +889,11 @@ export default async function fetchKijijiItems(
|
||||
return parsed;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
logger.warn(
|
||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
|
||||
);
|
||||
} else {
|
||||
console.error(
|
||||
logger.warn(
|
||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
|
||||
);
|
||||
}
|
||||
@@ -831,7 +902,7 @@ export default async function fetchKijijiItems(
|
||||
currentProgress++;
|
||||
progressBar?.update(currentProgress);
|
||||
if (!progressBar) {
|
||||
console.log(`Progress: ${currentProgress}/${totalProgress}`);
|
||||
logger.log(`Progress: ${currentProgress}/${totalProgress}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -839,11 +910,8 @@ export default async function fetchKijijiItems(
|
||||
const batchResults = await Promise.all(batchPromises);
|
||||
results.push(...batchResults);
|
||||
|
||||
// Wait between batches to respect rate limit
|
||||
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, DELAY_MS * batch.length),
|
||||
);
|
||||
await delay(DELAY_MS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -859,8 +927,12 @@ export default async function fetchKijijiItems(
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\nParsed ${allListings.length} detailed listings.`);
|
||||
return allListings;
|
||||
const filteredListings = allListings.filter((listing) =>
|
||||
matchesPriceFilters(listing, finalSearchOptions),
|
||||
);
|
||||
|
||||
logger.log(`\nParsed ${filteredListings.length} detailed listings.`);
|
||||
return finalizeResults(filteredListings);
|
||||
}
|
||||
|
||||
// Re-export error classes for convenience
|
||||
|
||||
@@ -18,3 +18,12 @@ export interface ListingDetails {
|
||||
address?: string | null;
|
||||
creationDate?: string;
|
||||
}
|
||||
|
||||
/**
 * Listings split into a main bucket and an "unstable" bucket.
 *
 * This is the return shape of `classifyUnstableListings`, which puts
 * listings priced below 80% of the median price into `unstableResults`
 * and everything else into `results`.
 */
export interface UnstableListingBuckets<T> {
  /** Listings kept in the main result set. */
  results: Array<T>;
  /** Listings set aside as unstable. */
  unstableResults: Array<T>;
}
|
||||
|
||||
/**
 * Options controlling the unstable-listing mode.
 */
export interface UnstableListingModeOptions {
  /**
   * Opt-in flag for the unstable-listing mode.
   * NOTE(review): the eBay scraper type tests expect an
   * `UnstableListingBuckets` return when this is `true` — confirm against
   * the scraper implementations.
   */
  hideUnstableResults?: boolean;
}
|
||||
|
||||
@@ -2,9 +2,12 @@
|
||||
* Shared cookie handling utilities for marketplace scrapers
|
||||
*/
|
||||
|
||||
import { logger } from "./logger";
|
||||
|
||||
export interface Cookie {
|
||||
name: string;
|
||||
value: string;
|
||||
rawValue?: string;
|
||||
domain: string;
|
||||
path: string;
|
||||
secure?: boolean;
|
||||
@@ -41,9 +44,9 @@ export function parseCookieString(
|
||||
.split(";")
|
||||
.map((pair) => pair.trim())
|
||||
.filter((pair) => pair.includes("="))
|
||||
.map((pair) => {
|
||||
.map((pair): Cookie | null => {
|
||||
const [name, ...valueParts] = pair.split("=");
|
||||
const trimmedName = name.trim();
|
||||
const trimmedName = name?.trim();
|
||||
const trimmedValue = valueParts.join("=").trim();
|
||||
|
||||
if (!trimmedName || !trimmedValue) {
|
||||
@@ -53,6 +56,7 @@ export function parseCookieString(
|
||||
return {
|
||||
name: trimmedName,
|
||||
value: decodeURIComponent(trimmedValue),
|
||||
rawValue: trimmedValue,
|
||||
domain,
|
||||
path: "/",
|
||||
secure: true,
|
||||
@@ -93,19 +97,30 @@ export function formatCookiesForHeader(
|
||||
});
|
||||
|
||||
return validCookies
|
||||
.map((cookie) => `${cookie.name}=${cookie.value}`)
|
||||
.map((cookie) => `${cookie.name}=${cookie.rawValue ?? cookie.value}`)
|
||||
.join("; ");
|
||||
}
|
||||
|
||||
/**
|
||||
* Load cookies from the configured environment variable
|
||||
* Load cookies from the configured environment variable or explicit cookie string
|
||||
*/
|
||||
export async function ensureCookies(config: CookieConfig): Promise<Cookie[]> {
|
||||
export async function ensureCookies(
|
||||
config: CookieConfig,
|
||||
cookiesSource?: string,
|
||||
): Promise<Cookie[]> {
|
||||
// Explicit cookie string takes priority
|
||||
if (cookiesSource) {
|
||||
const cookies = parseCookieString(cookiesSource, config.domain);
|
||||
if (cookies.length > 0) {
|
||||
return cookies;
|
||||
}
|
||||
}
|
||||
|
||||
const envValue = process.env[config.envVar];
|
||||
const cookies = parseCookieString(envValue ?? "", config.domain);
|
||||
|
||||
if (cookies.length > 0) {
|
||||
console.log(
|
||||
logger.log(
|
||||
`Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
|
||||
);
|
||||
return cookies;
|
||||
|
||||
@@ -4,5 +4,7 @@
|
||||
* @returns A promise that resolves after the specified delay
|
||||
*/
|
||||
export function delay(ms: number): Promise<void> {
  // Under NODE_ENV=test no timer is scheduled at all, so throttling and
  // backoff waits resolve immediately.
  const skipWaiting = process.env.NODE_ENV === "test";
  if (skipWaiting) {
    return Promise.resolve();
  }

  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
239
packages/core/src/utils/ebay-challenge.ts
Normal file
239
packages/core/src/utils/ebay-challenge.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
import argon2 from "argon2-wasm-pro";
|
||||
|
||||
// ------------------ Types ------------------
|
||||
|
||||
/**
 * Numeric and base64 parameters taken from the `details` object of the
 * challenge page's `_cdetail` hidden field. The roles below are the ones
 * `solveArgon2Challenge` assigns to each field.
 */
interface ChallengeDetails {
  /** Passed to argon2 as `time`. */
  p2: number;
  /** Length in bytes of each generated nonce. */
  p6: number;
  /** Passed to argon2 as `hashLen`. */
  p7: number;
  /** Base64-encoded salt. */
  p9: string;
  /** Base64-encoded target that a hash is compared against. */
  p11: string;
  /** Passed to argon2 as `parallelism`. */
  p12: number;
  /** Passed to argon2 as `mem`. */
  p13: number;
  /** Number of answers (accepted nonces) to produce. */
  p15: number;
}
|
||||
|
||||
/**
 * Everything `parseChallengePage` extracts from a challenge page and
 * `solveEbayChallenge` needs to submit an answer.
 */
interface ChallengeParams {
  /** Value of the `_crefId` hidden field; submitted as `referenceId`. */
  crefId: string;
  /** Parsed `_cdetail` proof-of-work parameters. */
  cdetail: ChallengeDetails;
  /** Value of the `_iid` hidden field. */
  iid: string;
  /** Origin the answer endpoint is requested from. */
  chlghost: string;
  /** Value of the `_appName` hidden field. */
  appName: string;
  /** Value of the `_p` hidden field. */
  p: string;
  /** `action` of the `destForm` form; submitted as `ru`. */
  destUrl: string;
}
|
||||
|
||||
/** Outcome of a successfully answered challenge. */
interface ChallengeResult {
  /** `name=value` pairs joined with "; ", built from the answer response's Set-Cookie headers. */
  cookies: string;
}
|
||||
|
||||
// ------------------ Helpers ------------------
|
||||
|
||||
/**
 * Compare the first `len` entries of `a` and `b`.
 *
 * Entries missing from either side are treated as 0.
 *
 * @returns 0 when all compared entries are equal; otherwise the difference
 *   (low 8 bits of each) of the first pair that differs.
 */
function memcmp(a: Uint8Array, b: number[], len: number): number {
  let index = 0;
  while (index < len) {
    const left = a[index] ?? 0;
    const right = b[index] ?? 0;
    if (left === right) {
      index += 1;
      continue;
    }
    return (left & 0xff) - (right & 0xff);
  }
  return 0;
}
|
||||
|
||||
/**
 * Write `val` as four bytes, most significant first, into `arr` starting
 * at `offset`. Each stored byte keeps only the low 8 bits of the shifted
 * value.
 */
function intToBytes(val: number, arr: Uint8Array, offset: number) {
  const shifts = [24, 16, 8, 0];
  shifts.forEach((shift, position) => {
    arr[offset + position] = val >>> shift;
  });
}
|
||||
|
||||
/**
 * Convert a string into an array holding the UTF-16 code unit of each
 * position (`charCodeAt`).
 */
function string2Bin(str: string): number[] {
  return Array.from({ length: str.length }, (_, position) =>
    str.charCodeAt(position),
  );
}
|
||||
|
||||
/**
 * Base64-encode a byte buffer.
 *
 * The bytes are turned into a binary string in fixed-size chunks: spreading
 * the whole buffer into a single `String.fromCharCode(...)` call passes one
 * argument per byte and throws a RangeError once the buffer exceeds the
 * engine's argument limit.
 *
 * @param buf - Bytes to encode.
 * @returns The base64 representation of `buf`.
 */
function bufferToBase64(buf: Uint8Array): string {
  const CHUNK_SIZE = 0x8000;
  let binary = "";
  for (let start = 0; start < buf.length; start += CHUNK_SIZE) {
    binary += String.fromCharCode(...buf.subarray(start, start + CHUNK_SIZE));
  }
  return btoa(binary);
}
|
||||
|
||||
/**
 * Reduce Set-Cookie header values to a name → value map.
 *
 * Only the leading `name=value` pair of each header is used (the value ends
 * at the first ";"). Headers without such a pair are skipped, and a later
 * header overwrites an earlier one with the same name.
 */
function parseCookiesFromSetCookie(cookies: string[]): Record<string, string> {
  const jar: Record<string, string> = {};
  for (const headerValue of cookies) {
    const [, cookieName, cookieValue] =
      /^([^=]+)=([^;]+)/.exec(headerValue) ?? [];
    if (!cookieName || !cookieValue) continue;
    jar[cookieName] = cookieValue;
  }
  return jar;
}
|
||||
|
||||
// ------------------ Default headers ------------------
|
||||
|
||||
/** User-Agent string sent with the challenge answer request. */
const BROWSER_UA =
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";

/**
 * Browser-like default request headers.
 * NOTE(review): not referenced by any function visible in this module —
 * confirm whether it is still needed.
 */
const _EBAY_HEADERS: Record<string, string> = {
  "User-Agent": BROWSER_UA,
  Accept:
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
  "Accept-Language": "en-CA,en-US;q=0.9,en;q=0.8",
};
|
||||
|
||||
// ------------------ Parser ------------------
|
||||
|
||||
/**
 * Extract the challenge parameters embedded in a challenge page.
 *
 * Hidden fields are located by `id=<name>` followed by a `value` attribute
 * that may be single-quoted, double-quoted or bare. The `_cdetail` field is
 * parsed as JSON and its `details` object is validated before use.
 *
 * @param html - Raw HTML of the challenge page.
 * @returns The parsed parameters, or `null` when the page lacks `_crefId` /
 *   `_cdetail`, when `_cdetail` is not valid JSON with a `details` object,
 *   when `p9` / `p11` are not strings, or when any numeric field does not
 *   convert to a finite number.
 */
export function parseChallengePage(html: string): ChallengeParams | null {
  const isObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  const getHidden = (id: string): string => {
    const re = new RegExp(
      `id=${id}\\s+value='([^']*)'` +
        `|id=${id}\\s+value="([^"]*)"` +
        `|id=${id}\\s+value=([^\\s>]+)`,
      "i",
    );
    const m = html.match(re);
    if (!m) return "";
    return m[1] ?? m[2] ?? m[3] ?? "";
  };

  const crefId = getHidden("_crefId");
  const cdetailRaw = getHidden("_cdetail");
  if (!crefId || !cdetailRaw) return null;

  let details: unknown;
  try {
    const parsed: unknown = JSON.parse(cdetailRaw);
    details = isObject(parsed) ? parsed.details : undefined;
  } catch {
    return null;
  }
  if (!isObject(details)) return null;

  // The salt and target are base64-decoded later, so they must be strings.
  const { p9, p11 } = details;
  if (typeof p9 !== "string" || typeof p11 !== "string") return null;

  const cdetail: ChallengeDetails = {
    p2: Number(details.p2),
    p6: Number(details.p6),
    p7: Number(details.p7),
    p9,
    p11,
    p12: Number(details.p12),
    p13: Number(details.p13),
    p15: Number(details.p15),
  };

  // A missing or non-numeric field converts to NaN; reject it here rather
  // than handing an unusable challenge to the solver.
  const numericFields = [
    cdetail.p2,
    cdetail.p6,
    cdetail.p7,
    cdetail.p12,
    cdetail.p13,
    cdetail.p15,
  ];
  if (!numericFields.every((value) => Number.isFinite(value))) return null;

  const formActionMatch = html.match(
    /<form\s+id=destForm\s+[^>]*action=([^\s>]+)/i,
  );

  return {
    crefId,
    cdetail,
    iid: getHidden("_iid"),
    chlghost: getHidden("_chlghost") || "https://www.ebay.ca",
    appName: getHidden("_appName") || "orch",
    p: getHidden("_p"),
    destUrl: formActionMatch?.[1]?.trim() ?? "",
  };
}
|
||||
|
||||
// ------------------ Solver ------------------
|
||||
|
||||
/**
 * Search for nonces whose argon2 hash compares less than or equal to the
 * challenge target.
 *
 * Each candidate nonce is `p6` random bytes whose last four bytes hold a
 * big-endian counter starting at 0. The counter is incremented after every
 * rejected hash; after an accepted hash a fresh random nonce is drawn.
 *
 * @param cdetail - Challenge parameters (salt, target and argon2 settings).
 * @returns `p15` accepted nonces, each base64-encoded.
 */
async function solveArgon2Challenge(
  cdetail: ChallengeDetails,
): Promise<string[]> {
  const targetBytes = string2Bin(atob(cdetail.p11));
  const salt = Uint8Array.from(atob(cdetail.p9), (c) => c.charCodeAt(0));
  const { p6: nonceLen, p15: answerCount } = cdetail;

  // Random bytes with the trailing 4-byte counter reset to zero.
  const createNonce = () => {
    const fresh = new Uint8Array(nonceLen);
    crypto.getRandomValues(fresh);
    intToBytes(0, fresh, fresh.length - 4);
    return fresh;
  };

  const answers: string[] = [];
  let nonce = createNonce();
  let counter = 0;

  while (answers.length < answerCount) {
    const { hash } = await argon2.hash({
      pass: nonce,
      salt,
      time: cdetail.p2,
      mem: cdetail.p13,
      hashLen: cdetail.p7,
      parallelism: cdetail.p12,
      // NOTE(review): presumably selects Argon2id — confirm against the
      // argon2-wasm-pro type enum.
      type: 2,
    });

    const accepted =
      memcmp(hash as Uint8Array, targetBytes, targetBytes.length) <= 0;

    if (!accepted) {
      // Try the next counter value on the same random prefix.
      counter += 1;
      intToBytes(counter, nonce, nonce.length - 4);
      continue;
    }

    answers.push(bufferToBase64(nonce));
    nonce = createNonce();
    counter = 0;
  }

  return answers;
}
|
||||
|
||||
// ------------------ Public API ------------------
|
||||
|
||||
/**
 * Solve the challenge embedded in `html` and submit the answers.
 *
 * @param html - Raw HTML of the challenge page.
 * @param cookieHeader - Optional Cookie header value sent with the answer request.
 * @returns The cookies set by the answer response joined as a Cookie header
 *   string, or `null` when the page cannot be parsed, the answer request is
 *   not OK, or the response sets no cookies.
 */
export async function solveEbayChallenge(
  html: string,
  cookieHeader?: string,
): Promise<ChallengeResult | null> {
  const params = parseChallengePage(html);
  if (params === null) return null;

  const answers = await solveArgon2Challenge(params.cdetail);
  const encodedAnswers = encodeURIComponent(answers.join(","));

  const payload = {
    iid: params.iid,
    appName: params.appName,
    referenceId: params.crefId,
    pvt: Date.now().toString(),
    crt: Date.now().toString(),
    encodedAnswers,
    p: params.p,
    ru: params.destUrl,
  };

  const headers: Record<string, string> = {
    "content-type": "application/json",
    accept: "application/json, text/plain, */*",
    "user-agent": BROWSER_UA,
    // Forward the caller's cookies only when some were supplied.
    ...(cookieHeader ? { cookie: cookieHeader } : {}),
  };

  const endpoint = `${params.chlghost}/splashui/challengesvc/answer`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(payload),
  });
  if (!response.ok) return null;

  // Collect the cookies issued by the answer response.
  const issued = parseCookiesFromSetCookie(
    response.headers.getSetCookie?.() ?? [],
  );
  const pairs = Object.entries(issued).map(([k, v]) => `${k}=${v}`);

  return pairs.length === 0 ? null : { cookies: pairs.join("; ") };
}
|
||||
128
packages/core/src/utils/facebook-challenge.ts
Normal file
128
packages/core/src/utils/facebook-challenge.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
// Facebook Marketplace session & challenge utilities
|
||||
|
||||
// ------------------ Types ------------------
|
||||
|
||||
/**
 * Classification produced by `detectFacebookChallenge`:
 * - "login_wall": the response URL contains "/login/" or the page asks the user to log in
 * - "checkpoint": the response URL contains "/checkpoint/" or the page mentions both "checkpoint" and "challenge"
 * - "bad_headers": HTTP status 400
 * - "rate_limited": HTTP status 429
 * - "none": none of the above was detected
 */
export type ChallengeType =
  | "login_wall"
  | "checkpoint"
  | "bad_headers"
  | "rate_limited"
  | "none";
|
||||
|
||||
// ------------------ Constants ------------------
|
||||
|
||||
/**
 * Browser-like request headers used for the session warm-up request and as
 * the base of every header set built by `buildFacebookHeaders`.
 */
const FACEBOOK_BROWSER_HEADERS: Record<string, string> = {
  // Content negotiation
  accept:
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
  "cache-control": "no-cache",
  "upgrade-insecure-requests": "1",
  // Fetch metadata
  "sec-fetch-dest": "document",
  "sec-fetch-mode": "navigate",
  "sec-fetch-site": "none",
  "sec-fetch-user": "?1",
  // Client hints and user agent
  "sec-ch-ua":
    '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
  "sec-ch-ua-mobile": "?0",
  "sec-ch-ua-platform": '"Linux"',
  "user-agent":
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
};
|
||||
|
||||
// ------------------ Cookie Management ------------------
|
||||
|
||||
/**
 * Build a name → value map from Set-Cookie header values.
 *
 * Only the segment before the first ";" of each header is considered. It is
 * split at its first "=", both sides are trimmed, and the pair is kept only
 * when name and value are both non-empty. Later headers overwrite earlier
 * ones with the same name.
 */
function parseSetCookies(setCookieHeaders: string[]): Record<string, string> {
  const jar: Record<string, string> = {};

  for (const line of setCookieHeaders) {
    const pair = (line.split(";")[0] ?? "").trim();
    const separator = pair.indexOf("=");
    if (pair === "" || separator === -1) continue;

    const key = pair.slice(0, separator).trim();
    const val = pair.slice(separator + 1).trim();
    if (key !== "" && val !== "") {
      jar[key] = val;
    }
  }

  return jar;
}
|
||||
|
||||
/**
 * Serialize a cookie map as a Cookie header value: `name=value` pairs
 * joined with "; ". An empty map yields an empty string.
 */
function cookiesToHeader(cookies: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [name, value] of Object.entries(cookies)) {
    pairs.push(`${name}=${value}`);
  }
  return pairs.join("; ");
}
|
||||
|
||||
// ------------------ Session Warmup ------------------
|
||||
|
||||
/**
 * Request the Facebook homepage with browser-like headers and return the
 * cookies it sets.
 *
 * Redirects are not followed and the request is aborted after 10 seconds.
 * This is best-effort: any failure yields an empty cookie map instead of
 * an error.
 */
export async function warmFacebookSession(): Promise<Record<string, string>> {
  try {
    const homepage = await fetch("https://www.facebook.com/", {
      method: "GET",
      headers: FACEBOOK_BROWSER_HEADERS,
      redirect: "manual",
      signal: AbortSignal.timeout(10000),
    });

    const issued = homepage.headers.getSetCookie?.() ?? [];
    return parseSetCookies(issued);
  } catch {
    return {};
  }
}
|
||||
|
||||
// ------------------ Challenge Detection ------------------
|
||||
|
||||
/**
 * Classify a Facebook response.
 *
 * Checks run in this order and the first hit wins: status 400, status 429,
 * login wall (URL or page text), checkpoint (URL, or page text mentioning
 * both "checkpoint" and "challenge").
 *
 * @param status - HTTP status code of the response.
 * @param html - Response body.
 * @param responseUrl - URL of the response.
 * @returns The detected challenge type, or "none".
 */
export function detectFacebookChallenge(
  status: number,
  html: string,
  responseUrl: string,
): ChallengeType {
  if (status === 400) return "bad_headers";
  if (status === 429) return "rate_limited";

  const hitLoginWall =
    responseUrl.includes("/login/") ||
    html.includes("You must log in") ||
    html.includes("log in to continue");
  if (hitLoginWall) return "login_wall";

  const hitCheckpoint =
    responseUrl.includes("/checkpoint/") ||
    (html.includes("checkpoint") && html.includes("challenge"));

  return hitCheckpoint ? "checkpoint" : "none";
}
|
||||
|
||||
// ------------------ Header Construction ------------------
|
||||
|
||||
/**
 * Assemble request headers for Facebook.
 *
 * Starts from a copy of the default browser headers, adds a `cookie` header
 * when the jar is non-empty, then applies `extraHeaders` last so they
 * override anything set before.
 *
 * @param cookieJar - Cookie name → value map.
 * @param extraHeaders - Optional headers merged on top.
 * @returns A new header object; the defaults are not modified.
 */
export function buildFacebookHeaders(
  cookieJar: Record<string, string>,
  extraHeaders?: Record<string, string>,
): Record<string, string> {
  const cookieString = cookiesToHeader(cookieJar);

  return {
    ...FACEBOOK_BROWSER_HEADERS,
    ...(cookieString ? { cookie: cookieString } : {}),
    ...(extraHeaders ?? {}),
  };
}
|
||||
@@ -1,56 +1,56 @@
|
||||
import type { HTMLString } from "../types/common";
|
||||
import { delay } from "./delay";
|
||||
|
||||
/** Custom error class for HTTP-related failures */
|
||||
export class HttpError extends Error {
|
||||
override name = "HttpError";
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly statusCode: number,
|
||||
public readonly url?: string,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "HttpError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for network failures (timeouts, connection issues) */
|
||||
export class NetworkError extends Error {
|
||||
override name = "NetworkError";
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly cause?: Error,
|
||||
public override readonly cause?: Error,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "NetworkError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for parsing failures */
|
||||
export class ParseError extends Error {
|
||||
override name = "ParseError";
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly data?: unknown,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "ParseError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for rate limiting */
|
||||
export class RateLimitError extends Error {
|
||||
override name = "RateLimitError";
|
||||
constructor(
|
||||
message: string,
|
||||
public readonly url: string,
|
||||
public readonly resetTime?: number,
|
||||
) {
|
||||
super(message);
|
||||
this.name = "RateLimitError";
|
||||
}
|
||||
}
|
||||
|
||||
/** Error class for validation failures */
|
||||
export class ValidationError extends Error {
|
||||
constructor(message: string) {
|
||||
super(message);
|
||||
this.name = "ValidationError";
|
||||
}
|
||||
override name = "ValidationError";
|
||||
}
|
||||
|
||||
/** Type guard to check if a value is a record (object) */
|
||||
@@ -61,10 +61,57 @@ export function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
/**
|
||||
* Calculate exponential backoff delay with jitter
|
||||
*/
|
||||
function calculateBackoffDelay(attempt: number, baseMs: number): number {
|
||||
function calculateBackoffDelay(
|
||||
attempt: number,
|
||||
baseMs: number,
|
||||
jitter: () => number = Math.random,
|
||||
): number {
|
||||
const exponentialDelay = baseMs * 2 ** attempt;
|
||||
const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter
|
||||
return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds
|
||||
const jitterDelay = jitter() * 0.1 * exponentialDelay; // 10% jitter
|
||||
return Math.min(exponentialDelay + jitterDelay, 30000); // Cap at 30 seconds
|
||||
}
|
||||
|
||||
/** Upper bound, in milliseconds, on a wait derived from a rate-limit reset header. */
const MAX_RATE_LIMIT_WAIT_MS = 30 * 1000;
/**
 * Reset header values up to this many seconds are read as a delay from now;
 * larger values are read as an absolute timestamp in seconds.
 */
const MAX_DELTA_RESET_SECONDS = 24 * 60 * 60;
|
||||
|
||||
/**
 * Merge two header maps with lower-cased keys.
 *
 * Defaults are applied first, then custom headers, so a custom header
 * replaces a default whose name differs only in letter case.
 */
function mergeHeaders(
  defaultHeaders: Record<string, string>,
  customHeaders?: Record<string, string>,
): Record<string, string> {
  const merged: Record<string, string> = {};

  const applyLowercased = (headers: Record<string, string>): void => {
    Object.entries(headers).forEach(([name, value]) => {
      merged[name.toLowerCase()] = value;
    });
  };

  applyLowercased(defaultHeaders);
  applyLowercased(customHeaders ?? {});

  return merged;
}
|
||||
|
||||
/**
 * Turn a rate-limit reset header into a wait time in milliseconds.
 *
 * @param resetHeader - Raw header value, or `null` when absent.
 * @param fallbackWaitMs - Returned as-is when the header is missing, empty
 *   or not a finite number.
 * @returns The wait clamped to the range 0..MAX_RATE_LIMIT_WAIT_MS. Values
 *   up to MAX_DELTA_RESET_SECONDS count as seconds from now; larger values
 *   count as an absolute timestamp in seconds.
 */
function calculateRateLimitWaitMs(
  resetHeader: string | null,
  fallbackWaitMs: number,
): number {
  const resetValue = resetHeader ? Number(resetHeader) : Number.NaN;
  if (!Number.isFinite(resetValue)) return fallbackWaitMs;

  let waitMs = resetValue * 1000;
  if (resetValue > MAX_DELTA_RESET_SECONDS) {
    // Absolute timestamp: convert to a distance from the current time.
    waitMs -= Date.now();
  }

  const nonNegativeWait = Math.max(0, waitMs);
  return Math.min(nonNegativeWait, MAX_RATE_LIMIT_WAIT_MS);
}
|
||||
|
||||
/**
 * Value `fetchHtml` resolves to when called with `includeResponseUrl: true`.
 */
export interface FetchHtmlResult {
  /** Body text of the response. */
  html: HTMLString;
  /** URL reported by the response, or the requested URL when that is empty. */
  responseUrl: string;
}
|
||||
|
||||
/** Options for fetchHtml */
|
||||
@@ -74,6 +121,8 @@ export interface FetchHtmlOptions {
|
||||
timeoutMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
headers?: Record<string, string>;
|
||||
includeResponseUrl?: boolean;
|
||||
jitter?: () => number;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -81,14 +130,24 @@ export interface FetchHtmlOptions {
|
||||
* @param url - The URL to fetch
|
||||
* @param delayMs - Delay in milliseconds between requests (rate limiting)
|
||||
* @param opts - Optional fetch options
|
||||
* @returns The HTML content as a string
|
||||
* @returns The HTML content as a string, or an object with html and responseUrl
|
||||
* @throws HttpError, NetworkError, or RateLimitError on failure
|
||||
*/
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts: FetchHtmlOptions & { includeResponseUrl: true },
|
||||
): Promise<FetchHtmlResult>;
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts?: FetchHtmlOptions,
|
||||
): Promise<string> {
|
||||
): Promise<HTMLString>;
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts?: FetchHtmlOptions,
|
||||
): Promise<HTMLString | FetchHtmlResult> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
||||
const timeoutMs = opts?.timeoutMs ?? 30000;
|
||||
@@ -119,13 +178,17 @@ export async function fetchHtml(
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "GET",
|
||||
headers: { ...defaultHeaders, ...opts?.headers },
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
clearTimeout(timeoutId);
|
||||
const res = await (async () => {
|
||||
try {
|
||||
return await fetch(url, {
|
||||
method: "GET",
|
||||
headers: mergeHeaders(defaultHeaders, opts?.headers),
|
||||
signal: controller.signal,
|
||||
});
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
}
|
||||
})();
|
||||
|
||||
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
||||
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
||||
@@ -137,12 +200,17 @@ export async function fetchHtml(
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: calculateBackoffDelay(attempt, retryBaseMs);
|
||||
const waitMs = calculateRateLimitWaitMs(
|
||||
rateLimitReset,
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||
await delay(waitMs);
|
||||
continue;
|
||||
}
|
||||
throw new RateLimitError(
|
||||
@@ -154,8 +222,12 @@ export async function fetchHtml(
|
||||
|
||||
// Retry on server errors
|
||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
await delay(
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -170,8 +242,10 @@ export async function fetchHtml(
|
||||
const html = await res.text();
|
||||
|
||||
// Respect per-request delay to maintain rate limiting
|
||||
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
||||
return html;
|
||||
await delay(delayMs);
|
||||
return opts?.includeResponseUrl
|
||||
? { html, responseUrl: res.url || url }
|
||||
: html;
|
||||
} catch (err) {
|
||||
// Re-throw known errors
|
||||
if (
|
||||
@@ -184,8 +258,12 @@ export async function fetchHtml(
|
||||
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
await delay(
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -194,8 +272,12 @@ export async function fetchHtml(
|
||||
|
||||
// Network or other errors
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
await delay(
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
10
packages/core/src/utils/logger.ts
Normal file
10
packages/core/src/utils/logger.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
/** True while the process runs with NODE_ENV set to "test". */
const isTest = () => process.env.NODE_ENV === "test";

/**
 * Console wrapper that stays silent under NODE_ENV=test.
 *
 * The console methods are looked up at call time, so replacing
 * `console.log` / `console.warn` afterwards takes effect.
 */
export const logger = {
  /** Forward to `console.log` unless running under test. */
  log: (...args: Parameters<typeof console.log>) => {
    if (isTest()) return;
    console.log(...args);
  },
  /** Forward to `console.warn` unless running under test. */
  warn: (...args: Parameters<typeof console.warn>) => {
    if (isTest()) return;
    console.warn(...args);
  },
};
|
||||
58
packages/core/src/utils/unstable.ts
Normal file
58
packages/core/src/utils/unstable.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import type { UnstableListingBuckets } from "../types/common";
|
||||
|
||||
/**
 * Minimal shape a listing needs for unstable-price classification: an
 * optional (possibly null) price object with an optional `cents` amount.
 */
interface HasListingPrice {
  listingPrice?: { cents?: number } | null;
}
|
||||
|
||||
/**
 * Median of `values`, which the caller must already have sorted.
 *
 * For an even count the two middle entries are averaged; for an odd count
 * the middle entry is returned. An empty array yields 0.
 */
function getMedian(values: number[]): number {
  const count = values.length;
  const upperIndex = Math.floor(count / 2);
  const upperValue = values[upperIndex] ?? 0;

  if (count % 2 !== 0) {
    return upperValue;
  }

  const lowerValue = values[upperIndex - 1] ?? 0;
  return (lowerValue + upperValue) / 2;
}
|
||||
|
||||
/**
 * Split listings into regular and unstable buckets by price.
 *
 * Only finite, positive `listingPrice.cents` values count as valid prices.
 * With fewer than two valid prices every listing goes to `results`.
 * Otherwise a listing with a valid price below 80% of the median valid
 * price goes to `unstableResults`; all other listings, including those
 * without a valid price, go to `results`. Input order is preserved within
 * each bucket.
 */
export function classifyUnstableListings<T extends HasListingPrice>(
  listings: T[],
): UnstableListingBuckets<T> {
  const isValidPrice = (cents: number | undefined): cents is number =>
    Number.isFinite(cents) && (cents ?? 0) > 0;

  const sortedPrices = listings
    .map((entry) => entry.listingPrice?.cents)
    .filter(isValidPrice)
    .sort((a, b) => a - b);

  if (sortedPrices.length < 2) {
    return { results: [...listings], unstableResults: [] };
  }

  const cutoff = getMedian(sortedPrices) * 0.8;
  const results: T[] = [];
  const unstableResults: T[] = [];

  for (const entry of listings) {
    const cents = entry.listingPrice?.cents;
    const belowCutoff = isValidPrice(cents) && cents < cutoff;
    (belowCutoff ? unstableResults : results).push(entry);
  }

  return { results, unstableResults };
}
|
||||
24
packages/core/test/delay.test.ts
Normal file
24
packages/core/test/delay.test.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import { afterEach, describe, expect, mock, test } from "bun:test";
|
||||
import { delay } from "../src/utils/delay";
|
||||
|
||||
// Verifies that delay() resolves without scheduling a timer when
// NODE_ENV is "test".
describe("delay", () => {
  const originalNodeEnv = process.env.NODE_ENV;
  const originalSetTimeout = globalThis.setTimeout;

  afterEach(() => {
    // Assigning undefined to a process.env key stores the string
    // "undefined", so an originally unset variable must be deleted instead.
    if (originalNodeEnv === undefined) {
      delete process.env.NODE_ENV;
    } else {
      process.env.NODE_ENV = originalNodeEnv;
    }
    globalThis.setTimeout = originalSetTimeout;
  });

  test("does not schedule throttle timers during tests", async () => {
    process.env.NODE_ENV = "test";
    // Any attempt to schedule a timer fails the test immediately.
    const setTimeoutMock = mock(() => {
      throw new Error("setTimeout should not be called during tests");
    });
    globalThis.setTimeout = setTimeoutMock as unknown as typeof setTimeout;

    await delay(1000);

    expect(setTimeoutMock).not.toHaveBeenCalled();
  });
});
|
||||
@@ -1,17 +1,42 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import type { EbayListingDetails } from "../src/scrapers/ebay";
|
||||
import fetchEbayItems from "../src/scrapers/ebay";
|
||||
import type { UnstableListingBuckets } from "../src/types/common";
|
||||
|
||||
/** Compile-time assertion: only type-checks when `T` is exactly `true`. */
type Assert<T extends true> = T;

/**
 * Resolves to `true` only when `T` and `U` pass the mutual-assignability
 * check on the generic function types below in both directions; otherwise
 * resolves to `false`.
 */
type IsExact<T, U> =
  (<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
    ? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
      ? true
      : false
    : false;
|
||||
|
||||
// Type-level checks of the fetchEbayItems return type. The helpers below
// exist only so their return types can be inspected.

/** Call without unstable-mode options. */
const getDefaultEbayItems = async () => fetchEbayItems("laptop");

/** Call with `hideUnstableResults` enabled. */
const getUnstableEbayItems = async () =>
  fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });

// The default call must resolve to a flat array of listings.
type _EbayDefaultReturn = Assert<
  IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
>;

// With hideUnstableResults the call must resolve to the bucketed shape.
type _EbayUnstableReturn = Assert<
  IsExact<
    Awaited<ReturnType<typeof getUnstableEbayItems>>,
    UnstableListingBuckets<EbayListingDetails>
  >
>;
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
const originalWarn = console.warn;
|
||||
|
||||
describe("eBay Scraper Cookie Handling", () => {
|
||||
beforeEach(() => {
|
||||
delete process.env.EBAY_COOKIE;
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () => Promise.resolve("<html><body></body></html>"),
|
||||
}),
|
||||
) as typeof fetch;
|
||||
) as unknown as typeof fetch;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -21,21 +46,724 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
});
|
||||
|
||||
test("should ignore request cookie overrides and rely on EBAY_COOKIE", async () => {
|
||||
const warnMock = mock(() => {});
|
||||
console.warn = warnMock;
|
||||
await fetchEbayItems("laptop", 1000);
|
||||
|
||||
await fetchEbayItems("laptop", 1000, {
|
||||
cookies: "s=from-request",
|
||||
});
|
||||
// First call is homepage warm-up, second is search
|
||||
expect(global.fetch).toHaveBeenCalledTimes(2);
|
||||
|
||||
expect(global.fetch).toHaveBeenCalledTimes(1);
|
||||
// The search request is the second call
|
||||
const secondFetchCall = (global.fetch as unknown as ReturnType<typeof mock>)
|
||||
.mock.calls[1];
|
||||
if (!secondFetchCall) {
|
||||
throw new Error("Expected search fetch to be called");
|
||||
}
|
||||
|
||||
const [, init] = (global.fetch as ReturnType<typeof mock>).mock.calls[0];
|
||||
const [searchUrl, init] = secondFetchCall;
|
||||
const headers = (init as RequestInit).headers as Record<string, string>;
|
||||
|
||||
expect(searchUrl).toBe(
|
||||
"https://www.ebay.ca/sch/i.html?_nkw=laptop&_sacat=0&_from=R40&LH_BIN=1&LH_PrefLoc=1",
|
||||
);
|
||||
expect(headers.Cookie).toBeUndefined();
|
||||
expect(warnMock).toHaveBeenCalledWith(
|
||||
"No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.",
|
||||
});
|
||||
|
||||
test("keeps relative item links on the ebay.ca host", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({ url: "https://www.ebay.ca/itm/123" }),
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns empty results when eBay rate-limits the request", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: false,
|
||||
status: 429,
|
||||
headers: { get: () => "0" },
|
||||
text: () => Promise.resolve(""),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
|
||||
test("deduplicates repeated item links from the same card", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"><span>Open</span></a>
|
||||
<a href="/itm/123"><span>Image</span></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0]).toEqual(
|
||||
expect.objectContaining({ url: "https://www.ebay.ca/itm/123" }),
|
||||
);
|
||||
});
|
||||
|
||||
test("deduplicates tracking variants of the same item URL", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123?_trkparms=foo"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/123?hash=item123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0]).toEqual(
|
||||
expect.objectContaining({
|
||||
url: "https://www.ebay.ca/itm/123?_trkparms=foo",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
test("deduplicates tracking variants of SEO-style item URLs", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/title-slug/1234567890?_trkparms=foo"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/title-slug/1234567890?hash=item123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/title-slug/9999999999?hash=item999"></a>
|
||||
<h3>Another Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $110.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toHaveLength(2);
|
||||
expect(results[0]).toEqual(
|
||||
expect.objectContaining({
|
||||
url: "https://www.ebay.ca/itm/title-slug/1234567890?_trkparms=foo",
|
||||
}),
|
||||
);
|
||||
expect(results[1]).toEqual(
|
||||
expect.objectContaining({
|
||||
url: "https://www.ebay.ca/itm/title-slug/9999999999?hash=item999",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
test("treats bare dollar prices as CAD on ebay.ca", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">$100.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({ currency: "CAD" }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("parses current eBay s-card markup with unquoted item links", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<div class="s-card s-card--horizontal">
|
||||
<div class=su-card-container__header>
|
||||
<a class=s-card__link href=https://ebay.com/itm/1234567890?itmmeta=abc>
|
||||
<div role=heading aria-level=3 class=s-card__title>
|
||||
<span class="su-styled-text primary default">Apple MacBook Air M1 2020 8GB 256GB</span>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class=su-card-container__attributes>
|
||||
<span class="su-styled-text primary bold large-1 s-card__price">CA $599.00</span>
|
||||
</div>
|
||||
</div>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("macbook", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Apple MacBook Air M1 2020 8GB 256GB",
|
||||
url: "https://ebay.com/itm/1234567890?itmmeta=abc",
|
||||
listingPrice: expect.objectContaining({ cents: 59_900 }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("parses embedded eBay payload listings before HTML fallback", async () => {
|
||||
const payload = encodeURIComponent(
|
||||
JSON.stringify({
|
||||
searchResults: [
|
||||
{
|
||||
title: "Apple MacBook Air M1 API Result",
|
||||
itemWebUrl: "https://www.ebay.ca/itm/9876543210?hash=item987",
|
||||
price: { value: "550.00", currency: "CAD" },
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<script data-inlinepayload="${payload}"></script>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("macbook", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Apple MacBook Air M1 API Result",
|
||||
url: "https://www.ebay.ca/itm/9876543210?hash=item987",
|
||||
listingPrice: expect.objectContaining({
|
||||
amountFormatted: "CAD 550.00",
|
||||
cents: 55_000,
|
||||
currency: "CAD",
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("treats US dollar prices as USD", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">US $123.45</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
currency: "USD",
|
||||
cents: 12345,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("treats US dollar prices without space as USD", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">US$123.45</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
currency: "USD",
|
||||
cents: 12345,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("maps pound prices to GBP", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">£123.45</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
currency: "GBP",
|
||||
cents: 12345,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("maps euro and yen prices to the matching currency labels", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Euro Bundle</h3>
|
||||
<span class="s-item__price">€123.45</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="/itm/456"></a>
|
||||
<h3>Yen Bundle</h3>
|
||||
<span class="s-item__price">¥123</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("bundle", 1000, {
|
||||
keywords: ["bundle"],
|
||||
});
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
currency: "EUR",
|
||||
cents: 12345,
|
||||
}),
|
||||
}),
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
currency: "JPY",
|
||||
cents: 12300,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("prefers the discounted Canadian-formatted price", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">
|
||||
<s>CA $150.00</s>
|
||||
<span>CA $100.00</span>
|
||||
</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
amountFormatted: "CA $100.00",
|
||||
cents: 10000,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("prefers discounted Canadian prices that contain four consecutive digits", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">
|
||||
<s>CA $1500.00</s>
|
||||
<span>CA $1000.00</span>
|
||||
</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
amountFormatted: "CA $1000.00",
|
||||
cents: 100000,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("prefers discounted US dollar prices over original prices", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">
|
||||
<s>US $150.00</s>
|
||||
<span>US $100.00</span>
|
||||
</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({
|
||||
amountFormatted: "US $100.00",
|
||||
cents: 10000,
|
||||
currency: "USD",
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("keeps short titles that were not shortened by UI cleaning", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Free Bike</h3>
|
||||
<span class="s-item__price">CA $0.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("bike", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Free Bike",
|
||||
listingPrice: expect.objectContaining({ cents: 0, currency: "CAD" }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("accepts higher fallback prices without price classes", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Studio Microphone Bundle</h3>
|
||||
<div>CA $2500.00</div>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("microphone", 1000, {
|
||||
keywords: ["microphone"],
|
||||
});
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Studio Microphone Bundle",
|
||||
listingPrice: expect.objectContaining({
|
||||
amountFormatted: "CA $2500.00",
|
||||
cents: 250000,
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("retains free items when the requested price range includes zero", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Free Laptop Bundle</h3>
|
||||
<span class="s-item__price">$0.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000, {
|
||||
minPrice: 0,
|
||||
maxPrice: 0,
|
||||
});
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Free Laptop Bundle",
|
||||
listingPrice: expect.objectContaining({ cents: 0 }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/1"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/2"></a>
|
||||
<h3>Another Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $110.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/3"></a>
|
||||
<h3>Cheap Laptop Bundle</h3>
|
||||
<span class="s-item__price">CA $70.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems(
|
||||
"laptop",
|
||||
1000,
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
|
||||
expect(results).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({ title: "Stable Laptop Bundle" }),
|
||||
expect.objectContaining({ title: "Another Laptop Bundle" }),
|
||||
],
|
||||
unstableResults: [
|
||||
expect.objectContaining({ title: "Cheap Laptop Bundle" }),
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
test("respects maxItems in default mode", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/1"></a>
|
||||
<h3>First Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/2"></a>
|
||||
<h3>Second Bundle</h3>
|
||||
<span class="s-item__price">CA $110.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/3"></a>
|
||||
<h3>Third Bundle</h3>
|
||||
<span class="s-item__price">CA $70.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000, { maxItems: 2 });
|
||||
|
||||
expect(results).toHaveLength(2);
|
||||
expect(results[0]).toEqual(
|
||||
expect.objectContaining({ title: "First Bundle" }),
|
||||
);
|
||||
expect(results[1]).toEqual(
|
||||
expect.objectContaining({ title: "Second Bundle" }),
|
||||
);
|
||||
});
|
||||
|
||||
test("respects maxItems in unstable mode", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/1"></a>
|
||||
<h3>First Bundle</h3>
|
||||
<span class="s-item__price">CA $100.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/2"></a>
|
||||
<h3>Second Bundle</h3>
|
||||
<span class="s-item__price">CA $110.00</span>
|
||||
</li>
|
||||
<li class="s-item">
|
||||
<a href="https://www.ebay.ca/itm/3"></a>
|
||||
<h3>Third Bundle</h3>
|
||||
<span class="s-item__price">CA $70.00</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems(
|
||||
"laptop",
|
||||
1000,
|
||||
{ maxItems: 2 },
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
|
||||
expect(results.results).toHaveLength(2);
|
||||
expect(results.unstableResults).toHaveLength(0);
|
||||
expect(results.results[0]).toEqual(
|
||||
expect.objectContaining({ title: "First Bundle" }),
|
||||
);
|
||||
expect(results.results[1]).toEqual(
|
||||
expect.objectContaining({ title: "Second Bundle" }),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,18 +1,50 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import cliProgress from "cli-progress";
|
||||
import {
|
||||
classifyFacebookResponse,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookBootstrapCandidates,
|
||||
extractFacebookItemData,
|
||||
extractFacebookMarketplaceData,
|
||||
type FacebookListingDetails,
|
||||
fetchFacebookItem,
|
||||
default as fetchFacebookItems,
|
||||
parseFacebookAds,
|
||||
parseFacebookCookieString,
|
||||
parseFacebookItem,
|
||||
} from "../src/scrapers/facebook";
|
||||
import type { UnstableListingBuckets } from "../src/types/common";
|
||||
import { formatCookiesForHeader } from "../src/utils/cookies";
|
||||
import { formatCentsToCurrency } from "../src/utils/format";
|
||||
|
||||
const originalStdoutIsTTY = process.stdout.isTTY;
|
||||
|
||||
type Assert<T extends true> = T;
|
||||
type IsExact<T, U> =
|
||||
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||
? true
|
||||
: false
|
||||
: false;
|
||||
|
||||
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
|
||||
const getUnstableFacebookItems = async (): Promise<
|
||||
UnstableListingBuckets<FacebookListingDetails>
|
||||
> =>
|
||||
fetchFacebookItems("chair", 1, "toronto", 25, { hideUnstableResults: true });
|
||||
type _FacebookDefaultReturn = Assert<
|
||||
IsExact<
|
||||
Awaited<ReturnType<typeof getDefaultFacebookItems>>,
|
||||
FacebookListingDetails[]
|
||||
>
|
||||
>;
|
||||
type _FacebookUnstableReturn = Assert<
|
||||
IsExact<
|
||||
Awaited<ReturnType<typeof getUnstableFacebookItems>>,
|
||||
UnstableListingBuckets<FacebookListingDetails>
|
||||
>
|
||||
>;
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
@@ -20,11 +52,12 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
beforeEach(() => {
|
||||
global.fetch = mock(() => {
|
||||
throw new Error("fetch should be mocked in individual tests");
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
process.stdout.isTTY = originalStdoutIsTTY;
|
||||
});
|
||||
|
||||
describe("Cookie Parsing", () => {
|
||||
@@ -37,6 +70,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
expect(result[0]).toEqual({
|
||||
name: "c_user",
|
||||
value: "123456789",
|
||||
rawValue: "123456789",
|
||||
domain: ".facebook.com",
|
||||
path: "/",
|
||||
secure: true,
|
||||
@@ -47,6 +81,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
expect(result[1]).toEqual({
|
||||
name: "xs",
|
||||
value: "abcdef123456",
|
||||
rawValue: "abcdef123456",
|
||||
domain: ".facebook.com",
|
||||
path: "/",
|
||||
secure: true,
|
||||
@@ -60,8 +95,18 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
|
||||
const result = parseFacebookCookieString(cookieString);
|
||||
|
||||
expect(result[0].value).toBe("123+456");
|
||||
expect(result[1].value).toBe("abc=def");
|
||||
expect(result[0]?.value).toBe("123+456");
|
||||
expect(result[1]?.value).toBe("abc=def");
|
||||
});
|
||||
|
||||
test("should preserve raw encoded values when formatting cookie headers", () => {
|
||||
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
|
||||
const result = formatCookiesForHeader(
|
||||
parseFacebookCookieString(cookieString),
|
||||
"www.facebook.com",
|
||||
);
|
||||
|
||||
expect(result).toBe(cookieString);
|
||||
});
|
||||
|
||||
test("should filter out malformed cookies", () => {
|
||||
@@ -82,10 +127,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
const result = parseFacebookCookieString(cookieString);
|
||||
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result[0].name).toBe("c_user");
|
||||
expect(result[0].value).toBe("123");
|
||||
expect(result[1].name).toBe("xs");
|
||||
expect(result[1].value).toBe("abc");
|
||||
expect(result[0]?.name).toBe("c_user");
|
||||
expect(result[0]?.value).toBe("123");
|
||||
expect(result[1]?.name).toBe("xs");
|
||||
expect(result[1]?.value).toBe("abc");
|
||||
});
|
||||
|
||||
test("should load Facebook cookies from FACEBOOK_COOKIE env var", async () => {
|
||||
@@ -144,10 +189,6 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
});
|
||||
|
||||
test("should handle authentication errors", async () => {
|
||||
const originalWarn = console.warn;
|
||||
const warnMock = mock(() => {});
|
||||
console.warn = warnMock;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: false,
|
||||
@@ -157,17 +198,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
try {
|
||||
const result = await fetchFacebookItem("123");
|
||||
expect(result).toBeNull();
|
||||
expect(warnMock).toHaveBeenCalledWith(
|
||||
"Authentication error: Invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
|
||||
);
|
||||
} finally {
|
||||
console.warn = originalWarn;
|
||||
}
|
||||
const result = await fetchFacebookItem("123");
|
||||
expect(result).toBeNull();
|
||||
expect(global.fetch).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
test("should handle item not found", async () => {
|
||||
@@ -180,7 +215,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("nonexistent");
|
||||
expect(result).toBeNull();
|
||||
@@ -240,13 +275,37 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
});
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
|
||||
const _result = await fetchFacebookItem("123");
|
||||
expect(attempts).toBe(2);
|
||||
// Should eventually succeed after retry
|
||||
});
|
||||
|
||||
test("should handle exhausted rate limiting retries as a 429", async () => {
|
||||
let attempts = 0;
|
||||
|
||||
global.fetch = mock(() => {
|
||||
attempts++;
|
||||
return Promise.resolve({
|
||||
ok: false,
|
||||
status: 429,
|
||||
headers: {
|
||||
get: (header: string) => {
|
||||
if (header === "X-RateLimit-Reset") return "0";
|
||||
return null;
|
||||
},
|
||||
},
|
||||
text: () => Promise.resolve("Rate limited"),
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("429-loop");
|
||||
|
||||
expect(result).toBeNull();
|
||||
expect(attempts).toBe(4);
|
||||
});
|
||||
|
||||
test("should handle sold items", async () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
@@ -288,12 +347,107 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("456");
|
||||
expect(result?.listingStatus).toBe("SOLD");
|
||||
});
|
||||
|
||||
test("should still parse sold items when structured data exists", async () => {
|
||||
const soldStructuredHtml = `
|
||||
<html><body>
|
||||
<div>This item has been sold</div>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>
|
||||
${JSON.stringify({
|
||||
payload: {
|
||||
listing: {
|
||||
id: "457",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Structured Sold Item",
|
||||
formatted_price: { text: "CA$90" },
|
||||
listing_price: {
|
||||
amount: "90.00",
|
||||
currency: "CAD",
|
||||
amount_with_offset: "90.00",
|
||||
},
|
||||
is_sold: true,
|
||||
is_live: false,
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(soldStructuredHtml),
|
||||
url: "https://www.facebook.com/marketplace/item/457/",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("457");
|
||||
|
||||
expect(result).toEqual(
|
||||
expect.objectContaining({
|
||||
title: "Structured Sold Item",
|
||||
listingStatus: "SOLD",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
test("should parse structured data even when an unavailable banner is present", async () => {
|
||||
const unavailableStructuredHtml = `
|
||||
<html><body>
|
||||
<div>This listing is no longer available.</div>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>
|
||||
${JSON.stringify({
|
||||
payload: {
|
||||
listing: {
|
||||
id: "458",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Recovered Item",
|
||||
formatted_price: { text: "CA$120" },
|
||||
listing_price: {
|
||||
amount: "120.00",
|
||||
currency: "CAD",
|
||||
amount_with_offset: "120.00",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(unavailableStructuredHtml),
|
||||
url: "https://www.facebook.com/marketplace/item/458/",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("458");
|
||||
|
||||
expect(result).toEqual(
|
||||
expect.objectContaining({
|
||||
title: "Recovered Item",
|
||||
listingStatus: "ACTIVE",
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
test("should handle successful item extraction", async () => {
|
||||
const mockData = {
|
||||
require: [
|
||||
@@ -340,7 +494,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("789");
|
||||
expect(result).not.toBeNull();
|
||||
@@ -359,7 +513,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("error");
|
||||
expect(result).toBeNull();
|
||||
@@ -367,6 +521,349 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("fetchFacebookItems", () => {
|
||||
let previousCookie: string | undefined;
|
||||
|
||||
beforeEach(() => {
|
||||
previousCookie = process.env.FACEBOOK_COOKIE;
|
||||
process.env.FACEBOOK_COOKIE = "c_user=12345; xs=abc123";
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (previousCookie === undefined) {
|
||||
delete process.env.FACEBOOK_COOKIE;
|
||||
} else {
|
||||
process.env.FACEBOOK_COOKIE = previousCookie;
|
||||
}
|
||||
});
|
||||
|
||||
test("returns an array by default", async () => {
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
|
||||
{
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "Stable Chair Listing",
|
||||
listing_price: {
|
||||
amount: "120.00",
|
||||
formatted_amount: "CA$120",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
)}</script></body></html>`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(mockSearchHtml),
|
||||
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("chair", 1, "toronto", 25);
|
||||
|
||||
expect(Array.isArray(results)).toBe(true);
|
||||
expect(results).toHaveLength(1);
|
||||
});
|
||||
|
||||
test("preserves free listings through the public fetch entrypoint", async () => {
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
|
||||
{
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "free-1",
|
||||
marketplace_listing_title: "Free Chair",
|
||||
listing_price: {
|
||||
amount: "0.00",
|
||||
formatted_amount: "FREE",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
)}</script></body></html>`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(mockSearchHtml),
|
||||
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("chair", 1, "toronto", 25);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Free Chair",
|
||||
listingPrice: expect.objectContaining({
|
||||
cents: 0,
|
||||
amountFormatted: "FREE",
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("does not start a progress bar when stdout is not a TTY", async () => {
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
|
||||
{
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "Chair Listing",
|
||||
listing_price: {
|
||||
amount: "120.00",
|
||||
formatted_amount: "CA$120",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
)}</script></body></html>`;
|
||||
|
||||
process.stdout.isTTY = false;
|
||||
const startSpy = mock(() => {});
|
||||
const updateSpy = mock(() => {});
|
||||
const stopSpy = mock(() => {});
|
||||
const originalStart = cliProgress.SingleBar.prototype.start;
|
||||
const originalUpdate = cliProgress.SingleBar.prototype.update;
|
||||
const originalStop = cliProgress.SingleBar.prototype.stop;
|
||||
try {
|
||||
cliProgress.SingleBar.prototype.start = startSpy;
|
||||
cliProgress.SingleBar.prototype.update = updateSpy;
|
||||
cliProgress.SingleBar.prototype.stop = stopSpy;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(mockSearchHtml),
|
||||
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("chair", 1, "toronto", 25);
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(startSpy).not.toHaveBeenCalled();
|
||||
expect(updateSpy).not.toHaveBeenCalled();
|
||||
expect(stopSpy).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
cliProgress.SingleBar.prototype.start = originalStart;
|
||||
cliProgress.SingleBar.prototype.update = originalUpdate;
|
||||
cliProgress.SingleBar.prototype.stop = originalStop;
|
||||
}
|
||||
});
|
||||
|
||||
test("returns results and unstableResults when unstable mode is enabled", async () => {
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
|
||||
{
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "Stable Chair Listing",
|
||||
listing_price: {
|
||||
amount: "100.00",
|
||||
formatted_amount: "CA$100",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "2",
|
||||
marketplace_listing_title: "Another Stable Chair",
|
||||
listing_price: {
|
||||
amount: "110.00",
|
||||
formatted_amount: "CA$110",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "3",
|
||||
marketplace_listing_title: "Suspiciously Cheap Chair",
|
||||
listing_price: {
|
||||
amount: "70.00",
|
||||
formatted_amount: "CA$70",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
)}</script></body></html>`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(mockSearchHtml),
|
||||
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("chair", 1, "toronto", 25, {
|
||||
hideUnstableResults: true,
|
||||
});
|
||||
|
||||
expect(results).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({ title: "Stable Chair Listing" }),
|
||||
expect.objectContaining({ title: "Another Stable Chair" }),
|
||||
],
|
||||
unstableResults: [
|
||||
expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
test("unstable mode classifies before the final MAX_ITEMS limit", async () => {
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
|
||||
{
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "Boundary Stable Chair",
|
||||
listing_price: {
|
||||
amount: "100.00",
|
||||
formatted_amount: "CA$100",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "2",
|
||||
marketplace_listing_title:
|
||||
"Second Boundary Stable Chair",
|
||||
listing_price: {
|
||||
amount: "110.00",
|
||||
formatted_amount: "CA$110",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "3",
|
||||
marketplace_listing_title: "Past Boundary Cheap Chair",
|
||||
listing_price: {
|
||||
amount: "70.00",
|
||||
formatted_amount: "CA$70",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
)}</script></body></html>`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(mockSearchHtml),
|
||||
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("chair", 1, "toronto", 2, {
|
||||
hideUnstableResults: true,
|
||||
});
|
||||
|
||||
expect(results).toEqual({
|
||||
results: [
|
||||
expect.objectContaining({ title: "Boundary Stable Chair" }),
|
||||
expect.objectContaining({ title: "Second Boundary Stable Chair" }),
|
||||
],
|
||||
unstableResults: [
|
||||
expect.objectContaining({ title: "Past Boundary Cheap Chair" }),
|
||||
],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Data Extraction", () => {
|
||||
describe("extractFacebookItemData", () => {
|
||||
test("extracts item details from Comet permalink bootstrap candidates", () => {
|
||||
@@ -388,7 +885,10 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
},
|
||||
redacted_description: { text: "Solid wood chair" },
|
||||
location_text: { text: "Toronto, ON" },
|
||||
marketplace_listing_seller: { id: "seller-1", name: "Alex" },
|
||||
marketplace_listing_seller: {
|
||||
id: "seller-1",
|
||||
name: "Alex",
|
||||
},
|
||||
condition: "USED",
|
||||
is_live: true,
|
||||
},
|
||||
@@ -633,7 +1133,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
const result = extractFacebookMarketplaceData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result?.[0].node.listing.marketplace_listing_title).toBe(
|
||||
expect(result?.[0]?.node.listing.marketplace_listing_title).toBe(
|
||||
"Item 1",
|
||||
);
|
||||
});
|
||||
@@ -654,11 +1154,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
const result = extractFacebookMarketplaceData(html);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result?.[0].node.listing.id).toBe("987654321");
|
||||
expect(result?.[0].node.listing.marketplace_listing_title).toBe(
|
||||
expect(result?.[0]?.node.listing.id).toBe("987654321");
|
||||
expect(result?.[0]?.node.listing.marketplace_listing_title).toBe(
|
||||
"Vintage Bike",
|
||||
);
|
||||
expect(result?.[0].node.listing.listing_price).toEqual({
|
||||
expect(result?.[0]?.node.listing.listing_price).toEqual({
|
||||
amount: "120.00",
|
||||
formatted_amount: "CA$120",
|
||||
currency: "CAD",
|
||||
@@ -886,7 +1386,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
|
||||
const ads = extractFacebookMarketplaceData(html);
|
||||
expect(ads).toHaveLength(1);
|
||||
expect(ads?.[0].node.listing.marketplace_listing_title).toBe("Bike");
|
||||
expect(ads?.[0]?.node.listing.marketplace_listing_title).toBe("Bike");
|
||||
});
|
||||
|
||||
test("prefers the strongest marketplace edge set when multiple edges arrays exist", () => {
|
||||
@@ -944,7 +1444,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
|
||||
const ads = extractFacebookMarketplaceData(html);
|
||||
expect(ads).toHaveLength(1);
|
||||
expect(ads?.[0].node.listing.id).toBe("right-1");
|
||||
expect(ads?.[0]?.node.listing.id).toBe("right-1");
|
||||
});
|
||||
|
||||
test("rejects mixed edge arrays that contain non-listing entries", () => {
|
||||
@@ -1051,10 +1551,21 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(item);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.title).toBe("Minimal Item");
|
||||
expect(result?.description).toBeUndefined();
|
||||
expect(result?.seller).toBeUndefined();
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("returns null when item price data is present but unparseable", () => {
|
||||
const item = {
|
||||
id: "456b",
|
||||
__typename: "GroupCommerceProductItem" as const,
|
||||
marketplace_listing_title: "Broken Price Item",
|
||||
formatted_price: { text: "price unavailable" },
|
||||
listing_price: { amount: "not-a-number", currency: "CAD" },
|
||||
};
|
||||
|
||||
const result = parseFacebookItem(item);
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("should identify vehicle listings", () => {
|
||||
@@ -1158,11 +1669,11 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
expect(results).toHaveLength(2);
|
||||
expect(results[0].title).toBe("Ad 1");
|
||||
expect(results[0].listingPrice?.cents).toBe(5000);
|
||||
expect(results[0].address).toBe("Toronto");
|
||||
expect(results[1].title).toBe("Ad 2");
|
||||
expect(results[1].address).toBe("Ottawa");
|
||||
expect(results[0]?.title).toBe("Ad 1");
|
||||
expect(results[0]?.listingPrice?.cents).toBe(5000);
|
||||
expect(results[0]?.address).toBe("Toronto");
|
||||
expect(results[1]?.title).toBe("Ad 2");
|
||||
expect(results[1]?.address).toBe("Ottawa");
|
||||
});
|
||||
|
||||
test("should filter out ads without price", () => {
|
||||
@@ -1194,7 +1705,7 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("With Price");
|
||||
expect(results[0]?.title).toBe("With Price");
|
||||
});
|
||||
|
||||
test("should handle malformed ads gracefully", () => {
|
||||
@@ -1217,12 +1728,125 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
node: {
|
||||
// Missing listing
|
||||
},
|
||||
} as { node: { listing?: unknown } },
|
||||
} as unknown as { node: { listing?: unknown } },
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(
|
||||
ads as unknown as Parameters<typeof parseFacebookAds>[0],
|
||||
);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0]?.title).toBe("Valid Ad");
|
||||
});
|
||||
|
||||
test("parses formatted fallback prices with multiple commas", () => {
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "big-price",
|
||||
marketplace_listing_title: "Luxury Home",
|
||||
listing_price: {
|
||||
amount_with_offset_in_currency: "123456789",
|
||||
formatted_amount: "$1,234,567.89",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("Valid Ad");
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({ cents: 123456789 }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("does not trust amount_with_offset_in_currency without a parseable formatted price", () => {
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "bad-offset",
|
||||
marketplace_listing_title: "Broken Price Listing",
|
||||
listing_price: {
|
||||
amount_with_offset_in_currency: "123456789",
|
||||
formatted_amount: "price unavailable",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
|
||||
test("keeps valid free search listings", () => {
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "free-item",
|
||||
marketplace_listing_title: "Free Chair",
|
||||
listing_price: {
|
||||
amount: "0.00",
|
||||
formatted_amount: "FREE",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Free Chair",
|
||||
listingPrice: expect.objectContaining({
|
||||
cents: 0,
|
||||
amountFormatted: "FREE",
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("keeps free search listings when amount is missing but formatted_amount is FREE", () => {
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "free-no-amount",
|
||||
marketplace_listing_title: "Free Sofa",
|
||||
listing_price: {
|
||||
formatted_amount: "FREE",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Free Sofa",
|
||||
listingPrice: expect.objectContaining({
|
||||
cents: 0,
|
||||
amountFormatted: "FREE",
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import fetchFacebookItems, { fetchFacebookItem } from "../src/scrapers/facebook";
|
||||
import fetchFacebookItems, {
|
||||
fetchFacebookItem,
|
||||
} from "../src/scrapers/facebook";
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
@@ -13,7 +15,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
process.env.FACEBOOK_COOKIE = facebookCookie;
|
||||
global.fetch = mock(() => {
|
||||
throw new Error("fetch should be mocked in individual tests");
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -27,35 +29,37 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
|
||||
describe("Main Search Function", () => {
|
||||
test("should successfully fetch search results", async () => {
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "iPhone 13",
|
||||
listing_price: {
|
||||
amount: "500.00",
|
||||
formatted_amount: "CA$500",
|
||||
currency: "CAD",
|
||||
},
|
||||
location: {
|
||||
reverse_geocode: {
|
||||
city_page: { display_name: "Toronto" },
|
||||
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(
|
||||
{
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "iPhone 13",
|
||||
listing_price: {
|
||||
amount: "500.00",
|
||||
formatted_amount: "CA$500",
|
||||
currency: "CAD",
|
||||
},
|
||||
location: {
|
||||
reverse_geocode: {
|
||||
city_page: { display_name: "Toronto" },
|
||||
},
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
})}</script></body></html>`;
|
||||
)}</script></body></html>`;
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
@@ -65,11 +69,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("iPhone", 1, "toronto", 25);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("iPhone 13");
|
||||
expect(results[0]?.title).toBe("iPhone 13");
|
||||
});
|
||||
|
||||
test("should filter out items without price", async () => {
|
||||
@@ -131,11 +135,11 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("test", 1, "toronto", 25);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("With Price");
|
||||
expect(results[0]?.title).toBe("With Price");
|
||||
});
|
||||
|
||||
test("should respect MAX_ITEMS parameter", async () => {
|
||||
@@ -186,7 +190,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("test", 1, "toronto", 5);
|
||||
expect(results).toHaveLength(5);
|
||||
@@ -227,7 +231,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems(
|
||||
"nonexistent query",
|
||||
@@ -248,7 +252,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("test", 1, "toronto", 25);
|
||||
expect(results).toEqual([]);
|
||||
@@ -277,7 +281,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("lamp", 1, "toronto", 25);
|
||||
expect(results).toEqual([]);
|
||||
@@ -318,14 +322,16 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("lamp", 1, "toronto", 25);
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
|
||||
test("should handle network errors", async () => {
|
||||
global.fetch = mock(() => Promise.reject(new Error("Network error")));
|
||||
global.fetch = mock(() =>
|
||||
Promise.reject(new Error("Network error")),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
await expect(
|
||||
fetchFacebookItems("test", 1, "toronto", 25),
|
||||
@@ -396,7 +402,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
});
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("test", 1, "toronto", 25);
|
||||
expect(attempts).toBe(2);
|
||||
@@ -469,13 +475,13 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("cars", 1, "toronto", 25);
|
||||
expect(results).toHaveLength(2);
|
||||
// Both should be classified as "item" type in search results (vehicle detection is for item details)
|
||||
expect(results[0].title).toBe("2006 Honda Civic");
|
||||
expect(results[1].title).toBe("iPhone 13");
|
||||
expect(results[0]?.title).toBe("2006 Honda Civic");
|
||||
expect(results[1]?.title).toBe("iPhone 13");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -538,7 +544,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems(
|
||||
"nintendo switch",
|
||||
@@ -547,8 +553,8 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
25,
|
||||
);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("Nintendo Switch");
|
||||
expect(results[0].categoryId).toBe("479353692612078");
|
||||
expect(results[0]?.title).toBe("Nintendo Switch");
|
||||
expect(results[0]?.categoryId).toBe("479353692612078");
|
||||
});
|
||||
|
||||
test("should handle home goods/furniture listings", async () => {
|
||||
@@ -609,12 +615,12 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("table", 1, "toronto", 25);
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].title).toBe("Dining Table");
|
||||
expect(results[0].categoryId).toBe("1569171756675761");
|
||||
expect(results[0]?.title).toBe("Dining Table");
|
||||
expect(results[0]?.categoryId).toBe("1569171756675761");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -631,7 +637,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("test", 1, "toronto", 25);
|
||||
expect(results).toEqual([]);
|
||||
@@ -647,7 +653,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("test", 1, "toronto", 25);
|
||||
expect(results).toEqual([]);
|
||||
@@ -663,7 +669,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchFacebookItems("test", 1, "toronto", 25);
|
||||
expect(results).toEqual([]);
|
||||
@@ -704,7 +710,7 @@ describe("Facebook Marketplace Scraper Integration Tests", () => {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchFacebookItem("123");
|
||||
expect(result).toBeNull();
|
||||
|
||||
124
packages/core/test/http.test.ts
Normal file
124
packages/core/test/http.test.ts
Normal file
@@ -0,0 +1,124 @@
|
||||
import { afterEach, describe, expect, mock, test } from "bun:test";
|
||||
import { fetchHtml } from "../src/utils/http";
|
||||
|
||||
describe("fetchHtml", () => {
|
||||
const originalFetch = global.fetch;
|
||||
const originalNodeEnv = process.env.NODE_ENV;
|
||||
const originalSetTimeout = globalThis.setTimeout;
|
||||
const originalClearTimeout = globalThis.clearTimeout;
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
process.env.NODE_ENV = originalNodeEnv;
|
||||
globalThis.setTimeout = originalSetTimeout;
|
||||
globalThis.clearTimeout = originalClearTimeout;
|
||||
});
|
||||
|
||||
test("does not schedule throttle timers during tests", async () => {
|
||||
process.env.NODE_ENV = "test";
|
||||
const scheduledDelays: number[] = [];
|
||||
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () => Promise.resolve("<html></html>"),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
globalThis.setTimeout = mock((handler: TimerHandler, timeout?: number) => {
|
||||
scheduledDelays.push(Number(timeout));
|
||||
if (timeout !== 30_000 && typeof handler === "function") {
|
||||
handler();
|
||||
}
|
||||
return 0 as unknown as ReturnType<typeof setTimeout>;
|
||||
}) as unknown as typeof setTimeout;
|
||||
globalThis.clearTimeout = mock(() => {}) as unknown as typeof clearTimeout;
|
||||
|
||||
await fetchHtml("https://example.com", 1000, { timeoutMs: 30_000 });
|
||||
|
||||
expect(scheduledDelays).not.toContain(1000);
|
||||
});
|
||||
|
||||
test("fetchHtml returns responseUrl when includeResponseUrl is true", async () => {
|
||||
process.env.NODE_ENV = "test";
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
status: 200,
|
||||
url: "https://example.test/final",
|
||||
headers: { get: () => null },
|
||||
text: () => Promise.resolve("<html></html>"),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
|
||||
const result = await fetchHtml("https://example.test", 0, {
|
||||
includeResponseUrl: true,
|
||||
});
|
||||
expect(result.html).toBe("<html></html>");
|
||||
expect(result.responseUrl).toBe("https://example.test/final");
|
||||
});
|
||||
|
||||
test("rate limit epoch reset uses bounded wait", async () => {
|
||||
process.env.NODE_ENV = "production";
|
||||
const scheduledDelays: number[] = [];
|
||||
const farFutureEpochSeconds = Math.floor(Date.now() / 1000) + 315_360_000;
|
||||
let calls = 0;
|
||||
|
||||
global.fetch = mock(() => {
|
||||
calls += 1;
|
||||
return Promise.resolve({
|
||||
ok: calls > 1,
|
||||
status: calls > 1 ? 200 : 429,
|
||||
url: "https://example.test",
|
||||
headers: {
|
||||
get: (name: string) =>
|
||||
name === "X-RateLimit-Reset" ? String(farFutureEpochSeconds) : null,
|
||||
},
|
||||
text: () => Promise.resolve("<html></html>"),
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
globalThis.setTimeout = mock((handler: TimerHandler, timeout?: number) => {
|
||||
scheduledDelays.push(Number(timeout));
|
||||
if (timeout !== 1_234_567 && typeof handler === "function") {
|
||||
handler();
|
||||
}
|
||||
return 0 as unknown as ReturnType<typeof setTimeout>;
|
||||
}) as unknown as typeof setTimeout;
|
||||
globalThis.clearTimeout = mock(() => {}) as unknown as typeof clearTimeout;
|
||||
|
||||
await fetchHtml("https://example.test", 0, {
|
||||
maxRetries: 1,
|
||||
timeoutMs: 1_234_567,
|
||||
});
|
||||
|
||||
expect(scheduledDelays).toContain(30_000);
|
||||
expect(scheduledDelays).not.toContain(farFutureEpochSeconds * 1000);
|
||||
});
|
||||
|
||||
test("custom Accept header overrides default accept without duplicate casing", async () => {
|
||||
process.env.NODE_ENV = "test";
|
||||
const customAccept = "text/plain";
|
||||
let requestHeaders: HeadersInit | undefined;
|
||||
|
||||
global.fetch = mock((_url: string | URL | Request, init?: RequestInit) => {
|
||||
requestHeaders = init?.headers;
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
status: 200,
|
||||
url: "https://example.test",
|
||||
headers: { get: () => null },
|
||||
text: () => Promise.resolve("<html></html>"),
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
|
||||
await fetchHtml("https://example.test", 0, {
|
||||
headers: { Accept: customAccept },
|
||||
});
|
||||
|
||||
expect(requestHeaders).toBeDefined();
|
||||
expect((requestHeaders as Record<string, string>).accept).toBe(
|
||||
customAccept,
|
||||
);
|
||||
expect((requestHeaders as Record<string, string>).Accept).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@@ -1,13 +1,60 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import {
|
||||
buildSearchUrl,
|
||||
type DetailedListing,
|
||||
default as fetchKijijiItems,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
parseDetailedListing,
|
||||
parseSearch,
|
||||
RateLimitError,
|
||||
resolveCategoryId,
|
||||
resolveLocationId,
|
||||
ValidationError,
|
||||
} from "../src/scrapers/kijiji";
|
||||
import type { UnstableListingBuckets } from "../src/types/common";
|
||||
|
||||
type Assert<T extends true> = T;
|
||||
type IsExact<T, U> =
|
||||
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||
? true
|
||||
: false
|
||||
: false;
|
||||
|
||||
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
|
||||
const getUnstableKijijiItems = async (): Promise<
|
||||
UnstableListingBuckets<DetailedListing>
|
||||
> =>
|
||||
fetchKijijiItems(
|
||||
"phone",
|
||||
1000,
|
||||
"https://www.kijiji.ca",
|
||||
{},
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
type _KijijiDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
|
||||
>;
|
||||
type _KijijiUnstableReturn = Assert<
|
||||
IsExact<
|
||||
Awaited<ReturnType<typeof getUnstableKijijiItems>>,
|
||||
UnstableListingBuckets<DetailedListing>
|
||||
>
|
||||
>;
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
beforeEach(() => {
|
||||
global.fetch = mock(() => {
|
||||
throw new Error("fetch should be mocked in individual tests");
|
||||
}) as unknown as typeof fetch;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
});
|
||||
|
||||
describe("Location and Category Resolution", () => {
|
||||
describe("resolveLocationId", () => {
|
||||
@@ -21,6 +68,7 @@ describe("Location and Category Resolution", () => {
|
||||
expect(resolveLocationId("ontario")).toBe(9004);
|
||||
expect(resolveLocationId("toronto")).toBe(1700273);
|
||||
expect(resolveLocationId("gta")).toBe(1700272);
|
||||
expect(resolveLocationId("Nova Scotia")).toBe(9002);
|
||||
});
|
||||
|
||||
test("should handle case insensitive matching", () => {
|
||||
@@ -77,7 +125,7 @@ describe("URL Construction", () => {
|
||||
sortOrder: "desc",
|
||||
});
|
||||
|
||||
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
|
||||
expect(url).toContain("b-phones/gta/iphone/k0c132l1700272");
|
||||
expect(url).toContain("sort=relevancyDesc");
|
||||
expect(url).toContain("order=DESC");
|
||||
});
|
||||
@@ -97,6 +145,7 @@ describe("URL Construction", () => {
|
||||
sortBy: "date",
|
||||
sortOrder: "asc",
|
||||
});
|
||||
expect(dateUrl.match(/sort=/g)?.length).toBe(1);
|
||||
expect(dateUrl).toContain("sort=DATE");
|
||||
expect(dateUrl).toContain("order=ASC");
|
||||
|
||||
@@ -108,12 +157,23 @@ describe("URL Construction", () => {
|
||||
expect(priceUrl).toContain("order=DESC");
|
||||
});
|
||||
|
||||
test("includes price filters in the generated search URL", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
priceMin: 8000,
|
||||
priceMax: 10000,
|
||||
});
|
||||
|
||||
expect(url).toContain("priceMin=80");
|
||||
expect(url).toContain("priceMax=100");
|
||||
});
|
||||
|
||||
test("should handle string location/category inputs", () => {
|
||||
const url = buildSearchUrl("iphone", {
|
||||
location: "toronto",
|
||||
category: "phones",
|
||||
});
|
||||
|
||||
expect(url).toContain("/b-phones/toronto/");
|
||||
expect(url).toContain("k0c132l1700273"); // phones + toronto
|
||||
});
|
||||
});
|
||||
@@ -155,3 +215,823 @@ describe("Error Classes", () => {
|
||||
expect(error.name).toBe("ValidationError");
|
||||
});
|
||||
});
|
||||
|
||||
describe("fetchKijijiItems", () => {
|
||||
test("filters fetched listings by priceMin and priceMax", async () => {
|
||||
const searchHtml = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:1": {
|
||||
url: "/v-low/k0l0",
|
||||
title: "Low Listing",
|
||||
},
|
||||
"Listing:2": {
|
||||
url: "/v-mid/k0l0",
|
||||
title: "Mid Listing",
|
||||
},
|
||||
"Listing:3": {
|
||||
url: "/v-high/k0l0",
|
||||
title: "High Listing",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
const listingHtml = (title: string, amount: number, slug: string) => `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"Listing:detail": {
|
||||
url: `/${slug}`,
|
||||
title,
|
||||
price: { amount, currency: "CAD", type: "FIXED" },
|
||||
type: "OFFER",
|
||||
status: "ACTIVE",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
global.fetch = mock((input: string | URL | Request) => {
|
||||
const url = typeof input === "string" ? input : input.toString();
|
||||
|
||||
if (url.includes("/k0c0l1700272")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(searchHtml),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-low/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(listingHtml("Low Listing", 7000, "v-low/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-mid/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(listingHtml("Mid Listing", 9000, "v-mid/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
if (url.endsWith("/v-high/k0l0")) {
|
||||
return Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(listingHtml("High Listing", 12000, "v-high/k0l0")),
|
||||
headers: { get: () => null },
|
||||
url,
|
||||
});
|
||||
}
|
||||
|
||||
throw new Error(`Unexpected URL: ${url}`);
|
||||
}) as unknown as typeof fetch;
|
||||
|
||||
const results = await fetchKijijiItems(
|
||||
"phone",
|
||||
1000,
|
||||
"https://www.kijiji.ca",
|
||||
{ maxPages: 1, priceMin: 8000, priceMax: 10000 },
|
||||
);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({ title: "Mid Listing" }),
|
||||
]);
|
||||
});
|
||||
|
||||
// Detail pages must not overlap when the second argument to
// fetchKijijiItems is 1 (presumably the requests-per-second limit — confirm
// against the scraper). The fetch mock counts overlapping detail requests and
// the test requires the observed peak to be exactly one.
test("respects REQUESTS_PER_SECOND without concurrent detail fetch bursts", async () => {
  // Embeds an Apollo cache object in a minimal page carrying __NEXT_DATA__.
  const pageWithState = (apolloState: Record<string, unknown>) => `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${JSON.stringify({
          props: { pageProps: { __APOLLO_STATE__: apolloState } },
        })}
      </script>
    </html>
  `;

  const searchPage = pageWithState({
    "Listing:1": { url: "/v-one/k0l0", title: "One" },
    "Listing:2": { url: "/v-two/k0l0", title: "Two" },
    "Listing:3": { url: "/v-three/k0l0", title: "Three" },
  });

  const detailPage = (title: string, slug: string) =>
    pageWithState({
      "Listing:detail": {
        url: `/${slug}`,
        title,
        price: { amount: 10000, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  // Detail pages the mock knows how to serve, checked in this order.
  const knownDetails = [
    { slug: "v-one/k0l0", title: "One" },
    { slug: "v-two/k0l0", title: "Two" },
    { slug: "v-three/k0l0", title: "Three" },
  ];

  // Minimal response-like object; the body is rendered when text() is called.
  const htmlResponse = (render: () => string, url: string) => ({
    ok: true,
    text: () => Promise.resolve(render()),
    headers: { get: () => null },
    url,
  });

  let inFlight = 0;
  let peakInFlight = 0;

  global.fetch = mock(async (input: string | URL | Request) => {
    const requested = typeof input === "string" ? input : input.toString();

    // The search request is answered immediately and is not counted.
    if (requested.includes("/k0c0l1700272")) {
      return htmlResponse(() => searchPage, requested);
    }

    // Every other request counts as a detail fetch for the overlap window.
    inFlight += 1;
    peakInFlight = Math.max(peakInFlight, inFlight);

    await new Promise((resolve) => setTimeout(resolve, 5));

    inFlight -= 1;

    const detail = knownDetails.find(({ slug }) =>
      requested.endsWith(`/${slug}`),
    );
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${requested}`);
    }

    return htmlResponse(
      () => detailPage(detail.title, detail.slug),
      requested,
    );
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1,
    "https://www.kijiji.ca",
    { maxPages: 1 },
  );

  expect(results).toHaveLength(3);
  expect(peakInFlight).toBe(1);
});
|
||||
|
||||
// With 4 as the second argument to fetchKijijiItems (presumably the
// requests-per-second limit — confirm against the scraper), detail fetches
// are expected to overlap, but never by more than four. Each mocked detail
// request stays open for 300 ms so overlap can be observed.
test("allows bounded concurrency to scale with REQUESTS_PER_SECOND", async () => {
  // Embeds an Apollo cache object in a minimal page carrying __NEXT_DATA__.
  const pageWithState = (apolloState: Record<string, unknown>) => `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${JSON.stringify({
          props: { pageProps: { __APOLLO_STATE__: apolloState } },
        })}
      </script>
    </html>
  `;

  const searchPage = pageWithState({
    "Listing:1": { url: "/v-one/k0l0", title: "One" },
    "Listing:2": { url: "/v-two/k0l0", title: "Two" },
  });

  const detailPage = (title: string, slug: string) =>
    pageWithState({
      "Listing:detail": {
        url: `/${slug}`,
        title,
        price: { amount: 10000, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  // Detail pages the mock knows how to serve, checked in this order.
  const knownDetails = [
    { slug: "v-one/k0l0", title: "One" },
    { slug: "v-two/k0l0", title: "Two" },
  ];

  // Minimal response-like object; the body is rendered when text() is called.
  const htmlResponse = (render: () => string, url: string) => ({
    ok: true,
    text: () => Promise.resolve(render()),
    headers: { get: () => null },
    url,
  });

  let inFlight = 0;
  let peakInFlight = 0;

  global.fetch = mock(async (input: string | URL | Request) => {
    const requested = typeof input === "string" ? input : input.toString();

    // The search request is answered immediately and is not counted.
    if (requested.includes("/k0c0l1700272")) {
      return htmlResponse(() => searchPage, requested);
    }

    inFlight += 1;
    peakInFlight = Math.max(peakInFlight, inFlight);

    await new Promise((resolve) => setTimeout(resolve, 300));

    inFlight -= 1;

    const detail = knownDetails.find(({ slug }) =>
      requested.endsWith(`/${slug}`),
    );
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${requested}`);
    }

    return htmlResponse(
      () => detailPage(detail.title, detail.slug),
      requested,
    );
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    4,
    "https://www.kijiji.ca",
    { maxPages: 1 },
  );

  expect(results).toHaveLength(2);
  expect(peakInFlight).toBeGreaterThan(1);
  expect(peakInFlight).toBeLessThanOrEqual(4);
});
|
||||
|
||||
// Runs a search with priceMin 8000 and hideUnstableResults enabled. The
// listing priced at 7000 sits below priceMin and is expected to appear in
// neither `results` nor `unstableResults`.
test("classifies the filtered Kijiji result set in unstable mode", async () => {
  // Embeds an Apollo cache object in a minimal page carrying __NEXT_DATA__.
  const pageWithState = (apolloState: Record<string, unknown>) => `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${JSON.stringify({
          props: { pageProps: { __APOLLO_STATE__: apolloState } },
        })}
      </script>
    </html>
  `;

  const searchPage = pageWithState({
    "Listing:1": {
      url: "/v-stable-one/k0l0",
      title: "Stable Listing One",
    },
    "Listing:2": {
      url: "/v-stable-two/k0l0",
      title: "Stable Listing Two",
    },
    "Listing:3": {
      url: "/v-unstable/k0l0",
      title: "Unstable Listing",
    },
  });

  const detailPage = (title: string, amount: number, slug: string) =>
    pageWithState({
      "Listing:detail": {
        url: `/${slug}`,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  // Detail pages the mock knows how to serve, checked in this order.
  const knownDetails = [
    { slug: "v-stable-one/k0l0", title: "Stable Listing One", amount: 10000 },
    { slug: "v-stable-two/k0l0", title: "Stable Listing Two", amount: 11000 },
    { slug: "v-unstable/k0l0", title: "Unstable Listing", amount: 7000 },
  ];

  global.fetch = mock((input: string | URL | Request) => {
    const requested = typeof input === "string" ? input : input.toString();

    // Response-like object; the body is rendered when text() is called.
    const respond = (render: () => string) =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(render()),
        headers: { get: () => null },
        url: requested,
      });

    // The search page is only served when the price filter is in the URL.
    if (
      requested.includes("/k0c0l1700272") &&
      requested.includes("priceMin=80")
    ) {
      return respond(() => searchPage);
    }

    const detail = knownDetails.find(({ slug }) =>
      requested.endsWith(`/${slug}`),
    );
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${requested}`);
    }

    return respond(() => detailPage(detail.title, detail.amount, detail.slug));
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1000,
    "https://www.kijiji.ca",
    { maxPages: 1, priceMin: 8000 },
    {},
    { hideUnstableResults: true },
  );

  expect(results).toEqual({
    results: [
      expect.objectContaining({ title: "Stable Listing One" }),
      expect.objectContaining({ title: "Stable Listing Two" }),
    ],
    unstableResults: [],
  });
});
|
||||
|
||||
// Searches with priceMin 8000 and priceMax 15000 in unstable mode. The
// listings priced at 20000 and 7000 are outside that range and are expected
// in neither bucket; both in-range listings are expected in `results`.
test("keeps out-of-range Kijiji listings out of both buckets and median input", async () => {
  // Embeds an Apollo cache object in a minimal page carrying __NEXT_DATA__.
  const pageWithState = (apolloState: Record<string, unknown>) => `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${JSON.stringify({
          props: { pageProps: { __APOLLO_STATE__: apolloState } },
        })}
      </script>
    </html>
  `;

  const searchPage = pageWithState({
    "Listing:1": {
      url: "/v-stable-one/k0l0",
      title: "Stable Listing One",
    },
    "Listing:2": {
      url: "/v-stable-two/k0l0",
      title: "Stable Listing Two",
    },
    "Listing:3": {
      url: "/v-out-of-range-high/k0l0",
      title: "Out Of Range High",
    },
    "Listing:4": {
      url: "/v-out-of-range-low/k0l0",
      title: "Out Of Range Low",
    },
  });

  const detailPage = (title: string, amount: number, slug: string) =>
    pageWithState({
      "Listing:detail": {
        url: `/${slug}`,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  // Detail pages the mock knows how to serve, checked in this order.
  const knownDetails = [
    { slug: "v-stable-one/k0l0", title: "Stable Listing One", amount: 10000 },
    { slug: "v-stable-two/k0l0", title: "Stable Listing Two", amount: 11000 },
    {
      slug: "v-out-of-range-high/k0l0",
      title: "Out Of Range High",
      amount: 20000,
    },
    {
      slug: "v-out-of-range-low/k0l0",
      title: "Out Of Range Low",
      amount: 7000,
    },
  ];

  global.fetch = mock((input: string | URL | Request) => {
    const requested = typeof input === "string" ? input : input.toString();

    // Response-like object; the body is rendered when text() is called.
    const respond = (render: () => string) =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(render()),
        headers: { get: () => null },
        url: requested,
      });

    // The search page is only served when both price bounds are in the URL.
    const isFilteredSearch =
      requested.includes("/k0c0l1700272") &&
      requested.includes("priceMin=80") &&
      requested.includes("priceMax=150");
    if (isFilteredSearch) {
      return respond(() => searchPage);
    }

    const detail = knownDetails.find(({ slug }) =>
      requested.endsWith(`/${slug}`),
    );
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${requested}`);
    }

    return respond(() => detailPage(detail.title, detail.amount, detail.slug));
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1000,
    "https://www.kijiji.ca",
    { maxPages: 1, priceMin: 8000, priceMax: 15000 },
    {},
    { hideUnstableResults: true },
  );

  expect(results).toEqual({
    results: [
      expect.objectContaining({ title: "Stable Listing One" }),
      expect.objectContaining({ title: "Stable Listing Two" }),
    ],
    unstableResults: [],
  });
});
|
||||
|
||||
// The Apollo state holds a "SearchListingCard:1" entry ahead of the
// "Listing:detail" entry; the parsed result must carry the title of the
// latter.
test("parseDetailedListing ignores non-root listing-like entities", async () => {
  const apolloState = {
    "SearchListingCard:1": {
      url: "/v-card/k0l0",
      title: "Card Listing",
    },
    "Listing:detail": {
      url: "/v-detailed/k0l0",
      title: "Detailed Listing",
      price: { amount: 10000, currency: "CAD", type: "FIXED" },
      type: "OFFER",
      status: "ACTIVE",
    },
  };

  const nextData = JSON.stringify({
    props: { pageProps: { __APOLLO_STATE__: apolloState } },
  });

  const html = `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${nextData}
      </script>
    </html>
  `;

  const parsed = await parseDetailedListing(html, "https://www.kijiji.ca");

  expect(parsed).toEqual(
    expect.objectContaining({ title: "Detailed Listing" }),
  );
});
|
||||
|
||||
// Parses a detail page with client-side data and "detailed" seller depth
// enabled, while the fetch mock counts overlapping requests to "/anvil/api".
// The observed peak of simultaneous requests must be exactly one.
test("fetchSellerDetails does not fire concurrent GraphQL requests", async () => {
  const nextData = JSON.stringify({
    props: {
      pageProps: {
        __APOLLO_STATE__: {
          "Listing:detail": {
            url: "/v-test/k0l0",
            title: "Test Listing",
            price: { amount: 10000, currency: "CAD", type: "FIXED" },
            type: "OFFER",
            status: "ACTIVE",
            posterInfo: { posterId: "123" },
          },
        },
      },
    },
  });

  const html = `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${nextData}
      </script>
    </html>
  `;

  let inFlight = 0;
  let peakInFlight = 0;

  global.fetch = mock(async (input: string | URL | Request) => {
    const requested = typeof input === "string" ? input : input.toString();

    // Only the anvil endpoint is expected; anything else fails the test.
    if (!requested.includes("/anvil/api")) {
      throw new Error(`Unexpected URL: ${requested}`);
    }

    inFlight += 1;
    peakInFlight = Math.max(peakInFlight, inFlight);
    // Keep the request open for 50 ms so overlapping calls would be counted.
    await new Promise((resolve) => setTimeout(resolve, 50));
    inFlight -= 1;

    return {
      ok: true,
      json: () => Promise.resolve({ data: { user: {} } }),
      headers: { get: () => null },
      url: requested,
    };
  }) as unknown as typeof fetch;

  await parseDetailedListing(html, "https://www.kijiji.ca", {
    includeClientSideData: true,
    sellerDataDepth: "detailed",
  });

  expect(peakInFlight).toBe(1);
});
|
||||
|
||||
// With hideUnstableResults enabled and no price filter, the return value is
// expected to be an object with two buckets: the listings priced 10000 and
// 11000 in `results`, and the one priced 7000 in `unstableResults`.
test("returns results and unstableResults when unstable mode is enabled", async () => {
  // Embeds an Apollo cache object in a minimal page carrying __NEXT_DATA__.
  const pageWithState = (apolloState: Record<string, unknown>) => `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${JSON.stringify({
          props: { pageProps: { __APOLLO_STATE__: apolloState } },
        })}
      </script>
    </html>
  `;

  const searchPage = pageWithState({
    "Listing:1": {
      url: "/v-stable-one/k0l0",
      title: "Stable Listing One",
    },
    "Listing:2": {
      url: "/v-stable-two/k0l0",
      title: "Stable Listing Two",
    },
    "Listing:3": {
      url: "/v-unstable/k0l0",
      title: "Unstable Listing",
    },
  });

  const detailPage = (title: string, amount: number, slug: string) =>
    pageWithState({
      "Listing:detail": {
        url: `/${slug}`,
        title,
        price: { amount, currency: "CAD", type: "FIXED" },
        type: "OFFER",
        status: "ACTIVE",
      },
    });

  // Detail pages the mock knows how to serve, checked in this order.
  const knownDetails = [
    { slug: "v-stable-one/k0l0", title: "Stable Listing One", amount: 10000 },
    { slug: "v-stable-two/k0l0", title: "Stable Listing Two", amount: 11000 },
    { slug: "v-unstable/k0l0", title: "Unstable Listing", amount: 7000 },
  ];

  global.fetch = mock((input: string | URL | Request) => {
    const requested = typeof input === "string" ? input : input.toString();

    // Response-like object; the body is rendered when text() is called.
    const respond = (render: () => string) =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(render()),
        headers: { get: () => null },
        url: requested,
      });

    if (requested.includes("/k0c0l1700272")) {
      return respond(() => searchPage);
    }

    const detail = knownDetails.find(({ slug }) =>
      requested.endsWith(`/${slug}`),
    );
    if (detail === undefined) {
      throw new Error(`Unexpected URL: ${requested}`);
    }

    return respond(() => detailPage(detail.title, detail.amount, detail.slug));
  }) as unknown as typeof fetch;

  const results = await fetchKijijiItems(
    "phone",
    1000,
    "https://www.kijiji.ca",
    { maxPages: 1 },
    {},
    { hideUnstableResults: true },
  );

  expect(results).toEqual({
    results: [
      expect.objectContaining({ title: "Stable Listing One" }),
      expect.objectContaining({ title: "Stable Listing Two" }),
    ],
    unstableResults: [expect.objectContaining({ title: "Unstable Listing" })],
  });
});
|
||||
});
|
||||
|
||||
describe("parseSearch", () => {
  // Only the "Listing:1" entry is expected in the output; the
  // "SearchListingCard:1" entry must not produce a search result.
  test("ignores SearchListingCard noise keys", () => {
    const apolloState = {
      "SearchListingCard:1": {
        url: "/v-card-noise/k0l0",
        title: "Card Noise",
      },
      "Listing:1": {
        url: "/v-real-result/k0l0",
        title: "Real Result",
      },
    };

    const nextData = JSON.stringify({
      props: { pageProps: { __APOLLO_STATE__: apolloState } },
    });

    const html = `
      <html>
        <script id="__NEXT_DATA__" type="application/json">
          ${nextData}
        </script>
      </html>
    `;

    const parsed = parseSearch(html, "https://www.kijiji.ca");

    expect(parsed).toEqual([
      {
        listingLink: "https://www.kijiji.ca/v-real-result/k0l0",
        name: "Real Result",
      },
    ]);
  });
});
|
||||
|
||||
@@ -13,7 +13,7 @@ describe("HTML Parsing Integration", () => {
|
||||
// Mock fetch for all tests
|
||||
global.fetch = mock(() => {
|
||||
throw new Error("fetch should be mocked in individual tests");
|
||||
});
|
||||
}) as unknown as typeof fetch;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@@ -111,7 +111,7 @@ describe("HTML Parsing Integration", () => {
|
||||
`;
|
||||
|
||||
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
|
||||
expect(results[0].listingLink).toBe(
|
||||
expect(results[0]?.listingLink).toBe(
|
||||
"https://www.kijiji.ca/v-iphone/k0l0",
|
||||
);
|
||||
});
|
||||
@@ -146,7 +146,49 @@ describe("HTML Parsing Integration", () => {
|
||||
|
||||
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].name).toBe("iPhone 13 Pro");
|
||||
expect(results[0]?.name).toBe("iPhone 13 Pro");
|
||||
});
|
||||
|
||||
// Two "StandardListing:*" records are present, one with an absolute URL and
// one with a site-relative URL; both are expected back as absolute
// listingLink values, in the same order.
test("should parse current StandardListing search records", () => {
  const apolloState = {
    ROOT_QUERY: { test: "value" },
    "StandardListing:123": {
      __typename: "StandardListing",
      url: "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
      title: "iPhone 13",
    },
    "StandardListing:456": {
      __typename: "StandardListing",
      url: "/v-cell-phone/city-of-toronto/iphone-14/456",
      title: "iPhone 14",
    },
  };

  const nextData = JSON.stringify({
    props: { pageProps: { __APOLLO_STATE__: apolloState } },
  });

  const mockHtml = `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${nextData}
      </script>
    </html>
  `;

  const parsed = parseSearch(mockHtml, "https://www.kijiji.ca");

  expect(parsed).toEqual([
    {
      name: "iPhone 13",
      listingLink:
        "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
    },
    {
      name: "iPhone 14",
      listingLink:
        "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-14/456",
    },
  ]);
});
|
||||
|
||||
test("should return empty array for invalid HTML", () => {
|
||||
@@ -303,6 +345,118 @@ describe("HTML Parsing Integration", () => {
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
// Feeds a fully populated "StandardListing:123" record through
// parseDetailedListing and pins the complete expected output, including the
// formatted price and the attribute list flattened by canonical name.
test("should parse current StandardListing detail records", async () => {
  const listingRecord = {
    __typename: "StandardListing",
    url: "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
    title: "iPhone 13",
    description: "Lightly used iPhone 13",
    price: {
      __typename: "AmountPrice",
      amount: 45000,
      currency: "CAD",
      type: "FIXED",
    },
    type: "OFFER",
    status: "ACTIVE",
    activationDate: "2026-04-20T10:00:00.000Z",
    metrics: { views: "12" },
    location: {
      id: 1700273,
      name: "City of Toronto",
      address: "Toronto, ON",
      coordinates: {
        latitude: 43.6532,
        longitude: -79.3832,
      },
    },
    imageUrls: ["https://media.kijiji.ca/api/v1/image1.jpg"],
    categoryId: 760,
    adSource: "ORGANIC",
    flags: {
      topAd: false,
      priceDrop: false,
    },
    posterInfo: {
      posterId: "user123",
      rating: 4.5,
    },
    attributes: {
      __typename: "StandardListingAttributes",
      all: [
        {
          __typename: "ListingAttributeV2",
          canonicalName: "forsaleby",
          canonicalValues: ["ownr"],
        },
        {
          __typename: "ListingAttributeV2",
          canonicalName: "phonebrand",
          canonicalValues: ["apple"],
        },
      ],
    },
  };

  const nextData = JSON.stringify({
    props: {
      pageProps: {
        __APOLLO_STATE__: { "StandardListing:123": listingRecord },
      },
    },
  });

  const mockHtml = `
    <html>
      <script id="__NEXT_DATA__" type="application/json">
        ${nextData}
      </script>
    </html>
  `;

  const expectedListing = {
    url: "https://www.kijiji.ca/v-cell-phone/city-of-toronto/iphone-13/123",
    title: "iPhone 13",
    description: "Lightly used iPhone 13",
    listingPrice: {
      amountFormatted: "$450.00",
      cents: 45000,
      currency: "CAD",
    },
    listingType: "OFFER",
    listingStatus: "ACTIVE",
    creationDate: "2026-04-20T10:00:00.000Z",
    endDate: undefined,
    numberOfViews: 12,
    address: "Toronto, ON",
    images: ["https://media.kijiji.ca/api/v1/image1.jpg"],
    categoryId: 760,
    adSource: "ORGANIC",
    flags: {
      topAd: false,
      priceDrop: false,
    },
    attributes: {
      forsaleby: ["ownr"],
      phonebrand: ["apple"],
    },
    location: {
      id: 1700273,
      name: "City of Toronto",
      coordinates: {
        latitude: 43.6532,
        longitude: -79.3832,
      },
    },
    sellerInfo: {
      posterId: "user123",
      rating: 4.5,
    },
  };

  const parsed = await parseDetailedListing(
    mockHtml,
    "https://www.kijiji.ca",
  );

  expect(parsed).toEqual(expectedListing);
});
|
||||
|
||||
test("should handle missing optional fields", async () => {
|
||||
const mockHtml = `
|
||||
<html>
|
||||
|
||||
35
packages/core/test/live/ebay.live.test.ts
Normal file
35
packages/core/test/live/ebay.live.test.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import fetchEbayItems from "../../src/scrapers/ebay";
|
||||
|
||||
// Upper bound passed to the scraper as `maxItems`.
const LIVE_RESULT_LIMIT = 3;
// Per-test timeout handed to the test runner, in milliseconds.
const LIVE_TEST_TIMEOUT_MS = 30_000;

// Calls the real eBay scraper (no fetch mock is installed in this file) and
// checks that every returned listing has a usable URL, title and price.
describe("eBay live parser", () => {
  test(
    "scrapes live search results into listing details",
    async () => {
      const listings = await fetchEbayItems("iphone", 1, {
        maxItems: LIVE_RESULT_LIMIT,
      });

      expect(listings.length).toBeGreaterThan(0);

      for (const item of listings) {
        // Throw with the offending URL so a failure names the listing.
        const price = item.listingPrice;
        if (!price) {
          throw new Error(`Expected listing price for ${item.url}`);
        }

        const { cents, currency } = price;
        if (typeof cents !== "number") {
          throw new Error(`Expected listing cents for ${item.url}`);
        }
        if (!currency) {
          throw new Error(`Expected listing currency for ${item.url}`);
        }

        expect(item.url).toStartWith("https://");
        expect(item.title.length).toBeGreaterThan(0);
        expect(cents).toBeGreaterThanOrEqual(0);
        expect(currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});
|
||||
44
packages/core/test/live/facebook.live.test.ts
Normal file
44
packages/core/test/live/facebook.live.test.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import fetchFacebookItems from "../../src/scrapers/facebook";
|
||||
|
||||
// Passed to the scraper as its fourth argument (presumably the result cap —
// confirm against fetchFacebookItems).
const LIVE_RESULT_LIMIT = 3;
// Per-test timeout handed to the test runner, in milliseconds.
const LIVE_TEST_TIMEOUT_MS = 30_000;

// Calls the real Facebook scraper (no fetch mock is installed in this file).
// The test fails up front when FACEBOOK_COOKIE is missing or blank.
describe("Facebook live parser", () => {
  test(
    "scrapes live marketplace search results into listing details",
    async () => {
      const cookie = process.env.FACEBOOK_COOKIE?.trim();
      if (!cookie) {
        throw new Error("FACEBOOK_COOKIE is required for Facebook live tests");
      }

      const listings = await fetchFacebookItems(
        "iphone",
        1,
        "toronto",
        LIVE_RESULT_LIMIT,
      );

      expect(listings.length).toBeGreaterThan(0);

      for (const item of listings) {
        // Throw with the offending URL so a failure names the listing.
        const price = item.listingPrice;
        if (!price) {
          throw new Error(`Expected listing price for ${item.url}`);
        }

        const { cents, currency } = price;
        if (typeof cents !== "number") {
          throw new Error(`Expected listing cents for ${item.url}`);
        }
        if (!currency) {
          throw new Error(`Expected listing currency for ${item.url}`);
        }

        expect(item.url).toStartWith(
          "https://www.facebook.com/marketplace/item/",
        );
        expect(item.title.length).toBeGreaterThan(0);
        expect(cents).toBeGreaterThanOrEqual(0);
        expect(currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});
|
||||
38
packages/core/test/live/kijiji.live.test.ts
Normal file
38
packages/core/test/live/kijiji.live.test.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import fetchKijijiItems from "../../src/scrapers/kijiji";
|
||||
|
||||
// Per-test timeout handed to the test runner, in milliseconds.
const LIVE_TEST_TIMEOUT_MS = 30_000;

// Calls the real Kijiji scraper (no fetch mock is installed in this file)
// for a single search page, with images off and "basic" seller depth, and
// checks that every returned listing has a usable URL, title and price.
describe("Kijiji live parser", () => {
  test(
    "scrapes live search results into detailed listings",
    async () => {
      const listings = await fetchKijijiItems(
        "iphone",
        1,
        "https://www.kijiji.ca",
        { maxPages: 1 },
        { includeImages: false, sellerDataDepth: "basic" },
      );

      expect(listings.length).toBeGreaterThan(0);

      for (const item of listings) {
        // Throw with the offending URL so a failure names the listing.
        const price = item.listingPrice;
        if (!price) {
          throw new Error(`Expected listing price for ${item.url}`);
        }

        const { cents, currency } = price;
        if (typeof cents !== "number") {
          throw new Error(`Expected listing cents for ${item.url}`);
        }
        if (!currency) {
          throw new Error(`Expected listing currency for ${item.url}`);
        }

        expect(item.url).toStartWith("https://www.kijiji.ca/");
        expect(item.title.length).toBeGreaterThan(0);
        expect(cents).toBeGreaterThanOrEqual(0);
        expect(currency.length).toBeGreaterThan(0);
      }
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});
|
||||
29
packages/core/test/logger.test.ts
Normal file
29
packages/core/test/logger.test.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { afterEach, describe, expect, mock, test } from "bun:test";
|
||||
|
||||
// Checks that the logger stays silent when NODE_ENV is "test". The test
// swaps console.log / console.warn for mocks, so everything it overrides is
// captured here and restored after each test.
describe("logger", () => {
  const originalNodeEnv = process.env.NODE_ENV;
  const originalConsoleLog = console.log;
  const originalConsoleWarn = console.warn;

  afterEach(() => {
    // Node documents that values assigned to process.env are converted to
    // strings, so writing back an `undefined` snapshot would leave NODE_ENV
    // set to the text "undefined". Remove the key instead when it was unset.
    if (originalNodeEnv === undefined) {
      delete process.env.NODE_ENV;
    } else {
      process.env.NODE_ENV = originalNodeEnv;
    }
    console.log = originalConsoleLog;
    console.warn = originalConsoleWarn;
  });

  test("suppresses log and warn output during tests", async () => {
    process.env.NODE_ENV = "test";
    const logMock = mock(() => {});
    const warnMock = mock(() => {});
    console.log = logMock;
    console.warn = warnMock;

    // Imported only after the environment and console are prepared.
    const { logger } = await import("../src/utils/logger");

    logger.log("hidden log");
    logger.warn("hidden warn");

    expect(logMock).not.toHaveBeenCalled();
    expect(warnMock).not.toHaveBeenCalled();
  });
});
|
||||
@@ -1,11 +1,6 @@
|
||||
// Test setup for Bun test runner
|
||||
// This file is loaded before any tests run due to bunfig.toml preload
|
||||
|
||||
// Mock fetch globally for tests
|
||||
global.fetch =
|
||||
global.fetch ||
|
||||
(() => {
|
||||
throw new Error("fetch is not available in test environment");
|
||||
});
|
||||
|
||||
// Add any global test utilities here
|
||||
global.fetch = Object.assign(
|
||||
() => {
|
||||
throw new Error("Tests must mock fetch explicitly");
|
||||
},
|
||||
{ preconnect: fetch.preconnect },
|
||||
) as typeof fetch;
|
||||
|
||||
101
packages/core/test/unstable-listing-mode.test.ts
Normal file
101
packages/core/test/unstable-listing-mode.test.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import type { ListingDetails } from "../src/types/common";
|
||||
import { classifyUnstableListings } from "../src/utils/unstable";
|
||||
|
||||
/** A listing carrying an extra `id` so tests can refer to it by name. */
interface TestListing extends ListingDetails {
  id: string;
}

/**
 * Builds a CAD-priced test listing.
 *
 * @param id - Used as the listing id, its title, and the last URL segment.
 * @param cents - Price in cents; also rendered as a dollar string with two
 *   decimals for `amountFormatted`.
 * @returns The populated listing fixture.
 */
function makeListing(id: string, cents: number): TestListing {
  const dollars = (cents / 100).toFixed(2);

  const listing: TestListing = {
    id,
    url: `https://example.com/${id}`,
    title: id,
    listingPrice: {
      amountFormatted: `$${dollars}`,
      cents,
      currency: "CAD",
    },
    listingType: "test",
    listingStatus: "active",
  };

  return listing;
}
|
||||
|
||||
// Exercises classifyUnstableListings through the two buckets it returns,
// `results` and `unstableResults`, comparing listing ids in order.
describe("classifyUnstableListings", () => {
  // Reduces a bucket to its listing ids so expectations stay short.
  const idsOf = (bucket: readonly TestListing[]) =>
    bucket.map(({ id }) => id);

  test("moves listings below 80% of median into unstableResults", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("stable-1", 100_00),
      makeListing("stable-2", 110_00),
      makeListing("unstable", 70_00),
    ]);

    expect(idsOf(results)).toEqual(["stable-1", "stable-2"]);
    expect(idsOf(unstableResults)).toEqual(["unstable"]);
  });

  test("uses the midpoint median for even-sized priced inputs", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("low", 79_00),
      makeListing("mid-low", 100_00),
      makeListing("mid-high", 120_00),
      makeListing("high", 140_00),
    ]);

    expect(idsOf(results)).toEqual(["mid-low", "mid-high", "high"]);
    expect(idsOf(unstableResults)).toEqual(["low"]);
  });

  test("keeps non-positive prices in results and excludes them from the median input", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("zero", 0),
      makeListing("negative", -500),
      makeListing("stable-1", 100_00),
      makeListing("stable-2", 120_00),
      makeListing("unstable", 70_00),
    ]);

    expect(idsOf(results)).toEqual([
      "zero",
      "negative",
      "stable-1",
      "stable-2",
    ]);
    expect(idsOf(unstableResults)).toEqual(["unstable"]);
  });

  test("returns all listings in results when fewer than two valid prices are present", () => {
    const { results, unstableResults } = classifyUnstableListings([
      makeListing("zero", 0),
      makeListing("negative", -100),
      makeListing("only-valid", 150_00),
    ]);

    expect(idsOf(results)).toEqual(["zero", "negative", "only-valid"]);
    expect(unstableResults).toEqual([]);
  });
});
|
||||
@@ -1,13 +1,9 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"lib": ["dom"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
},
|
||||
"strict": true,
|
||||
"noEmit": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"include": ["./src", "./test", "../../types/**/*.d.ts"]
|
||||
}
|
||||
|
||||
@@ -21,5 +21,6 @@
|
||||
|
||||
## Verify
|
||||
|
||||
- `bun test packages/mcp-server/test`
|
||||
- `bun run --cwd packages/mcp-server build`
|
||||
- `bun run ci`
|
||||
|
||||
@@ -2,18 +2,22 @@
|
||||
"name": "@marketplace-scrapers/mcp-server",
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"module": "./src/index.ts",
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"start": "bun ./src/index.ts",
|
||||
"dev": "bun --watch ./src/index.ts",
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp"
|
||||
"build": "bun build ./src/index.ts --target=bun --outdir=../../dist/mcp",
|
||||
"typecheck": "bun tsgo"
|
||||
},
|
||||
"dependencies": {
|
||||
"@marketplace-scrapers/core": "workspace:*"
|
||||
"@marketplace-scrapers/core": "workspace:*",
|
||||
"@typescript/native-preview": "catalog:"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/bun": "latest"
|
||||
"@types/bun": "catalog:"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": "^5"
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { logger } from "./logger";
|
||||
import { handleMcpRequest } from "./protocol/handler";
|
||||
import { serverCard } from "./protocol/metadata";
|
||||
|
||||
@@ -33,4 +34,4 @@ const server = Bun.serve({
|
||||
},
|
||||
});
|
||||
|
||||
console.log(`MCP Server running on ${server.hostname}:${server.port}`);
|
||||
logger.log(`MCP Server running on ${server.hostname}:${server.port}`);
|
||||
|
||||
10
packages/mcp-server/src/logger.ts
Normal file
10
packages/mcp-server/src/logger.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
/** Reports whether NODE_ENV is "test"; read on every call so it tracks runtime changes. */
function isTest(): boolean {
  return process.env.NODE_ENV === "test";
}

/**
 * Thin console wrapper that stays silent while NODE_ENV is "test".
 * Outside of tests, arguments are passed straight to the matching console method.
 */
export const logger = {
  /** Forwards to `console.log` unless running under test. */
  log(...args: Parameters<typeof console.log>): void {
    if (isTest()) return;
    console.log(...args);
  },

  /** Forwards to `console.error` unless running under test. */
  error(...args: Parameters<typeof console.error>): void {
    if (isTest()) return;
    console.error(...args);
  },
};
|
||||
@@ -1,7 +1,33 @@
|
||||
import { logger } from "../logger";
|
||||
import { tools } from "./tools";
|
||||
|
||||
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
|
||||
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
|
||||
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000;
|
||||
|
||||
/**
 * Calls `${API_BASE_URL}/${marketplace}` with the given query parameters and
 * returns the parsed JSON body.
 *
 * The wait for the response is bounded by `API_TIMEOUT` milliseconds. Unlike a
 * bare `Promise.race` against `setTimeout`, the timer is always cleared once
 * the race settles (so no timer stays pending after each call), and a
 * timed-out request is aborted instead of being left running in the
 * background.
 *
 * @param marketplace - Path segment appended to the API base URL.
 * @param params - Query parameters, serialised with `toString()`.
 * @returns The response body parsed as JSON.
 * @throws Error `Request timed out after <n>ms` when no response arrives in time.
 * @throws Error `API returned <status>: <body>` for non-2xx responses.
 */
async function callMarketplaceApi(
  marketplace: string,
  params: URLSearchParams,
): Promise<unknown> {
  const url = `${API_BASE_URL}/${marketplace}?${params.toString()}`;
  logger.log(`[MCP] Calling ${marketplace} API`);

  const controller = new AbortController();
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      // Reject first so callers see the timeout message rather than the
      // AbortError that the cancelled fetch produces.
      reject(new Error(`Request timed out after ${API_TIMEOUT}ms`));
      controller.abort();
    }, API_TIMEOUT);
  });

  let response: Response;
  try {
    response = await Promise.race([
      fetch(url, { signal: controller.signal }),
      timedOut,
    ]);
  } finally {
    // Settled either way: drop the timer so it cannot fire later.
    clearTimeout(timer);
  }

  if (!response.ok) {
    const errorText = await response.text();
    logger.error(
      `[MCP] ${marketplace} API error ${response.status}: ${errorText}`,
    );
    throw new Error(`API returned ${response.status}: ${errorText}`);
  }
  return response.json();
}
|
||||
|
||||
/**
|
||||
* Handle MCP JSON-RPC 2.0 protocol requests
|
||||
@@ -115,9 +141,10 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
params.append("priceMin", args.priceMin.toString());
|
||||
if (args.priceMax)
|
||||
params.append("priceMax", args.priceMax.toString());
|
||||
if (args.cookies) params.append("cookies", args.cookies);
|
||||
if (args.unstableFilter !== undefined)
|
||||
params.append("unstableFilter", args.unstableFilter.toString());
|
||||
|
||||
console.log(
|
||||
logger.log(
|
||||
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
@@ -133,13 +160,20 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
logger.error(
|
||||
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
let errorMessage = `API returned ${response.status}: ${errorText}`;
|
||||
try {
|
||||
const errorJson = JSON.parse(errorText) as { message?: string };
|
||||
if (errorJson.message) errorMessage = errorJson.message;
|
||||
} catch {
|
||||
// not JSON — use raw text
|
||||
}
|
||||
throw new Error(errorMessage);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
logger.log(
|
||||
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
} else if (name === "search_facebook") {
|
||||
@@ -155,32 +189,10 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
if (args.location) params.append("location", args.location);
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.unstableFilter !== undefined)
|
||||
params.append("unstableFilter", args.unstableFilter.toString());
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] Facebook API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
result = await callMarketplaceApi("facebook", params);
|
||||
} else if (name === "search_ebay") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
@@ -207,32 +219,10 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
params.append("canadaOnly", args.canadaOnly.toString());
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.unstableFilter !== undefined)
|
||||
params.append("unstableFilter", args.unstableFilter.toString());
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] eBay API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
result = await callMarketplaceApi("ebay", params);
|
||||
} else {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
|
||||
@@ -11,7 +11,11 @@ export const tools = [
|
||||
properties: {
|
||||
query: {
|
||||
type: "string",
|
||||
description: "Search query for Kijiji listings",
|
||||
description:
|
||||
"Search query for Kijiji listings. " +
|
||||
"Kijiji requires ALL words to appear in the listing title — keep queries short and use terms sellers actually write. " +
|
||||
"Avoid marketing/brand phrases sellers don't use (e.g. use 'macbook air m1' not 'macbook air m1 apple silicon'). " +
|
||||
"If the search returns no results, try a shorter or more common query.",
|
||||
},
|
||||
location: {
|
||||
type: "string",
|
||||
@@ -46,16 +50,16 @@ export const tools = [
|
||||
},
|
||||
priceMin: {
|
||||
type: "number",
|
||||
description: "Minimum price in cents",
|
||||
description: "Minimum price in dollars",
|
||||
},
|
||||
priceMax: {
|
||||
type: "number",
|
||||
description: "Maximum price in cents",
|
||||
description: "Maximum price in dollars",
|
||||
},
|
||||
cookies: {
|
||||
type: "string",
|
||||
unstableFilter: {
|
||||
type: "boolean",
|
||||
description:
|
||||
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
|
||||
"optional: when enabled, listings priced more than 20% below the median are moved into an `unstableResults` bucket. Changes the response shape from a plain list to an object with `results` and `unstableResults`.",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
@@ -81,6 +85,11 @@ export const tools = [
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
unstableFilter: {
|
||||
type: "boolean",
|
||||
description:
|
||||
"optional: when enabled, listings priced more than 20% below the median are moved into an `unstableResults` bucket. Changes the response shape from a plain list to an object with `results` and `unstableResults`.",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
@@ -98,11 +107,11 @@ export const tools = [
|
||||
},
|
||||
minPrice: {
|
||||
type: "number",
|
||||
description: "Minimum price filter",
|
||||
description: "Minimum price in dollars",
|
||||
},
|
||||
maxPrice: {
|
||||
type: "number",
|
||||
description: "Maximum price filter",
|
||||
description: "Maximum price in dollars",
|
||||
},
|
||||
strictMode: {
|
||||
type: "boolean",
|
||||
@@ -134,6 +143,11 @@ export const tools = [
|
||||
description: "Maximum number of items to return",
|
||||
default: 5,
|
||||
},
|
||||
unstableFilter: {
|
||||
type: "boolean",
|
||||
description:
|
||||
"optional: when enabled, listings priced more than 20% below the median are moved into an `unstableResults` bucket. Changes the response shape from a plain list to an object with `results` and `unstableResults`.",
|
||||
},
|
||||
},
|
||||
required: ["query"],
|
||||
},
|
||||
|
||||
@@ -8,25 +8,20 @@ describe("MCP protocol cookie inputs", () => {
|
||||
beforeEach(() => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve(new Response(JSON.stringify([]), { status: 200 })),
|
||||
) as typeof fetch;
|
||||
) as unknown as typeof fetch;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
});
|
||||
|
||||
test("search tools should not expose Facebook or eBay cookie inputs", () => {
|
||||
const searchFacebookTool = tools.find(
|
||||
(tool) => tool.name === "search_facebook",
|
||||
);
|
||||
const searchEbayTool = tools.find((tool) => tool.name === "search_ebay");
|
||||
|
||||
expect(searchFacebookTool?.inputSchema.properties).not.toHaveProperty(
|
||||
"cookiesSource",
|
||||
);
|
||||
expect(searchEbayTool?.inputSchema.properties).not.toHaveProperty(
|
||||
"cookies",
|
||||
);
|
||||
test("search tools should not expose cookie inputs", () => {
|
||||
const toolNames = ["search_kijiji", "search_facebook", "search_ebay"];
|
||||
for (const toolName of toolNames) {
|
||||
const tool = tools.find((candidate) => candidate.name === toolName);
|
||||
expect(tool?.inputSchema.properties).not.toHaveProperty("cookies");
|
||||
expect(tool?.inputSchema.properties).not.toHaveProperty("cookiesSource");
|
||||
}
|
||||
});
|
||||
|
||||
test("search_facebook should not forward cookies query parameters", async () => {
|
||||
@@ -48,9 +43,245 @@ describe("MCP protocol cookie inputs", () => {
|
||||
}),
|
||||
);
|
||||
|
||||
const calledUrl = (global.fetch as ReturnType<typeof mock>).mock
|
||||
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
|
||||
.calls[0]?.[0];
|
||||
expect(String(calledUrl)).toContain("/facebook?q=laptop");
|
||||
expect(String(calledUrl)).not.toContain("cookies=");
|
||||
});
|
||||
|
||||
test("search_kijiji should not forward cookies query parameters", async () => {
|
||||
await handleMcpRequest(
|
||||
new Request("http://localhost", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: 1,
|
||||
method: "tools/call",
|
||||
params: {
|
||||
name: "search_kijiji",
|
||||
arguments: {
|
||||
query: "laptop",
|
||||
cookies: "s=1",
|
||||
},
|
||||
},
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
const calledUrl = (global.fetch as unknown as ReturnType<typeof mock>).mock
|
||||
.calls[0]?.[0];
|
||||
expect(String(calledUrl)).toContain("/kijiji?q=laptop");
|
||||
expect(String(calledUrl)).not.toContain("cookies=");
|
||||
});
|
||||
});
|
||||
|
||||
// Covers the `unstableFilter` flag and the dollar-denominated price filters:
// schema documentation on the tool definitions, plus forwarding of the
// arguments into the query string of the upstream API call.
describe("MCP protocol unstableFilter", () => {
  const searchToolNames = ["search_kijiji", "search_facebook", "search_ebay"];

  /** Sends a JSON-RPC `tools/call` request for `name` through the handler. */
  const callTool = (
    name: string,
    args: Record<string, unknown>,
  ): Promise<Response> =>
    handleMcpRequest(
      new Request("http://localhost", {
        method: "POST",
        body: JSON.stringify({
          jsonrpc: "2.0",
          id: 1,
          method: "tools/call",
          params: { name, arguments: args },
        }),
      }),
    );

  /** First argument of the first call made to the mocked `fetch`, as a string. */
  const firstFetchedUrl = (): string => {
    const fetchMock = global.fetch as unknown as ReturnType<typeof mock>;
    return String(fetchMock.mock.calls[0]?.[0]);
  };

  beforeEach(() => {
    // Every upstream call resolves to an empty JSON array unless a test overrides it.
    global.fetch = mock(() =>
      Promise.resolve(new Response(JSON.stringify([]), { status: 200 })),
    ) as unknown as typeof fetch;
  });

  afterEach(() => {
    global.fetch = originalFetch;
  });

  test("all search tools should document the unstableFilter property", () => {
    for (const toolName of searchToolNames) {
      const tool = tools.find((candidate) => candidate.name === toolName);
      expect(tool).toBeDefined();
      expect(tool?.inputSchema.properties).toHaveProperty("unstableFilter");

      const schema = tool?.inputSchema.properties.unstableFilter as {
        type: string;
        description: string;
      };
      expect(schema.type).toBe("boolean");
      expect(schema.description).toContain("optional");
      expect(schema.description).toContain("20%");
      expect(schema.description).toContain("median");
      expect(schema.description).toContain("unstableResults");
    }
  });

  test("handler should forward unstableFilter=true for search_kijiji", async () => {
    await callTool("search_kijiji", { query: "laptop", unstableFilter: true });

    expect(firstFetchedUrl()).toContain("unstableFilter=true");
  });

  test("search_kijiji should document price filters as dollars", () => {
    const kijiji = tools.find((candidate) => candidate.name === "search_kijiji");

    const lowerBound = kijiji?.inputSchema.properties.priceMin as {
      description: string;
    };
    const upperBound = kijiji?.inputSchema.properties.priceMax as {
      description: string;
    };

    expect(lowerBound.description).toContain("dollars");
    expect(upperBound.description).toContain("dollars");
  });

  test("handler should forward Kijiji dollar price filters to API", async () => {
    await callTool("search_kijiji", {
      query: "macbook",
      priceMin: 999.99,
      priceMax: 1000,
    });

    const requestedUrl = firstFetchedUrl();
    expect(requestedUrl).toContain("priceMin=999.99");
    expect(requestedUrl).toContain("priceMax=1000");
  });

  test("handler should forward unstableFilter=true for search_facebook", async () => {
    await callTool("search_facebook", { query: "laptop", unstableFilter: true });

    expect(firstFetchedUrl()).toContain("unstableFilter=true");
  });

  test("tools/call returns API JSON as text content", async () => {
    // Override the default empty-array mock with a single-item payload.
    global.fetch = mock(() =>
      Promise.resolve(
        new Response(JSON.stringify([{ title: "item" }]), { status: 200 }),
      ),
    ) as unknown as typeof fetch;

    const response = await callTool("search_facebook", { query: "laptop" });

    const body = await response.json();
    expect(body.result.content[0].type).toBe("text");
    expect(JSON.parse(body.result.content[0].text)).toEqual([
      { title: "item" },
    ]);
  });

  test("handler should forward unstableFilter=true for search_ebay", async () => {
    await callTool("search_ebay", { query: "laptop", unstableFilter: true });

    expect(firstFetchedUrl()).toContain("unstableFilter=true");
  });

  test("search_ebay should document price filters as dollars", () => {
    const ebay = tools.find((candidate) => candidate.name === "search_ebay");

    const lowerBound = ebay?.inputSchema.properties.minPrice as {
      description: string;
    };
    const upperBound = ebay?.inputSchema.properties.maxPrice as {
      description: string;
    };

    expect(lowerBound.description).toContain("dollars");
    expect(upperBound.description).toContain("dollars");
  });

  test("handler should forward eBay dollar price filters to API", async () => {
    await callTool("search_ebay", {
      query: "macbook",
      minPrice: 999.99,
      maxPrice: 1000,
    });

    const requestedUrl = firstFetchedUrl();
    expect(requestedUrl).toContain("minPrice=999.99");
    expect(requestedUrl).toContain("maxPrice=1000");
  });
});
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"lib": ["dom"],
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"paths": {
|
||||
"@/*": ["./src/*"]
|
||||
},
|
||||
"strict": true,
|
||||
"noEmit": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"include": ["./src", "./test", "../../types/**/*.d.ts"]
|
||||
}
|
||||
|
||||
25
tsconfig.json
Normal file
25
tsconfig.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/tsconfig",
|
||||
"extends": "@tsconfig/bun/tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"lib": ["dom", "ESNext"],
|
||||
"target": "ESNext",
|
||||
"module": "preserve",
|
||||
"moduleResolution": "bundler",
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"moduleDetection": "force",
|
||||
"jsx": "react-jsx",
|
||||
"allowJs": true,
|
||||
"allowImportingTsExtensions": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"skipLibCheck": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"noImplicitOverride": true,
|
||||
"noUnusedLocals": false,
|
||||
"noUnusedParameters": false,
|
||||
"noPropertyAccessFromIndexSignature": false,
|
||||
"types": ["@types/bun"]
|
||||
}
|
||||
}
|
||||
14
turbo.json
Normal file
14
turbo.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"$schema": "https://turbo.build/schema.json",
|
||||
"tasks": {
|
||||
"typecheck": {},
|
||||
"build": {
|
||||
"dependsOn": ["^build"],
|
||||
"outputs": ["../../dist/**"]
|
||||
},
|
||||
"test": {
|
||||
"dependsOn": ["^build"],
|
||||
"outputs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
25
types/argon2-wasm-pro/index.d.ts
vendored
Normal file
25
types/argon2-wasm-pro/index.d.ts
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
// Ambient type declarations for the "argon2-wasm-pro" package: a default export
// object exposing a single async `hash` function.
// NOTE(review): only the shapes below are declared here; field semantics are
// inferred from names — confirm against the package's own documentation.
declare module "argon2-wasm-pro" {
|
||||
/** Input to `hash`. */
interface Argon2Options {
|
||||
// Secret to hash, as text or raw bytes.
pass: string | Uint8Array;
|
||||
// Salt bytes.
salt: Uint8Array;
|
||||
// presumably the iteration / time-cost count — TODO confirm
time: number;
|
||||
// presumably the memory cost; unit not shown here — TODO confirm
mem: number;
|
||||
// presumably the desired digest length in bytes — TODO confirm
hashLen: number;
|
||||
// presumably the number of lanes/threads — TODO confirm
parallelism: number;
|
||||
// Numeric selector; presumably the Argon2 variant — TODO confirm the mapping
type: number;
|
||||
}
|
||||
|
||||
/** Value the promise returned by `hash` resolves to. */
interface Argon2Result {
|
||||
// Digest as bytes.
hash: Uint8Array;
|
||||
// Digest as a string; presumably hex-encoded per the name.
hashHex: string;
|
||||
// presumably the full encoded hash string including parameters — TODO confirm
encoded: string;
|
||||
}
|
||||
|
||||
/** Hashes with the given options; resolves to an `Argon2Result`. */
function hash(options: Argon2Options): Promise<Argon2Result>;
|
||||
|
||||
// Shape of the module's default export: an object carrying `hash`.
const argon2: {
|
||||
hash: typeof hash;
|
||||
};
|
||||
|
||||
export default argon2;
|
||||
}
|
||||
Reference in New Issue
Block a user