Compare commits

..

18 Commits

Author SHA1 Message Date
6ab9c4c3a5 chore: biome lint
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-22 22:34:05 -05:00
3919ec0727 chore: biome init
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-22 22:33:52 -05:00
1090ad5bfb fix: update Facebook API endpoint with cookiePath parameter 2026-01-22 19:57:27 -05:00
c937a70db7 feat: add Facebook marketplace item fetching API 2026-01-22 19:56:31 -05:00
59fcbf9ed2 feat: update fetchFacebookItems with cookie auto-loading 2026-01-22 19:56:02 -05:00
d8542eb8f7 feat: parse Facebook marketplace item details and test exports 2026-01-22 19:54:44 -05:00
0a114cf323 feat: extract individual Facebook marketplace items 2026-01-22 19:54:14 -05:00
5f7de1167e fix: add currency style and USD to formatCentsToCurrency 2026-01-22 19:53:53 -05:00
9edafc88c8 feat: add extraction monitoring and metrics logging 2026-01-22 19:52:39 -05:00
5871644e8b refactor: improve search extraction with edge case handling 2026-01-22 19:52:09 -05:00
d5d050013e feat: add Facebook cookie parsing and auto-loading 2026-01-22 19:51:35 -05:00
ff56a29171 feat: add cookiePath parameter to loadFacebookCookies 2026-01-22 19:51:18 -05:00
6a36214528 feat: add FacebookMarketplaceItem interface 2026-01-22 19:48:41 -05:00
7af1be3977 feat: improve Cookie interface type safety 2026-01-22 19:47:37 -05:00
844e566b57 feat: add Facebook cookie parser script 2026-01-22 19:35:47 -05:00
b3be32835a test: add Facebook marketplace test suite 2026-01-22 19:35:38 -05:00
baa34eefdf chore: agent.md
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-22 16:22:06 -05:00
9011ab4793 feat: fmarketplace docs
Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
2026-01-22 01:02:36 -05:00
15 changed files with 5072 additions and 2092 deletions

382
FMARKETPLACE.md Normal file
View File

@@ -0,0 +1,382 @@
# Facebook Marketplace API Reverse Engineering
## Overview
This document tracks findings from reverse-engineering Facebook Marketplace APIs for listing details.
## Current Implementation Status
- Search functionality: Implemented in `src/facebook.ts`
- Individual listing details: Implemented via `fetchFacebookItem()` (see "Implementation Details" below)
## Findings
### Step 1: Initial Setup
- Using Chrome DevTools to inspect Facebook Marketplace
- Need to authenticate with Facebook account to access marketplace data
- Cookies required for full access
- Current status: Successfully logged in and accessed marketplace data
### Step 2: Individual Listing Details Analysis - COMPLETED
- **Data Location**: Embedded in HTML script tags within `require` array structure
- **Path**: `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Authentication**: Required for full data access
- **Current Status**: Successfully reverse-engineered the API structure and data extraction method
### API Endpoints Discovered
#### Search Endpoint
- URL: `https://www.facebook.com/marketplace/{location}/search`
- Parameters: `query`, `sortBy`, `exact`
- Data embedded in HTML script tags with `require` structure
- Authentication: Required (cookies)
#### Listing Details Endpoint
- **URL Structure**: `https://www.facebook.com/marketplace/item/{listing_id}/`
- **Data Source**: Server-side rendered HTML with embedded JSON data in script tags
- **Data Structure**: Relay/GraphQL style data structure under `require[0][3].__bbox.require[...].__bbox.result.data.viewer.marketplace_product_details_page.target`
- **Extraction Method**: Parse JSON from script tags containing marketplace data, navigate to the target object
- **Authentication**: Required (cookies)
### Listing Data Structure Discovered (Current - 2026)
The current Facebook Marketplace API returns a comprehensive `GroupCommerceProductItem` object with the following key properties:
```typescript
interface FacebookMarketplaceItem {
// Basic identification
id: string;
__typename: "GroupCommerceProductItem";
// Listing content
marketplace_listing_title: string;
redacted_description: {
text: string;
};
custom_title?: string;
// Pricing
formatted_price: {
text: string;
};
listing_price: {
amount: string;
currency: string;
amount_with_offset: string;
};
// Location
location_text: {
text: string;
};
location: {
latitude: number;
longitude: number;
reverse_geocode_detailed: {
country_alpha_two: string;
postal_code_trimmed: string;
};
};
// Status flags
is_live: boolean;
is_sold: boolean;
is_pending: boolean;
is_hidden: boolean;
is_draft: boolean;
// Timing
creation_time: number;
// Seller information
marketplace_listing_seller: {
__typename: "User";
id: string;
name: string;
profile_picture?: {
uri: string;
};
join_time?: number;
};
// Vehicle-specific fields (for automotive listings)
vehicle_make_display_name?: string;
vehicle_model_display_name?: string;
vehicle_odometer_data?: {
unit: "KILOMETERS" | "MILES";
value: number;
};
vehicle_transmission_type?: "AUTOMATIC" | "MANUAL";
vehicle_exterior_color?: string;
vehicle_interior_color?: string;
vehicle_condition?: "EXCELLENT" | "GOOD" | "FAIR" | "POOR";
vehicle_fuel_type?: string;
vehicle_trim_display_name?: string;
// Category and commerce
marketplace_listing_category_id: string;
condition?: string;
// Commerce features
delivery_types?: string[];
is_shipping_offered?: boolean;
is_buy_now_enabled?: boolean;
can_buyer_make_checkout_offer?: boolean;
// Communication
messaging_enabled?: boolean;
first_message_suggested_value?: string;
// Metadata
logging_id: string;
reportable_ent_id: string;
origin_target?: {
__typename: "Marketplace";
id: string;
};
// Related listings (for part-out sellers)
marketplace_listing_sets?: {
edges: Array<{
node: {
canonical_listing: {
id: string;
marketplace_listing_title: string;
is_live: boolean;
is_sold: boolean;
formatted_price: { text: string };
};
};
}>;
};
}
```
### Example Data Extracted (Current Structure)
```json
{
"__typename": "GroupCommerceProductItem",
"marketplace_listing_title": "2012 Mazda MAZDA 3 PART-OUT",
"id": "1211645920845312",
"redacted_description": {
"text": "FOR PARTS ONLY!!!"
},
"custom_title": "2012 Mazda 3 part-out",
"creation_time": 1760450080,
"location_text": {
"text": "Toronto, ON"
},
"is_live": true,
"is_sold": false,
"is_pending": false,
"is_hidden": false,
"formatted_price": {
"text": "FREE"
},
"listing_price": {
"amount_with_offset": "0",
"currency": "CAD",
"amount": "0.00"
},
"condition": "USED",
"logging_id": "24676483845336407",
"marketplace_listing_category_id": "807311116002614",
"marketplace_listing_seller": {
"__typename": "User",
"id": "61570613529010",
"name": "Jay Heshin",
"profile_picture": {
"uri": "https://scontent-yyz1-1.xx.fbcdn.net/v/t39.30808-1/480952111_122133462296687117_4145652046222010716_n.jpg?stp=cp6_dst-jpg_s50x50_tt6&_nc_cat=108&ccb=1-7&_nc_sid=e99d92&_nc_ohc=x_DTkeriVbgQ7kNvwEqT_x3&_nc_oc=Adnqnqf4YsZxgMIkR2mSFrdLb6-BDw4omCWqG_cqB-H0uXGgK1l4-T-fLSGB_CQJEKo&_nc_zt=24&_nc_ht=scontent-yyz1-1.xx&_nc_gid=7GnSwn4MSbllAgGWJy0RTQ&oh=00_AfpY66l8w-LvHvZ6tTgiD9Qh-Or_Udc-OaFiVL9pQ0YXsg&oe=697797CD"
}
},
"vehicle_condition": "FAIR",
"vehicle_exterior_color": "white",
"vehicle_interior_color": "",
"vehicle_make_display_name": "Mazda",
"vehicle_model_display_name": "3 part-out",
"vehicle_odometer_data": {
"unit": "KILOMETERS",
"value": 999999
},
"vehicle_transmission_type": "AUTOMATIC",
"location": {
"latitude": 43.651428222656,
"longitude": -79.436645507812,
"reverse_geocode_detailed": {
"country_alpha_two": "CA",
"postal_code_trimmed": "M6H 1C1"
}
},
"delivery_types": ["IN_PERSON"],
"messaging_enabled": true,
"first_message_suggested_value": "Hi, is this available?",
"marketplace_listing_sets": {
"edges": [
{
"node": {
"canonical_listing": {
"id": "1435935788228627",
"marketplace_listing_title": "2004 Land Rover LR2 PART-OUT",
"is_live": true,
"formatted_price": {"text": "FREE"}
}
}
}
]
}
}
```
## Data Extraction Method
### Current Method (2026)
Facebook Marketplace listing data is embedded in JSON within `<script>` tags in the HTML response. The extraction process:
1. **Find the Correct Script**: Look for script tags containing marketplace listing data by searching for key fields like `marketplace_listing_title`, `redacted_description`, and `formatted_price`.
2. **Parse JSON Structure**: The data is nested within a `require` array structure:
```
require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target
```
3. **Navigate to Target Object**: The actual listing data is a `GroupCommerceProductItem` object containing comprehensive information about the listing, seller, and vehicle details.
4. **Handle Dynamic Structure**: Facebook may change the exact path, so robust extraction should search for the target object recursively within the parsed JSON.
### Authentication Requirements
- Valid Facebook session cookies are required
- User must be logged in to Facebook
- Marketplace access may be location-restricted
## Tools Used
- Chrome DevTools Protocol
- Network monitoring
- HTML/script parsing
- JSON structure analysis
## Implementation Status
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Identified current data structure and extraction method (2026)
- ✅ Documented comprehensive GroupCommerceProductItem interface
- ✅ Implemented `extractFacebookItemData()` function with script parsing logic
- ✅ Implemented `parseFacebookItem()` function to convert GroupCommerceProductItem to ListingDetails
- ✅ Implemented `fetchFacebookItem()` function with authentication and error handling
- ✅ Updated TypeScript interfaces to match current API structure
- ✅ Added robust extraction with fallback methods for changing API paths
## Implementation Details
### Core Functions Implemented
1. **`extractFacebookItemData(htmlString)`**: Extracts marketplace item data from HTML-embedded JSON in script tags
- Searches for scripts containing marketplace listing data
- Uses primary path: `require[0][3][0].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
- Falls back to recursive search for GroupCommerceProductItem objects
2. **`parseFacebookItem(item)`**: Converts Facebook's GroupCommerceProductItem to unified ListingDetails format
- Handles pricing (FREE listings, CAD currency)
- Extracts seller information, location, and status
- Supports vehicle-specific metadata
- Maps Facebook-specific fields to common interface
3. **`fetchFacebookItem(itemId, cookiesSource?)`**: Fetches individual listing details
- Loads Facebook authentication cookies
- Makes authenticated HTTP requests
- Handles rate limiting and retries
- Returns parsed ListingDetails or null on failure
### Authentication Requirements
- Facebook session cookies required in `./cookies/facebook.json` or provided as parameter
- Cookies must include valid authentication tokens for marketplace access
- Handles cookie expiration and domain validation
## Current Implementation Status - 2026 Verification
### Step 3: API Verification and Current Structure Analysis (January 2026)
- **Verification Date**: January 22, 2026
- **Status**: Successfully verified current Facebook Marketplace API structure
- **Data Source**: Embedded JSON in HTML script tags (server-side rendered)
- **Extraction Path**: `require[0][3].__bbox.require[3][3][1].__bbox.result.data.viewer.marketplace_product_details_page.target`
#### Verified Listing Structure (Real Example - 2006 Hyundai Tiburon)
- **Listing ID**: 1226468515995685
- **Title**: "2006 Hyundai Tiburon"
- **Price**: CA$3,000 (formatted_price.text)
- **Raw Price Data**: {"amount_with_offset": "300000", "currency": "CAD", "amount": "3000.00"}
- **Location**: Hamilton, ON (with coordinates: 43.250427246094, -79.963989257812)
- **Description**: "As is" (redacted_description.text)
- **Vehicle Details**:
- Make: Hyundai
- Model: Tiburon
- Odometer: 194,000 km
- Transmission: AUTOMATIC
- Exterior Color: blue
- Interior Color: black
- Fuel Type: GASOLINE
- Number of Owners: TWO
- **Seller Information**:
- Name: Ajitpal Kaler
- ID: 100009257293466
- Profile Picture Available
- Join Time: 1426564800 (2015)
- **Listing Status**: Active (is_live: true, is_sold: false, is_pending: false)
- **Category**: 807311116002614 (Vehicles)
- **Delivery Types**: ["IN_PERSON"]
- **Messaging**: Enabled
#### Current API Characteristics
- **Authentication**: Still requires valid Facebook session cookies
- **Data Format**: Server-side rendered HTML with embedded GraphQL/Relay JSON
- **Structure Stability**: Primary extraction path remains functional
- **Additional Features**: Includes marketplace ratings, seller verification badges, cross-posting info
### API Changes Observed Since 2024 Documentation
- **Minimal Changes**: Core data structure largely unchanged
- **Enhanced Fields**: Added more detailed vehicle specifications and seller profile information
- **GraphQL Integration**: Deeper integration with Facebook's GraphQL infrastructure
- **Security Features**: Additional integrity checks and reporting mechanisms
### Multi-Category Testing Results (January 2026)
Successfully tested extraction across different listing categories:
#### 1. Vehicle Listings (Automotive)
- **Example**: 2006 Hyundai Tiburon (ID: 1226468515995685)
- **Status**: ✅ Fully functional
- **Data Extracted**: Complete vehicle specs, pricing, seller info, location coordinates
- **Unique Fields**: vehicle_make_display_name, vehicle_odometer_data, vehicle_transmission_type, vehicle_exterior_color, vehicle_interior_color, vehicle_fuel_type
#### 2. Electronics Listings
- **Example**: Nintendo Switch (ID: 3903865769914262)
- **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$140), location (Toronto, ON), condition (Used - like new), seller (Yitao Hou)
- **Category**: Electronics (category_id: 479353692612078)
- **Notes**: Standard GroupCommerceProductItem structure applies
#### 3. Home Goods/Furniture Listings
- **Example**: Tabletop Mirror (cat not included) (ID: 1082389057290709)
- **Status**: ✅ Fully functional
- **Data Extracted**: Title, price (CA$5), location (Mississauga, ON), condition (Used - like new), seller (Rohit Rehan)
- **Category**: Home Goods (category_id: 1569171756675761)
- **Notes**: Includes detailed description and delivery options
#### Testing Summary
- **Extraction Method**: Consistent across all categories
- **Data Structure**: GroupCommerceProductItem interface works for all listing types
- **Authentication**: Required for all categories
- **Rate Limiting**: Standard Facebook rate limits apply
- **Edge Cases**: Not exercised — every tested listing was active and offered in-person pickup only
## Implementation Status - COMPLETED (January 2026)
- ✅ Successfully reverse-engineered Facebook Marketplace API for listing details
- ✅ Verified current API structure and extraction method (January 2026)
- ✅ Tested extraction across multiple listing categories (vehicles, electronics, home goods)
- ✅ Implemented comprehensive error handling for sold/removed listings and authentication failures
- ✅ Enhanced rate limiting and retry logic (already robust)
- ✅ Added monitoring and metrics for API stability detection
- ✅ Updated all scraper functions to use verified extraction methods
- ✅ Documented comprehensive GroupCommerceProductItem interface with real examples
## Next Steps (Future Maintenance)
1. Monitor extraction success rates for API change detection
2. Update extraction paths if Facebook changes their API structure
3. Add support for additional marketplace features as they become available
4. Implement caching mechanisms for improved performance
5. Add support for marketplace messaging and negotiation features

30
biome.json Normal file
View File

@@ -0,0 +1,30 @@
{
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
"vcs": {
"enabled": false,
"clientKind": "git",
"useIgnoreFile": false
},
"files": {
"ignoreUnknown": false,
"ignore": []
},
"formatter": {
"enabled": true,
"indentStyle": "space"
},
"organizeImports": {
"enabled": true
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
},
"javascript": {
"formatter": {
"quoteStyle": "double"
}
}
}

View File

@@ -0,0 +1,183 @@
#!/usr/bin/env bun
/**
* Facebook Cookie Parser CLI
*
* Parses Facebook cookie strings into JSON format for the marketplace scraper
*
* Usage:
* bun run scripts/parse-facebook-cookies.ts "c_user=123; xs=abc"
* bun run scripts/parse-facebook-cookies.ts --input cookies.txt
* echo "c_user=123; xs=abc" | bun run scripts/parse-facebook-cookies.ts
* bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json
*/
import { parseFacebookCookieString } from "../src/facebook";
/**
 * Shape of a single cookie entry as handled by this CLI and serialized to the
 * output JSON file.
 *
 * Only `name`, `value`, `domain` and `path` are required; every other
 * attribute is optional.
 */
interface Cookie {
name: string;
value: string;
domain: string;
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
// NOTE(review): presumably a Unix timestamp — confirm against src/facebook.
expirationDate?: number;
storeId?: string;
}
/**
 * CLI wrapper around `parseFacebookCookieString` that aborts the process
 * rather than handing back an unusable result.
 *
 * @param cookieString - Raw cookie text, e.g. `"c_user=123; xs=abc"`.
 * @returns The parsed cookies (never an empty array).
 *
 * Prints an error and exits with status 1 when the input is blank or when
 * parsing yields no cookies.
 */
function parseFacebookCookieStringCLI(cookieString: string): Cookie[] {
  const isBlank = !cookieString || cookieString.trim() === "";
  if (isBlank) {
    console.error("❌ Error: Empty or invalid cookie string provided");
    process.exit(1);
  }

  const parsed = parseFacebookCookieString(cookieString);
  const foundAny = parsed.length !== 0;
  if (!foundAny) {
    console.error("❌ Error: No valid cookies found in input string");
    console.error('Expected format: "name1=value1; name2=value2;"');
    process.exit(1);
  }

  return parsed;
}
/**
 * Reads all of standard input and returns it as trimmed UTF-8 text.
 *
 * Chunks are collected as bytes and decoded once at the end so that a
 * multi-byte character split across two chunks is not corrupted.
 */
async function readStdinText(): Promise<string> {
  const chunks: Buffer[] = [];
  for await (const chunk of process.stdin) {
    chunks.push(typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk);
  }
  return Buffer.concat(chunks).toString("utf8").trim();
}

/**
 * CLI entry point: resolves the cookie string from (in priority order) the
 * `--input` file, a positional argument, or piped stdin, parses it, and
 * writes the result to the output path (default `./cookies/facebook.json`).
 *
 * Exits with status 1 on any usage or I/O error; `--help` prints usage and
 * returns normally.
 */
async function main() {
  const args = process.argv.slice(2);

  let cookieString = "";
  let outputPath = "./cookies/facebook.json";
  let inputPath = "";

  // Parse command line arguments
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const isInputFlag = arg === "--input" || arg === "-i";
    const isOutputFlag = arg === "--output" || arg === "-o";

    if (isInputFlag || isOutputFlag) {
      const value = args[i + 1];
      // A trailing flag has no value; fail loudly instead of storing undefined.
      if (value === undefined) {
        console.error(`❌ Error: Option ${arg} requires a value`);
        showHelp();
        process.exit(1);
      }
      if (isInputFlag) {
        inputPath = value;
      } else {
        outputPath = value;
      }
      i++; // Skip the consumed value
    } else if (arg === "--help" || arg === "-h") {
      showHelp();
      return;
    } else if (!arg.startsWith("-")) {
      // Assume this is the cookie string
      cookieString = arg;
    } else {
      console.error(`❌ Unknown option: ${arg}`);
      showHelp();
      process.exit(1);
    }
  }

  if (inputPath) {
    // Read from file if specified
    try {
      const file = Bun.file(inputPath);
      if (!(await file.exists())) {
        console.error(`❌ Error: Input file not found: ${inputPath}`);
        process.exit(1);
      }
      cookieString = await file.text();
    } catch (error) {
      console.error(`❌ Error reading input file: ${error}`);
      process.exit(1);
    }
  } else if (!cookieString && !process.stdin.isTTY) {
    // `isTTY` is `true` for terminals but may be `undefined` (not `false`)
    // for pipes and redirected files, so test for falsiness rather than
    // strict equality with `false`. Reading here (after option parsing)
    // also lets piped input be combined with `--output`.
    cookieString = await readStdinText();
    if (!cookieString) {
      console.error("❌ Error: No input provided via stdin");
      process.exit(1);
    }
  }

  if (!cookieString.trim()) {
    console.error("❌ Error: No cookie string provided");
    console.error(
      "Provide cookie string as argument, --input file, or via stdin",
    );
    showHelp();
    process.exit(1);
  }

  const cookies = parseFacebookCookieStringCLI(cookieString);
  await writeOutput(cookies, outputPath);
}
/**
 * Writes the cookies to `outputPath` as 2-space-indented JSON, then prints a
 * short report: the cookie count, the destination path, and one line per
 * cookie showing its name and at most the first 20 characters of its value.
 *
 * @param cookies - Parsed cookies to persist.
 * @param outputPath - Destination file path.
 *
 * Any failure is reported on stderr and terminates the process with status 1.
 */
async function writeOutput(cookies: Cookie[], outputPath: string) {
  const previewLength = 20;

  try {
    const serialized = JSON.stringify(cookies, null, 2);
    await Bun.write(outputPath, serialized);

    console.log(`✅ Successfully parsed ${cookies.length} Facebook cookies`);
    console.log(`📁 Saved to: ${outputPath}`);

    // Show summary of parsed cookies (values truncated to keep lines short)
    console.log("\n📋 Parsed cookies:");
    cookies.forEach(({ name, value }) => {
      const preview = value.substring(0, previewLength);
      const ellipsis = value.length > previewLength ? "..." : "";
      console.log(`${name}: ${preview}${ellipsis}`);
    });
  } catch (error) {
    console.error(`❌ Error writing to output file: ${error}`);
    process.exit(1);
  }
}
/**
 * Prints the CLI usage text (usage line, examples, options, cookie format and
 * output description) to stdout. Does not exit the process; callers decide
 * whether to return or call `process.exit` afterwards.
 */
function showHelp() {
console.log(`
Facebook Cookie Parser CLI
Parses Facebook cookie strings into JSON format for the marketplace scraper.
USAGE:
bun run scripts/parse-facebook-cookies.ts [OPTIONS] [COOKIE_STRING]
EXAMPLES:
# Parse from command line argument
bun run scripts/parse-facebook-cookies.ts "c_user=123; xs=abc"
# Parse from file
bun run scripts/parse-facebook-cookies.ts --input cookies.txt
# Parse from stdin
echo "c_user=123; xs=abc" | bun run scripts/parse-facebook-cookies.ts
# Output to custom file
bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json
OPTIONS:
-i, --input FILE Read cookie string from file
-o, --output FILE Output file path (default: ./cookies/facebook.json)
-h, --help Show this help message
COOKIE FORMAT:
Semicolon-separated name=value pairs
Example: "c_user=123456789; xs=abcdef123456; fr=xyz789"
OUTPUT:
JSON array of cookie objects saved to ./cookies/facebook.json
`);
}
// Entry point: start the CLI only when `import.meta.main` is truthy, so the
// module can be imported without side effects.
if (import.meta.main) {
  // Last-resort handler for anything main() did not deal with itself.
  const reportFatal = (error: unknown): void => {
    console.error(`❌ Unexpected error: ${error}`);
    process.exit(1);
  };

  main().catch(reportFatal);
}

View File

@@ -1,97 +1,103 @@
import cliProgress from "cli-progress";
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
// ----------------------------- Types -----------------------------
type HTMLString = string;
type ListingDetails = {
url: string;
title: string;
description?: string;
listingPrice?: {
amountFormatted: string;
cents?: number;
currency?: string;
};
listingType?: string;
listingStatus?: string;
creationDate?: string;
endDate?: string;
numberOfViews?: number;
address?: string | null;
url: string;
title: string;
description?: string;
listingPrice?: {
amountFormatted: string;
cents?: number;
currency?: string;
};
listingType?: string;
listingStatus?: string;
creationDate?: string;
endDate?: string;
numberOfViews?: number;
address?: string | null;
};
// ----------------------------- Utilities -----------------------------
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null;
return typeof value === "object" && value !== null;
}
async function delay(ms: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, ms));
await new Promise((resolve) => setTimeout(resolve, ms));
}
/**
* Turns cents to localized currency string.
*/
function formatCentsToCurrency(
num: number | string | undefined,
locale = "en-US",
num: number | string | undefined,
locale = "en-US",
): string {
if (num == null) return "";
const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
if (Number.isNaN(cents)) return "";
const dollars = cents / 100;
const formatter = new Intl.NumberFormat(locale, {
minimumFractionDigits: 2,
maximumFractionDigits: 2,
useGrouping: true,
});
return formatter.format(dollars);
if (num == null) return "";
const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
if (Number.isNaN(cents)) return "";
const dollars = cents / 100;
const formatter = new Intl.NumberFormat(locale, {
minimumFractionDigits: 2,
maximumFractionDigits: 2,
useGrouping: true,
});
return formatter.format(dollars);
}
/**
* Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents
*/
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null {
if (!priceText || typeof priceText !== 'string') return null;
function parseEbayPrice(
priceText: string,
): { cents: number; currency: string } | null {
if (!priceText || typeof priceText !== "string") return null;
// Clean up the price text and extract currency and amount
const cleaned = priceText.trim();
// Clean up the price text and extract currency and amount
const cleaned = priceText.trim();
// Find all numbers in the string (including decimals)
const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
if (!numberMatches) return null;
// Find all numbers in the string (including decimals)
const numberMatches = cleaned.match(/[\d,]+\.?\d*/);
if (!numberMatches) return null;
const amountStr = numberMatches[0].replace(/,/g, '');
const dollars = parseFloat(amountStr);
if (isNaN(dollars)) return null;
const amountStr = numberMatches[0].replace(/,/g, "");
const dollars = Number.parseFloat(amountStr);
if (Number.isNaN(dollars)) return null;
const cents = Math.round(dollars * 100);
const cents = Math.round(dollars * 100);
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
let currency = 'USD'; // Default
// Extract currency - look for common formats like "CAD", "USD", "C $", "$CA", etc.
let currency = "USD"; // Default
if (cleaned.toUpperCase().includes('CAD') || cleaned.includes('CA$') || cleaned.includes('C $')) {
currency = 'CAD';
} else if (cleaned.toUpperCase().includes('USD') || cleaned.includes('$')) {
currency = 'USD';
}
if (
cleaned.toUpperCase().includes("CAD") ||
cleaned.includes("CA$") ||
cleaned.includes("C $")
) {
currency = "CAD";
} else if (cleaned.toUpperCase().includes("USD") || cleaned.includes("$")) {
currency = "USD";
}
return { cents, currency };
return { cents, currency };
}
class HttpError extends Error {
constructor(
message: string,
public readonly status: number,
public readonly url: string,
) {
super(message);
this.name = "HttpError";
}
constructor(
message: string,
public readonly status: number,
public readonly url: string,
) {
super(message);
this.name = "HttpError";
}
}
// ----------------------------- HTTP Client -----------------------------
@@ -102,69 +108,71 @@ class HttpError extends Error {
- Respects X-RateLimit-Reset when present (seconds)
*/
async function fetchHtml(
url: string,
DELAY_MS: number,
opts?: {
maxRetries?: number;
retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
},
url: string,
DELAY_MS: number,
opts?: {
maxRetries?: number;
retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
},
): Promise<HTMLString> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const res = await fetch(url, {
method: "GET",
headers: {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-CA,en-US;q=0.9,en;q=0.8",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
},
});
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const res = await fetch(url, {
method: "GET",
headers: {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-CA,en-US;q=0.9,en;q=0.8",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
},
});
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
await delay(waitMs);
continue;
}
// Retry on 5xx
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await delay((attempt + 1) * retryBaseMs);
continue;
}
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
}
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
await delay(waitMs);
continue;
}
// Retry on 5xx
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await delay((attempt + 1) * retryBaseMs);
continue;
}
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
}
const html = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
return html;
} catch (err) {
if (attempt >= maxRetries) throw err;
await delay((attempt + 1) * retryBaseMs);
}
}
const html = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
return html;
} catch (err) {
if (attempt >= maxRetries) throw err;
await delay((attempt + 1) * retryBaseMs);
}
}
throw new Error("Exhausted retries without response");
throw new Error("Exhausted retries without response");
}
// ----------------------------- Parsing -----------------------------
@@ -173,273 +181,321 @@ async function fetchHtml(
Parse eBay search page HTML and extract listings using DOM selectors
*/
function parseEbayListings(
htmlString: HTMLString,
keywords: string[],
exclusions: string[],
strictMode: boolean
htmlString: HTMLString,
keywords: string[],
exclusions: string[],
strictMode: boolean,
): ListingDetails[] {
const { document } = parseHTML(htmlString);
const results: ListingDetails[] = [];
const { document } = parseHTML(htmlString);
const results: ListingDetails[] = [];
// Find all listing links by looking for eBay item URLs (/itm/)
const linkElements = document.querySelectorAll('a[href*="itm/"]');
// Find all listing links by looking for eBay item URLs (/itm/)
const linkElements = document.querySelectorAll('a[href*="itm/"]');
for (const linkElement of linkElements) {
try {
// Get href attribute
let href = linkElement.getAttribute("href");
if (!href) continue;
for (const linkElement of linkElements) {
try {
// Get href attribute
let href = linkElement.getAttribute('href');
if (!href) continue;
// Make href absolute
if (!href.startsWith("http")) {
href = href.startsWith("//")
? `https:${href}`
: `https://www.ebay.com${href}`;
}
// Make href absolute
if (!href.startsWith('http')) {
href = href.startsWith('//') ? `https:${href}` : `https://www.ebay.com${href}`;
}
// Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures
let container = linkElement.parentElement?.parentElement?.parentElement;
if (!container) {
// Try a different level
container = linkElement.parentElement?.parentElement;
}
if (!container) continue;
// Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures
let container = linkElement.parentElement?.parentElement?.parentElement;
if (!container) {
// Try a different level
container = linkElement.parentElement?.parentElement;
}
if (!container) continue;
// Extract title - look for heading or title-related elements near the link
// Modern eBay often uses h3, span, or div with text content near the link
let titleElement = container.querySelector(
'h3, [role="heading"], .s-item__title span',
);
// Extract title - look for heading or title-related elements near the link
// Modern eBay often uses h3, span, or div with text content near the link
let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span');
// If no direct title element, try finding text content around the link
if (!titleElement) {
// Look for spans or divs with text near this link
const nearbySpans = container.querySelectorAll("span, div");
for (const span of nearbySpans) {
const text = span.textContent?.trim();
if (
text &&
text.length > 10 &&
text.length < 200 &&
!text.includes("$") &&
!text.includes("item")
) {
titleElement = span;
break;
}
}
}
// If no direct title element, try finding text content around the link
if (!titleElement) {
// Look for spans or divs with text near this link
const nearbySpans = container.querySelectorAll('span, div');
for (const span of nearbySpans) {
const text = span.textContent?.trim();
if (text && text.length > 10 && text.length < 200 && !text.includes('$') && !text.includes('item')) {
titleElement = span;
break;
}
}
}
let title = titleElement?.textContent?.trim();
let title = titleElement?.textContent?.trim();
// Clean up eBay UI strings that get included in titles
if (title) {
// Remove common eBay UI strings that appear at the end of titles
const uiStrings = [
"Opens in a new window",
"Opens in a new tab",
"Opens in a new window or tab",
"opens in a new window",
"opens in a new tab",
"opens in a new window or tab",
];
// Clean up eBay UI strings that get included in titles
if (title) {
// Remove common eBay UI strings that appear at the end of titles
const uiStrings = [
'Opens in a new window',
'Opens in a new tab',
'Opens in a new window or tab',
'opens in a new window',
'opens in a new tab',
'opens in a new window or tab'
];
for (const uiString of uiStrings) {
const uiIndex = title.indexOf(uiString);
if (uiIndex !== -1) {
title = title.substring(0, uiIndex).trim();
break; // Only remove one UI string per title
}
}
for (const uiString of uiStrings) {
const uiIndex = title.indexOf(uiString);
if (uiIndex !== -1) {
title = title.substring(0, uiIndex).trim();
break; // Only remove one UI string per title
}
}
// If the title became empty or too short after cleaning, skip this item
if (title.length < 10) {
continue;
}
}
// If the title became empty or too short after cleaning, skip this item
if (title.length < 10) {
continue;
}
}
if (!title) continue;
if (!title) continue;
// Skip irrelevant eBay ads
if (title === "Shop on eBay" || title.length < 3) continue;
// Skip irrelevant eBay ads
if (title === "Shop on eBay" || title.length < 3) continue;
// Extract price - look for eBay's price classes, preferring sale/discount prices
let priceElement = container.querySelector(
'[class*="s-item__price"], .s-item__price, [class*="price"]',
);
// Extract price - look for eBay's price classes, preferring sale/discount prices
let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]');
// If no direct price class, look for spans containing $ (but not titles)
if (!priceElement) {
const spansAndElements = container.querySelectorAll(
"span, div, b, em, strong",
);
for (const el of spansAndElements) {
const text = el.textContent?.trim();
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
if (
text?.includes("$") &&
text.length < 100 &&
!text.includes("laptop") &&
!text.includes("computer") &&
!text.includes("intel") &&
!text.includes("core") &&
!text.includes("ram") &&
!text.includes("ssd") &&
!/\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements
) {
priceElement = el;
break;
}
}
}
// If no direct price class, look for spans containing $ (but not titles)
if (!priceElement) {
const spansAndElements = container.querySelectorAll('span, div, b, em, strong');
for (const el of spansAndElements) {
const text = el.textContent?.trim();
// Must contain $, be reasonably short (price shouldn't be paragraph), and not contain product words
if (text && text.includes('$') && text.length < 100 &&
!text.includes('laptop') && !text.includes('computer') && !text.includes('intel') &&
!text.includes('core') && !text.includes('ram') && !text.includes('ssd') &&
! /\d{4}/.test(text) && // Avoid years like "2024"
!text.includes('"') // Avoid measurements
) {
priceElement = el;
break;
}
}
}
// For discounted items, eBay shows both original and sale price
// Prefer sale/current price over original/strikethrough price
if (priceElement) {
// Check if this element or its parent contains multiple price elements
const priceContainer =
priceElement.closest('[class*="s-item__price"]') ||
priceElement.parentElement;
// For discounted items, eBay shows both original and sale price
// Prefer sale/current price over original/strikethrough price
if (priceElement) {
// Check if this element or its parent contains multiple price elements
const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement;
if (priceContainer) {
// Look for all price elements within this container, including strikethrough prices
const allPriceElements = priceContainer.querySelectorAll(
'[class*="s-item__price"], span, b, em, strong, s, del, strike',
);
if (priceContainer) {
// Look for all price elements within this container, including strikethrough prices
const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike');
// Filter to only elements that actually contain prices (not labels)
const actualPrices: HTMLElement[] = [];
for (const el of allPriceElements) {
const text = el.textContent?.trim();
if (
text &&
/^\s*[\$£¥]/u.test(text) &&
text.length < 50 &&
!/\d{4}/.test(text)
) {
actualPrices.push(el);
}
}
// Filter to only elements that actually contain prices (not labels)
const actualPrices: HTMLElement[] = [];
for (const el of allPriceElements) {
const text = el.textContent?.trim();
if (text && /^\s*[\$£¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) {
actualPrices.push(el);
}
}
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
if (actualPrices.length > 1) {
// First, look for prices that are NOT struck through
const nonStrikethroughPrices = actualPrices.filter((el) => {
const tagName = el.tagName.toLowerCase();
const styles =
el.classList.contains("s-strikethrough") ||
el.classList.contains("u-flStrike") ||
el.closest("s, del, strike");
return (
tagName !== "s" &&
tagName !== "del" &&
tagName !== "strike" &&
!styles
);
});
// Prefer non-strikethrough prices (sale prices) over strikethrough ones (original prices)
if (actualPrices.length > 1) {
// First, look for prices that are NOT struck through
const nonStrikethroughPrices = actualPrices.filter(el => {
const tagName = el.tagName.toLowerCase();
const styles = el.classList.contains('s-strikethrough') || el.classList.contains('u-flStrike') ||
el.closest('s, del, strike');
return tagName !== 's' && tagName !== 'del' && tagName !== 'strike' && !styles;
});
if (nonStrikethroughPrices.length > 0) {
// Use the first non-strikethrough price (sale price)
priceElement = nonStrikethroughPrices[0];
} else {
// Fallback: use the last price (likely the most current)
const lastPrice = actualPrices[actualPrices.length - 1];
priceElement = lastPrice;
}
}
}
}
if (nonStrikethroughPrices.length > 0) {
// Use the first non-strikethrough price (sale price)
priceElement = nonStrikethroughPrices[0];
} else {
// Fallback: use the last price (likely the most current)
const lastPrice = actualPrices[actualPrices.length - 1];
priceElement = lastPrice;
}
}
}
}
const priceText = priceElement?.textContent?.trim();
let priceText = priceElement?.textContent?.trim();
if (!priceText) continue;
if (!priceText) continue;
// Parse price into cents and currency
const priceInfo = parseEbayPrice(priceText);
if (!priceInfo) continue;
// Parse price into cents and currency
const priceInfo = parseEbayPrice(priceText);
if (!priceInfo) continue;
// Apply exclusion filters
if (
exclusions.some((exclusion) =>
title.toLowerCase().includes(exclusion.toLowerCase()),
)
) {
continue;
}
// Apply exclusion filters
if (exclusions.some(exclusion => title.toLowerCase().includes(exclusion.toLowerCase()))) {
continue;
}
// Apply strict mode filter (title must contain at least one keyword)
if (
strictMode &&
!keywords.some((keyword) =>
title?.toLowerCase().includes(keyword.toLowerCase()),
)
) {
continue;
}
// Apply strict mode filter (title must contain at least one keyword)
if (strictMode && !keywords.some(keyword => title!.toLowerCase().includes(keyword.toLowerCase()))) {
continue;
}
const listing: ListingDetails = {
url: href,
title,
listingPrice: {
amountFormatted: priceText,
cents: priceInfo.cents,
currency: priceInfo.currency,
},
listingType: "OFFER", // eBay listings are typically offers
listingStatus: "ACTIVE",
address: null, // eBay doesn't typically show detailed addresses in search results
};
const listing: ListingDetails = {
url: href,
title,
listingPrice: {
amountFormatted: priceText,
cents: priceInfo.cents,
currency: priceInfo.currency,
},
listingType: "OFFER", // eBay listings are typically offers
listingStatus: "ACTIVE",
address: null, // eBay doesn't typically show detailed addresses in search results
};
results.push(listing);
} catch (err) {
console.warn(`Error parsing eBay listing: ${err}`);
}
}
results.push(listing);
} catch (err) {
console.warn(`Error parsing eBay listing: ${err}`);
continue;
}
}
return results;
return results;
}
// ----------------------------- Main -----------------------------
/**
 * Fetch a single eBay (ebay.ca) search results page and parse it into listings.
 *
 * @param SEARCH_QUERY - Free-text search terms; URL-encoded into the `_nkw` parameter.
 * @param REQUESTS_PER_SECOND - Request budget used to derive the delay awaited after the fetch.
 * @param opts - Optional filters:
 *   - `minPrice` / `maxPrice`: inclusive bounds compared against `listingPrice.cents`.
 *     Missing or NaN values fall back to `0` / `Number.MAX_SAFE_INTEGER`.
 *   - `strictMode`, `exclusions`, `keywords`: forwarded unchanged to `parseEbayListings`.
 *     `keywords` defaults to `[SEARCH_QUERY]`.
 * @returns The parsed listings whose price lies within the bounds, or an empty
 *   array when the search request fails with an `HttpError`.
 * @throws Re-throws any error that is not an `HttpError`.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  opts: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
  } = {},
) {
  const {
    strictMode = false,
    exclusions = [],
    keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
  } = opts;

  // Destructuring defaults only apply to `undefined`. A NaN bound (e.g. the
  // result of parseInt on a malformed query parameter) would make every
  // comparison below false and silently drop all listings, so treat it as absent.
  const boundOr = (value: number | undefined, fallback: number): number =>
    value === undefined || Number.isNaN(value) ? fallback : value;
  const minPrice = boundOr(opts.minPrice, 0);
  const maxPrice = boundOr(opts.maxPrice, Number.MAX_SAFE_INTEGER);

  // Build eBay search URL - use Canadian site and tracking parameters like real browser.
  // Parameters are joined with a plain "&"; the previous "^&" was a Windows-shell
  // escape artifact that appended "^" to the keyword and to each parameter value.
  const searchUrl = `https://www.ebay.ca/sch/i.html?_nkw=${encodeURIComponent(SEARCH_QUERY)}&_sacat=0&_from=R40&_trksid=p4432023.m570.l1313`;

  const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));

  console.log(`Fetching eBay search: ${searchUrl}`);

  try {
    // Use custom headers modeled after real browser requests to bypass bot detection
    const headers: Record<string, string> = {
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.5",
      "Accept-Encoding": "gzip, deflate, br",
      Referer: "https://www.ebay.ca/",
      Connection: "keep-alive",
      "Upgrade-Insecure-Requests": "1",
      "Sec-Fetch-Dest": "document",
      "Sec-Fetch-Mode": "navigate",
      "Sec-Fetch-Site": "same-origin",
      "Sec-Fetch-User": "?1",
      Priority: "u=0, i",
    };

    const res = await fetch(searchUrl, {
      method: "GET",
      headers,
    });

    if (!res.ok) {
      throw new HttpError(
        `Request failed with status ${res.status}`,
        res.status,
        searchUrl,
      );
    }

    const searchHtml = await res.text();
    // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
    await delay(DELAY_MS);

    console.log("\nParsing eBay listings...");

    const listings = parseEbayListings(
      searchHtml,
      keywords,
      exclusions,
      strictMode,
    );

    // Filter by price range (additional safety check). Listings with a missing
    // or zero price are dropped, matching the earlier truthiness check on `cents`.
    const filteredListings = listings.filter((listing) => {
      const cents = listing.listingPrice?.cents;
      return (
        typeof cents === "number" &&
        cents !== 0 &&
        cents >= minPrice &&
        cents <= maxPrice
      );
    });

    console.log(`Parsed ${filteredListings.length} eBay listings.`);
    return filteredListings;
  } catch (err) {
    // HTTP-level failures are reported and surfaced as "no results";
    // anything else is unexpected and propagates to the caller.
    if (err instanceof HttpError) {
      console.error(
        `Failed to fetch eBay search (${err.status}): ${err.message}`,
      );
      return [];
    }
    throw err;
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,187 +1,215 @@
import fetchKijijiItems from "@/kijiji";
import fetchFacebookItems from "@/facebook";
import fetchEbayItems from "@/ebay";
import fetchFacebookItems from "@/facebook";
import fetchKijijiItems from "@/kijiji";
const PORT = process.env.PORT || 4005;

/** Read the search query from the `query` request header or the `q` URL parameter. */
function readSearchQuery(req: Request, reqUrl: URL): string | null {
  return req.headers.get("query") || reqUrl.searchParams.get("q") || null;
}

/** 400 response shared by every scraper route when no search query was supplied. */
function missingQueryResponse(): Response {
  return Response.json(
    {
      message: "Request didn't have 'query' header or 'q' search parameter!",
    },
    { status: 400 },
  );
}

/** 404 response shared by every scraper route when the scraper found nothing. */
function noResultsResponse(): Response {
  return Response.json(
    { message: "Search didn't return any results!" },
    { status: 404 },
  );
}

/**
 * Parse a base-10 integer query parameter.
 * Returns `undefined` when the parameter is absent or not a number, so that
 * NaN never reaches the scrapers.
 */
function parseIntParam(raw: string | null): number | undefined {
  if (!raw) return undefined;
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? undefined : parsed;
}

/**
 * Split a comma-separated query parameter into trimmed, non-empty entries.
 * Empty entries must be dropped: `"a,"` would otherwise yield `""`, and
 * `title.includes("")` is always true, which would exclude every listing
 * (or make every listing pass the strict keyword check).
 */
function splitCsvParam(raw: string | null): string[] {
  if (!raw) return [];
  return raw
    .split(",")
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
}

/** Return `raw` when it is one of `allowed`, otherwise `fallback`. */
function pickAllowed<T extends string>(
  raw: string | null,
  allowed: readonly T[],
  fallback: T,
): T {
  return allowed.find((candidate) => candidate === raw) ?? fallback;
}

/** All-digit values become numbers, other non-empty values stay strings, absent values use `fallback`. */
function numericOrRaw(raw: string | null, fallback: number): number | string {
  if (!raw) return fallback;
  return /^\d+$/.test(raw) ? Number(raw) : raw;
}

/** Extract a human-readable message from an unknown thrown value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : "Unknown error occurred";
}

const server = Bun.serve({
  port: PORT,
  idleTimeout: 0,
  routes: {
    // Static routes
    "/api/status": new Response("OK"),

    // Dynamic routes
    "/api/kijiji": async (req: Request) => {
      const reqUrl = new URL(req.url);

      const SEARCH_QUERY = readSearchQuery(req, reqUrl);
      if (!SEARCH_QUERY) return missingQueryResponse();

      // Parse optional parameters with enhanced defaults
      const maxPages = parseIntParam(reqUrl.searchParams.get("maxPages")) ?? 5; // Default: 5 pages
      // Unknown sort values fall back to the defaults instead of being
      // forwarded through an unchecked type assertion.
      const sortBy = pickAllowed(
        reqUrl.searchParams.get("sortBy"),
        ["relevancy", "date", "price", "distance"] as const,
        "relevancy",
      );
      const sortOrder = pickAllowed(
        reqUrl.searchParams.get("sortOrder"),
        ["asc", "desc"] as const,
        "desc",
      );

      // Build search options
      // NOTE(review): 1700272 and 0 are the default location/category ids carried
      // over from the original code — confirm what they map to on Kijiji.
      const locationValue = numericOrRaw(
        reqUrl.searchParams.get("location"),
        1700272,
      );
      const categoryValue = numericOrRaw(
        reqUrl.searchParams.get("category"),
        0,
      );

      const searchOptions: import("@/kijiji").SearchOptions = {
        location: locationValue,
        category: categoryValue,
        keywords: SEARCH_QUERY,
        sortBy,
        sortOrder,
        maxPages,
      };

      // Build listing fetch options with enhanced defaults
      const listingOptions: import("@/kijiji").ListingFetchOptions = {
        includeImages: true, // Always include full image arrays
        sellerDataDepth: "detailed", // Default: detailed seller info
        includeClientSideData: false, // GraphQL reviews disabled by default
      };

      try {
        const items = await fetchKijijiItems(
          SEARCH_QUERY,
          1,
          undefined,
          searchOptions,
          listingOptions,
        );
        if (!items || items.length === 0) return noResultsResponse();
        return Response.json(items, { status: 200 });
      } catch (error) {
        console.error("Kijiji scraping error:", error);
        return Response.json(
          {
            message: `Scraping failed: ${describeError(error)}`,
            query: SEARCH_QUERY,
            options: { searchOptions, listingOptions },
          },
          { status: 500 },
        );
      }
    },

    "/api/facebook": async (req: Request) => {
      const reqUrl = new URL(req.url);

      const SEARCH_QUERY = readSearchQuery(req, reqUrl);
      if (!SEARCH_QUERY) return missingQueryResponse();

      const LOCATION = reqUrl.searchParams.get("location") || "toronto";
      const COOKIES_SOURCE = reqUrl.searchParams.get("cookies") || undefined;

      try {
        const items = await fetchFacebookItems(
          SEARCH_QUERY,
          5,
          LOCATION,
          25,
          COOKIES_SOURCE,
          "./cookies/facebook.json",
        );
        if (!items || items.length === 0) return noResultsResponse();
        return Response.json(items, { status: 200 });
      } catch (error) {
        console.error("Facebook scraping error:", error);
        return Response.json(
          { message: describeError(error) },
          { status: 400 },
        );
      }
    },

    "/api/ebay": async (req: Request) => {
      const reqUrl = new URL(req.url);

      const SEARCH_QUERY = readSearchQuery(req, reqUrl);
      if (!SEARCH_QUERY) return missingQueryResponse();

      // Parse optional parameters with defaults
      const minPrice = parseIntParam(reqUrl.searchParams.get("minPrice"));
      const maxPrice = parseIntParam(reqUrl.searchParams.get("maxPrice"));
      const strictMode = reqUrl.searchParams.get("strictMode") === "true";
      const exclusions = splitCsvParam(reqUrl.searchParams.get("exclusions"));
      const requestedKeywords = splitCsvParam(
        reqUrl.searchParams.get("keywords"),
      );
      // Fall back to the search query when no usable keyword was supplied
      const keywords =
        requestedKeywords.length > 0 ? requestedKeywords : [SEARCH_QUERY];

      try {
        const items = await fetchEbayItems(SEARCH_QUERY, 5, {
          minPrice,
          maxPrice,
          strictMode,
          exclusions,
          keywords,
        });
        if (!items || items.length === 0) return noResultsResponse();
        return Response.json(items, { status: 200 });
      } catch (error) {
        console.error("eBay scraping error:", error);
        return Response.json(
          { message: describeError(error) },
          { status: 400 },
        );
      }
    },

    // Wildcard route for all routes that start with "/api/" and aren't otherwise matched
    "/api/*": Response.json({ message: "Not found" }, { status: 404 }),

    // // Serve a file by buffering it in memory
    // "/favicon.ico": new Response(await Bun.file("./favicon.ico").bytes(), {
    //   headers: {
    //     "Content-Type": "image/x-icon",
    //   },
    // }),
  },

  // (optional) fallback for unmatched routes:
  // Required if Bun's version < 1.2.3
  fetch(req: Request) {
    return new Response("Not Found", { status: 404 });
  },
});

console.log(`Serving on ${server.hostname}:${server.port}`);

File diff suppressed because it is too large Load Diff

834
test/facebook-core.test.ts Normal file
View File

@@ -0,0 +1,834 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
extractFacebookItemData,
extractFacebookMarketplaceData,
fetchFacebookItem,
formatCentsToCurrency,
formatCookiesForHeader,
loadFacebookCookies,
parseFacebookAds,
parseFacebookCookieString,
parseFacebookItem,
} from "../src/facebook";
// Mock fetch globally
const originalFetch = global.fetch;
describe("Facebook Marketplace Scraper Core Tests", () => {
beforeEach(() => {
global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests");
});
});
afterEach(() => {
global.fetch = originalFetch;
});
describe("Cookie Parsing", () => {
describe("parseFacebookCookieString", () => {
test("should parse valid cookie string", () => {
const cookieString = "c_user=123456789; xs=abcdef123456; fr=xyz789";
const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(3);
expect(result[0]).toEqual({
name: "c_user",
value: "123456789",
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax",
expirationDate: undefined,
});
expect(result[1]).toEqual({
name: "xs",
value: "abcdef123456",
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax",
expirationDate: undefined,
});
});
test("should handle URL-encoded values", () => {
const cookieString = "c_user=123%2B456; xs=abc%3Ddef";
const result = parseFacebookCookieString(cookieString);
expect(result[0].value).toBe("123+456");
expect(result[1].value).toBe("abc=def");
});
test("should filter out malformed cookies", () => {
const cookieString = "c_user=123; invalid; xs=abc; =empty";
const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(2);
expect(result.map((c) => c.name)).toEqual(["c_user", "xs"]);
});
test("should handle empty input", () => {
expect(parseFacebookCookieString("")).toEqual([]);
expect(parseFacebookCookieString(" ")).toEqual([]);
});
test("should handle extra whitespace", () => {
const cookieString = " c_user = 123 ; xs=abc ";
const result = parseFacebookCookieString(cookieString);
expect(result).toHaveLength(2);
expect(result[0].name).toBe("c_user");
expect(result[0].value).toBe("123");
expect(result[1].name).toBe("xs");
expect(result[1].value).toBe("abc");
});
});
});
describe("Facebook Item Fetching", () => {
describe("fetchFacebookItem", () => {
const mockCookies = JSON.stringify([
{ name: "c_user", value: "12345", domain: ".facebook.com" },
{ name: "xs", value: "abc123", domain: ".facebook.com" },
]);
test("should handle authentication errors", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: false,
status: 401,
text: () => Promise.resolve("Authentication required"),
headers: {
get: () => null,
},
}),
);
const result = await fetchFacebookItem("123", mockCookies);
expect(result).toBeNull();
});
test("should handle item not found", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: false,
status: 404,
text: () => Promise.resolve("Not found"),
headers: {
get: () => null,
},
}),
);
const result = await fetchFacebookItem("nonexistent", mockCookies);
expect(result).toBeNull();
});
// The first request is answered with 429 plus an X-RateLimit-Reset header;
// the second request returns a valid item page. The test verifies both that
// a retry happened and that the retried response was actually parsed.
test("should handle rate limiting", async () => {
  let attempts = 0;
  global.fetch = mock(() => {
    attempts++;
    if (attempts === 1) {
      // First attempt: rate limited
      return Promise.resolve({
        ok: false,
        status: 429,
        headers: {
          get: (header: string) => {
            if (header === "X-RateLimit-Reset") return "1";
            return null;
          },
        },
        text: () => Promise.resolve("Rate limited"),
      });
    }
    // Subsequent attempts: successful item page
    const mockData = {
      require: [
        [
          null,
          null,
          null,
          {
            __bbox: {
              result: {
                data: {
                  viewer: {
                    marketplace_product_details_page: {
                      target: {
                        id: "123",
                        __typename: "GroupCommerceProductItem",
                        marketplace_listing_title: "Test Item",
                        is_live: true,
                      },
                    },
                  },
                },
              },
            },
          },
        ],
      ],
    };
    return Promise.resolve({
      ok: true,
      text: () =>
        Promise.resolve(
          `<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
        ),
      headers: {
        get: () => null,
      },
    });
  });
  const result = await fetchFacebookItem("123", mockCookies);
  expect(attempts).toBe(2);
  // Should eventually succeed after retry: assert on the parsed item rather
  // than leaving `result` unused.
  expect(result).not.toBeNull();
  expect(result?.title).toBe("Test Item");
});
test("should handle sold items", async () => {
const mockData = {
require: [
[
null,
null,
null,
{
__bbox: {
result: {
data: {
viewer: {
marketplace_product_details_page: {
target: {
id: "456",
__typename: "GroupCommerceProductItem",
marketplace_listing_title: "Sold Item",
is_sold: true,
is_live: false,
},
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: {
get: () => null,
},
}),
);
const result = await fetchFacebookItem("456", mockCookies);
expect(result?.listingStatus).toBe("SOLD");
});
test("should handle missing authentication cookies", async () => {
// Use a test-specific cookie file that doesn't exist
const testCookiePath = "./cookies/facebook-test.json";
// Test with no cookies available (test file doesn't exist)
await expect(
fetchFacebookItem("123", undefined, testCookiePath),
).rejects.toThrow("No valid Facebook cookies found");
});
test("should handle successful item extraction", async () => {
const mockData = {
require: [
[
null,
null,
null,
{
__bbox: {
result: {
data: {
viewer: {
marketplace_product_details_page: {
target: {
id: "789",
__typename: "GroupCommerceProductItem",
marketplace_listing_title: "Working Item",
formatted_price: { text: "$299.00" },
listing_price: {
amount: "299.00",
currency: "CAD",
},
is_live: true,
creation_time: 1640995200,
},
},
},
},
},
},
},
],
],
};
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(
`<html><body><script>${JSON.stringify(mockData)}</script></body></html>`,
),
headers: {
get: () => null,
},
}),
);
const result = await fetchFacebookItem("789", mockCookies);
expect(result).not.toBeNull();
expect(result?.title).toBe("Working Item");
expect(result?.listingPrice?.amountFormatted).toBe("$299.00");
expect(result?.listingStatus).toBe("ACTIVE");
});
test("should handle server errors", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: false,
status: 500,
text: () => Promise.resolve("Internal Server Error"),
headers: {
get: () => null,
},
}),
);
const result = await fetchFacebookItem("error", mockCookies);
expect(result).toBeNull();
});
});
});
describe("Data Extraction", () => {
  /**
   * Serializes `data` into the page shape every fixture in this suite uses:
   * one <script> tag whose JSON holds the payload at
   * require[0][3].__bbox.result.data.
   */
  const pageWithData = (data: Record<string, unknown>): string => {
    const payload = {
      require: [[null, null, null, { __bbox: { result: { data } } }]],
    };
    return `<html><body><script>${JSON.stringify(payload)}</script></body></html>`;
  };

  /** Page whose payload is an item-details response with the given details object. */
  const itemPage = (details: Record<string, unknown>): string =>
    pageWithData({ viewer: { marketplace_product_details_page: details } });

  /** Page whose payload is a search response with the given feed edges. */
  const searchPage = (edges: unknown[]): string =>
    pageWithData({ marketplace_search: { feed_units: { edges } } });

  describe("extractFacebookItemData", () => {
    test("should extract item data from standard require structure", () => {
      const mockItemData = {
        id: "123456",
        __typename: "GroupCommerceProductItem",
        marketplace_listing_title: "Test Item",
        formatted_price: { text: "$100.00" },
        listing_price: { amount: "100.00", currency: "CAD" },
        is_live: true,
      };

      const result = extractFacebookItemData(
        itemPage({ target: mockItemData }),
      );

      expect(result).not.toBeNull();
      expect(result?.id).toBe("123456");
      expect(result?.marketplace_listing_title).toBe("Test Item");
    });

    test("should handle missing item data", () => {
      // The details page object is present but carries no `target`.
      const result = extractFacebookItemData(itemPage({}));
      expect(result).toBeNull();
    });

    test("should handle malformed HTML", () => {
      // No <script> payload at all in this document.
      const result = extractFacebookItemData(
        "<html><body>Invalid HTML</body></html>",
      );
      expect(result).toBeNull();
    });

    test("should handle invalid JSON in script tags", () => {
      const html =
        "<html><body><script>{invalid: json}</script></body></html>";
      const result = extractFacebookItemData(html);
      expect(result).toBeNull();
    });

    test("should extract item with vehicle data", () => {
      const mockVehicleItem = {
        id: "789",
        __typename: "GroupCommerceProductItem",
        marketplace_listing_title: "2006 Honda Civic",
        formatted_price: { text: "$5,000" },
        listing_price: { amount: "5000.00", currency: "CAD" },
        vehicle_make_display_name: "Honda",
        vehicle_model_display_name: "Civic",
        vehicle_odometer_data: { unit: "KILOMETERS", value: 150000 },
        vehicle_transmission_type: "AUTOMATIC",
        is_live: true,
      };

      const result = extractFacebookItemData(
        itemPage({ target: mockVehicleItem }),
      );

      expect(result).not.toBeNull();
      expect(result?.vehicle_make_display_name).toBe("Honda");
      expect(result?.vehicle_odometer_data?.value).toBe(150000);
    });
  });

  describe("extractFacebookMarketplaceData", () => {
    test("should extract search results from marketplace data", () => {
      const edges = [
        {
          node: {
            listing: {
              id: "1",
              marketplace_listing_title: "Item 1",
              listing_price: { amount: "10.00", currency: "CAD" },
            },
          },
        },
        {
          node: {
            listing: {
              id: "2",
              marketplace_listing_title: "Item 2",
              listing_price: { amount: "20.00", currency: "CAD" },
            },
          },
        },
      ];

      const result = extractFacebookMarketplaceData(searchPage(edges));

      expect(result).not.toBeNull();
      expect(result).toHaveLength(2);
      expect(result?.[0].node.listing.marketplace_listing_title).toBe(
        "Item 1",
      );
    });

    test("should handle empty search results", () => {
      // An empty edge list is expected to come back as null, not [].
      const result = extractFacebookMarketplaceData(searchPage([]));
      expect(result).toBeNull();
    });
  });
});
describe("Data Parsing", () => {
describe("parseFacebookItem", () => {
  // Each case feeds a raw item-details object to parseFacebookItem and checks
  // the fields of the parsed listing it returns.

  test("should parse complete item with all fields", () => {
    const rawListing = {
      id: "123456",
      __typename: "GroupCommerceProductItem" as const,
      marketplace_listing_title: "iPhone 13 Pro",
      redacted_description: { text: "Excellent condition" },
      formatted_price: { text: "$800.00" },
      listing_price: { amount: "800.00", currency: "CAD" },
      location_text: { text: "Toronto, ON" },
      is_live: true,
      creation_time: 1640995200,
      marketplace_listing_seller: {
        id: "seller1",
        name: "John Doe",
      },
      delivery_types: ["IN_PERSON"],
    };

    const parsed = parseFacebookItem(rawListing);
    const price = parsed?.listingPrice;

    expect(parsed).not.toBeNull();
    expect(parsed?.title).toBe("iPhone 13 Pro");
    expect(parsed?.description).toBe("Excellent condition");
    expect(price?.amountFormatted).toBe("$800.00");
    expect(price?.cents).toBe(80000);
    expect(price?.currency).toBe("CAD");
    expect(parsed?.address).toBe("Toronto, ON");
    expect(parsed?.listingStatus).toBe("ACTIVE");
    expect(parsed?.seller?.name).toBe("John Doe");
    expect(parsed?.deliveryTypes).toEqual(["IN_PERSON"]);
  });

  test("should parse FREE items", () => {
    const freeListing = {
      id: "789",
      __typename: "GroupCommerceProductItem" as const,
      marketplace_listing_title: "Free Sofa",
      formatted_price: { text: "FREE" },
      listing_price: { amount: "0.00", currency: "CAD" },
      is_live: true,
    };

    const parsed = parseFacebookItem(freeListing);
    const price = parsed?.listingPrice;

    expect(parsed).not.toBeNull();
    expect(parsed?.title).toBe("Free Sofa");
    expect(price?.amountFormatted).toBe("FREE");
    expect(price?.cents).toBe(0);
  });

  test("should handle missing optional fields", () => {
    // Only id, typename and title are supplied.
    const bareListing = {
      id: "456",
      __typename: "GroupCommerceProductItem" as const,
      marketplace_listing_title: "Minimal Item",
    };

    const parsed = parseFacebookItem(bareListing);

    expect(parsed).not.toBeNull();
    expect(parsed?.title).toBe("Minimal Item");
    expect(parsed?.description).toBeUndefined();
    expect(parsed?.seller).toBeUndefined();
  });

  test("should identify vehicle listings", () => {
    const carListing = {
      id: "999",
      __typename: "GroupCommerceProductItem" as const,
      marketplace_listing_title: "2012 Mazda 3",
      formatted_price: { text: "$8,000" },
      listing_price: { amount: "8000.00", currency: "CAD" },
      vehicle_make_display_name: "Mazda",
      vehicle_model_display_name: "3",
      is_live: true,
    };

    expect(parseFacebookItem(carListing)?.listingType).toBe("vehicle");
  });

  test("should handle different listing statuses", () => {
    // Checked in the same order as before: sold, pending, hidden.
    const statusCases = [
      {
        raw: {
          id: "111",
          __typename: "GroupCommerceProductItem" as const,
          marketplace_listing_title: "Sold Item",
          is_sold: true,
          is_live: false,
        },
        expected: "SOLD",
      },
      {
        raw: {
          id: "222",
          __typename: "GroupCommerceProductItem" as const,
          marketplace_listing_title: "Pending Item",
          is_pending: true,
          is_live: true,
        },
        expected: "PENDING",
      },
      {
        raw: {
          id: "333",
          __typename: "GroupCommerceProductItem" as const,
          marketplace_listing_title: "Hidden Item",
          is_hidden: true,
          is_live: false,
        },
        expected: "HIDDEN",
      },
    ];

    for (const { raw, expected } of statusCases) {
      expect(parseFacebookItem(raw)?.listingStatus).toBe(expected);
    }
  });

  test("should return null for items without title", () => {
    const untitledListing = {
      id: "invalid",
      __typename: "GroupCommerceProductItem" as const,
      is_live: true,
    };

    expect(parseFacebookItem(untitledListing)).toBeNull();
  });
});
describe("parseFacebookAds", () => {
  /** Wraps a listing in the `{ node: { listing } }` edge shape the parser receives. */
  const asEdge = <T>(listing: T) => ({ node: { listing } });

  test("should parse search result ads", () => {
    const torontoListing = {
      id: "1",
      marketplace_listing_title: "Ad 1",
      listing_price: {
        amount: "50.00",
        formatted_amount: "$50.00",
        currency: "CAD",
      },
      location: {
        reverse_geocode: { city_page: { display_name: "Toronto" } },
      },
      creation_time: 1640995200,
      is_live: true,
    };
    const ottawaListing = {
      id: "2",
      marketplace_listing_title: "Ad 2",
      listing_price: {
        amount: "75.00",
        formatted_amount: "$75.00",
        currency: "CAD",
      },
      location: {
        reverse_geocode: { city_page: { display_name: "Ottawa" } },
      },
      creation_time: 1640995300,
      is_live: true,
    };

    const parsedAds = parseFacebookAds([
      asEdge(torontoListing),
      asEdge(ottawaListing),
    ]);

    expect(parsedAds).toHaveLength(2);
    const [firstAd, secondAd] = parsedAds;
    expect(firstAd.title).toBe("Ad 1");
    expect(firstAd.listingPrice?.cents).toBe(5000);
    expect(firstAd.address).toBe("Toronto");
    expect(secondAd.title).toBe("Ad 2");
    expect(secondAd.address).toBe("Ottawa");
  });

  test("should filter out ads without price", () => {
    const pricedListing = {
      id: "1",
      marketplace_listing_title: "With Price",
      listing_price: {
        amount: "100.00",
        formatted_amount: "$100.00",
        currency: "CAD",
      },
      is_live: true,
    };
    // Same shape minus `listing_price`.
    const unpricedListing = {
      id: "2",
      marketplace_listing_title: "No Price",
      is_live: true,
    };

    const parsedAds = parseFacebookAds([
      asEdge(pricedListing),
      asEdge(unpricedListing),
    ]);

    expect(parsedAds).toHaveLength(1);
    expect(parsedAds[0].title).toBe("With Price");
  });

  test("should handle malformed ads gracefully", () => {
    const validEdge = asEdge({
      id: "1",
      marketplace_listing_title: "Valid Ad",
      listing_price: {
        amount: "50.00",
        formatted_amount: "$50.00",
        currency: "CAD",
      },
      is_live: true,
    });
    // Edge whose node has no `listing` at all.
    const listinglessEdge = { node: {} } as { node: { listing?: unknown } };

    const parsedAds = parseFacebookAds([validEdge, listinglessEdge]);

    expect(parsedAds).toHaveLength(1);
    expect(parsedAds[0].title).toBe("Valid Ad");
  });
});
});
describe("Utility Functions", () => {
describe("formatCentsToCurrency", () => {
  test("should format cents to currency string", () => {
    // [input in cents, expected display string]
    const numericCases: Array<[number, string]> = [
      [100, "$1.00"],
      [1000, "$10.00"],
      [9999, "$99.99"],
      [123456, "$1,234.56"],
    ];
    for (const [cents, display] of numericCases) {
      expect(formatCentsToCurrency(cents)).toBe(display);
    }
  });

  test("should handle string inputs", () => {
    const stringCases: Array<[string, string]> = [
      ["100", "$1.00"],
      ["1000", "$10.00"],
    ];
    for (const [cents, display] of stringCases) {
      expect(formatCentsToCurrency(cents)).toBe(display);
    }
  });

  test("should handle zero", () => {
    const display = formatCentsToCurrency(0);
    expect(display).toBe("$0.00");
  });

  test("should handle null and undefined", () => {
    // Missing amounts come back as an empty string.
    for (const missing of [null, undefined]) {
      expect(formatCentsToCurrency(missing)).toBe("");
    }
  });

  test("should handle invalid inputs", () => {
    // Non-numeric text and NaN also come back as an empty string.
    for (const unusable of ["invalid", Number.NaN]) {
      expect(formatCentsToCurrency(unusable)).toBe("");
    }
  });
});
describe("formatCookiesForHeader", () => {
  // Two cookies scoped to .facebook.com plus one scoped to other.com; the
  // facebook.com assertions below expect the other.com cookie to be left out.
  const mockCookies = [
    { name: "c_user", value: "123456", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abcdef", domain: ".facebook.com", path: "/" },
    { name: "session_id", value: "xyz", domain: "other.com", path: "/" },
  ];

  test("should format cookies for header string", () => {
    const result = formatCookiesForHeader(mockCookies, "www.facebook.com");
    expect(result).toBe("c_user=123456; xs=abcdef");
  });

  test("should filter expired cookies", () => {
    const cookiesWithExpiration = [
      ...mockCookies,
      {
        name: "expired",
        value: "old",
        domain: ".facebook.com",
        path: "/",
        // Date.now() is milliseconds; this is 1000 seconds before "now" when
        // read as epoch seconds (presumably the unit the formatter expects).
        expirationDate: Date.now() / 1000 - 1000,
      },
    ];
    const result = formatCookiesForHeader(
      cookiesWithExpiration,
      "www.facebook.com",
    );
    expect(result).not.toContain("expired");
    // The negative check alone would also pass if every cookie were dropped,
    // so pin the exact header the unexpired facebook.com cookies produce.
    expect(result).toBe("c_user=123456; xs=abcdef");
  });

  test("should handle no matching cookies", () => {
    const result = formatCookiesForHeader(mockCookies, "www.google.com");
    expect(result).toBe("");
  });

  test("should handle empty cookie array", () => {
    const result = formatCookiesForHeader([], "www.facebook.com");
    expect(result).toBe("");
  });
});
});
});

View File

@@ -0,0 +1,712 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import fetchFacebookItems, { fetchFacebookItem } from "../src/facebook";
// Keep a handle on the real fetch so afterEach can put it back.
const originalFetch = global.fetch;

/**
 * End-to-end tests for fetchFacebookItems with `global.fetch` stubbed.
 *
 * Every test installs its own fetch stub; the default stub installed in
 * beforeEach throws so that a test which forgets to stub fails loudly.
 */
describe("Facebook Marketplace Scraper Integration Tests", () => {
  // Serialized cookie list handed to fetchFacebookItems in every test.
  const mockCookies = JSON.stringify([
    { name: "c_user", value: "12345", domain: ".facebook.com", path: "/" },
    { name: "xs", value: "abc123", domain: ".facebook.com", path: "/" },
  ]);

  /** CAD price object in the shape the search-result fixtures use. */
  const cadPrice = (amount: string, formatted = `$${amount}`) => ({
    amount,
    formatted_amount: formatted,
    currency: "CAD",
  });

  /** Location object carrying only a city display name. */
  const cityLocation = (displayName: string) => ({
    reverse_geocode: { city_page: { display_name: displayName } },
  });

  /** Wraps a listing in the `{ node: { listing } }` feed-edge shape. */
  const edgeOf = <T>(listing: T) => ({ node: { listing } });

  /**
   * Serializes feed edges into a search page: one <script> tag whose JSON
   * holds them at
   * require[0][3].__bbox.result.data.marketplace_search.feed_units.edges.
   */
  const searchPageHtml = (edges: unknown[]): string => {
    const payload = {
      require: [
        [
          null,
          null,
          null,
          {
            __bbox: {
              result: {
                data: { marketplace_search: { feed_units: { edges } } },
              },
            },
          },
        ],
      ],
    };
    return `<html><body><script>${JSON.stringify(payload)}</script></body></html>`;
  };

  /** Stubs fetch with a successful response whose body is `html`. */
  const stubFetchOk = (html: string): void => {
    global.fetch = mock(() =>
      Promise.resolve({
        ok: true,
        text: () => Promise.resolve(html),
        headers: {
          get: () => null,
        },
      }),
    );
  };

  /** Stubs fetch with a failed response carrying `status` and `body`. */
  const stubFetchError = (status: number, body: string): void => {
    global.fetch = mock(() =>
      Promise.resolve({
        ok: false,
        status,
        text: () => Promise.resolve(body),
        headers: {
          get: () => null,
        },
      }),
    );
  };

  /**
   * Runs a search with the arguments all tests share. Only the query and the
   * fourth argument vary; the "MAX_ITEMS" test shows the fourth argument caps
   * the number of returned items.
   */
  const search = (query: string, maxItems = 25) =>
    fetchFacebookItems(query, 1, "toronto", maxItems, mockCookies);

  beforeEach(() => {
    global.fetch = mock(() => {
      throw new Error("fetch should be mocked in individual tests");
    });
  });

  afterEach(() => {
    global.fetch = originalFetch;
  });

  describe("Main Search Function", () => {
    test("should successfully fetch search results", async () => {
      stubFetchOk(
        searchPageHtml([
          edgeOf({
            id: "1",
            marketplace_listing_title: "iPhone 13 Pro",
            listing_price: cadPrice("800.00"),
            location: cityLocation("Toronto"),
            creation_time: 1640995200,
            is_live: true,
          }),
          edgeOf({
            id: "2",
            marketplace_listing_title: "Samsung Galaxy",
            listing_price: cadPrice("600.00"),
            location: cityLocation("Mississauga"),
            creation_time: 1640995300,
            is_live: true,
          }),
        ]),
      );

      const results = await search("iPhone");

      expect(results).toHaveLength(2);
      expect(results[0].title).toBe("iPhone 13 Pro");
      expect(results[1].title).toBe("Samsung Galaxy");
    });

    test("should filter out items without price", async () => {
      stubFetchOk(
        searchPageHtml([
          edgeOf({
            id: "1",
            marketplace_listing_title: "With Price",
            listing_price: cadPrice("100.00"),
            is_live: true,
          }),
          // Same shape minus `listing_price`.
          edgeOf({
            id: "2",
            marketplace_listing_title: "No Price",
            is_live: true,
          }),
        ]),
      );

      const results = await search("test");

      expect(results).toHaveLength(1);
      expect(results[0].title).toBe("With Price");
    });

    test("should respect MAX_ITEMS parameter", async () => {
      // Ten priced listings on the page, but only five are requested.
      stubFetchOk(
        searchPageHtml(
          Array.from({ length: 10 }, (_, i) =>
            edgeOf({
              id: String(i),
              marketplace_listing_title: `Item ${i}`,
              listing_price: cadPrice(`${(i + 1) * 10}.00`),
              is_live: true,
            }),
          ),
        ),
      );

      const results = await search("test", 5);

      expect(results).toHaveLength(5);
    });

    test("should return empty array for no results", async () => {
      stubFetchOk(searchPageHtml([]));

      const results = await search("nonexistent query");

      expect(results).toEqual([]);
    });

    test("should handle authentication errors gracefully", async () => {
      stubFetchError(401, "Unauthorized");

      const results = await search("test");

      expect(results).toEqual([]);
    });

    test("should handle network errors", async () => {
      // Unlike HTTP error statuses, a rejected fetch is expected to propagate.
      global.fetch = mock(() => Promise.reject(new Error("Network error")));

      await expect(search("test")).rejects.toThrow("Network error");
    });

    test("should handle rate limiting with retry", async () => {
      const retryPageHtml = searchPageHtml([
        edgeOf({
          id: "1",
          marketplace_listing_title: "Item 1",
          listing_price: cadPrice("100.00"),
          is_live: true,
        }),
      ]);

      let attempts = 0;
      global.fetch = mock(() => {
        attempts++;
        if (attempts === 1) {
          // First call: 429 with a reset hint of "1".
          return Promise.resolve({
            ok: false,
            status: 429,
            headers: {
              get: (header: string) => {
                if (header === "X-RateLimit-Reset") return "1";
                return null;
              },
            },
            text: () => Promise.resolve("Rate limited"),
          });
        }
        // Every later call succeeds with a one-item page.
        return Promise.resolve({
          ok: true,
          text: () => Promise.resolve(retryPageHtml),
          headers: {
            get: () => null,
          },
        });
      });

      const results = await search("test");

      expect(attempts).toBe(2);
      expect(results).toHaveLength(1);
    });
  });

  describe("Vehicle Listing Integration", () => {
    test("should correctly identify and parse vehicle listings", async () => {
      stubFetchOk(
        searchPageHtml([
          edgeOf({
            id: "1",
            marketplace_listing_title: "2006 Honda Civic",
            listing_price: cadPrice("8000.00", "$8,000.00"),
            is_live: true,
          }),
          edgeOf({
            id: "2",
            marketplace_listing_title: "iPhone 13",
            listing_price: cadPrice("800.00"),
            is_live: true,
          }),
        ]),
      );

      const results = await search("cars");

      expect(results).toHaveLength(2);
      // Both should be classified as "item" type in search results (vehicle detection is for item details)
      expect(results[0].title).toBe("2006 Honda Civic");
      expect(results[1].title).toBe("iPhone 13");
    });
  });

  describe("Different Categories", () => {
    test("should handle electronics listings", async () => {
      stubFetchOk(
        searchPageHtml([
          edgeOf({
            id: "1",
            marketplace_listing_title: "Nintendo Switch",
            listing_price: cadPrice("250.00"),
            location: cityLocation("Toronto"),
            marketplace_listing_category_id: "479353692612078",
            condition: "USED",
            is_live: true,
          }),
        ]),
      );

      const results = await search("nintendo switch");

      expect(results).toHaveLength(1);
      expect(results[0].title).toBe("Nintendo Switch");
      expect(results[0].categoryId).toBe("479353692612078");
    });

    test("should handle home goods/furniture listings", async () => {
      stubFetchOk(
        searchPageHtml([
          edgeOf({
            id: "1",
            marketplace_listing_title: "Dining Table",
            listing_price: cadPrice("150.00"),
            location: cityLocation("Mississauga"),
            marketplace_listing_category_id: "1569171756675761",
            condition: "USED",
            is_live: true,
          }),
        ]),
      );

      const results = await search("table");

      expect(results).toHaveLength(1);
      expect(results[0].title).toBe("Dining Table");
      expect(results[0].categoryId).toBe("1569171756675761");
    });
  });

  describe("Error Scenarios", () => {
    test("should handle malformed HTML responses", async () => {
      // A 200 response whose body carries no script payload.
      stubFetchOk("<html><body>Invalid HTML without JSON data</body></html>");

      const results = await search("test");

      expect(results).toEqual([]);
    });

    test("should handle 404 errors gracefully", async () => {
      stubFetchError(404, "Not found");

      const results = await search("test");

      expect(results).toEqual([]);
    });

    test("should handle 500 errors gracefully", async () => {
      stubFetchError(500, "Internal Server Error");

      const results = await search("test");

      expect(results).toEqual([]);
    });
  });
});

View File

@@ -1,162 +1,166 @@
import { describe, test, expect } from "bun:test";
import { describe, expect, test } from "bun:test";
import {
resolveLocationId,
resolveCategoryId,
buildSearchUrl,
HttpError,
NetworkError,
ParseError,
RateLimitError,
ValidationError
HttpError,
NetworkError,
ParseError,
RateLimitError,
ValidationError,
buildSearchUrl,
resolveCategoryId,
resolveLocationId,
} from "../src/kijiji";
describe("Location and Category Resolution", () => {
describe("resolveLocationId", () => {
test("should return numeric IDs as-is", () => {
expect(resolveLocationId(1700272)).toBe(1700272);
expect(resolveLocationId(0)).toBe(0);
});
describe("resolveLocationId", () => {
test("should return numeric IDs as-is", () => {
expect(resolveLocationId(1700272)).toBe(1700272);
expect(resolveLocationId(0)).toBe(0);
});
test("should resolve string location names", () => {
expect(resolveLocationId("canada")).toBe(0);
expect(resolveLocationId("ontario")).toBe(9004);
expect(resolveLocationId("toronto")).toBe(1700273);
expect(resolveLocationId("gta")).toBe(1700272);
});
test("should resolve string location names", () => {
expect(resolveLocationId("canada")).toBe(0);
expect(resolveLocationId("ontario")).toBe(9004);
expect(resolveLocationId("toronto")).toBe(1700273);
expect(resolveLocationId("gta")).toBe(1700272);
});
test("should handle case insensitive matching", () => {
expect(resolveLocationId("Canada")).toBe(0);
expect(resolveLocationId("ONTARIO")).toBe(9004);
});
test("should handle case insensitive matching", () => {
expect(resolveLocationId("Canada")).toBe(0);
expect(resolveLocationId("ONTARIO")).toBe(9004);
});
test("should default to Canada for unknown locations", () => {
expect(resolveLocationId("unknown")).toBe(0);
expect(resolveLocationId("")).toBe(0);
});
test("should default to Canada for unknown locations", () => {
expect(resolveLocationId("unknown")).toBe(0);
expect(resolveLocationId("")).toBe(0);
});
test("should handle undefined input", () => {
expect(resolveLocationId(undefined)).toBe(0);
});
});
test("should handle undefined input", () => {
expect(resolveLocationId(undefined)).toBe(0);
});
});
describe("resolveCategoryId", () => {
test("should return numeric IDs as-is", () => {
expect(resolveCategoryId(132)).toBe(132);
expect(resolveCategoryId(0)).toBe(0);
});
describe("resolveCategoryId", () => {
test("should return numeric IDs as-is", () => {
expect(resolveCategoryId(132)).toBe(132);
expect(resolveCategoryId(0)).toBe(0);
});
test("should resolve string category names", () => {
expect(resolveCategoryId("all")).toBe(0);
expect(resolveCategoryId("phones")).toBe(132);
expect(resolveCategoryId("electronics")).toBe(29659001);
expect(resolveCategoryId("buy-sell")).toBe(10);
});
test("should resolve string category names", () => {
expect(resolveCategoryId("all")).toBe(0);
expect(resolveCategoryId("phones")).toBe(132);
expect(resolveCategoryId("electronics")).toBe(29659001);
expect(resolveCategoryId("buy-sell")).toBe(10);
});
test("should handle case insensitive matching", () => {
expect(resolveCategoryId("All")).toBe(0);
expect(resolveCategoryId("PHONES")).toBe(132);
});
test("should handle case insensitive matching", () => {
expect(resolveCategoryId("All")).toBe(0);
expect(resolveCategoryId("PHONES")).toBe(132);
});
test("should default to all categories for unknown categories", () => {
expect(resolveCategoryId("unknown")).toBe(0);
expect(resolveCategoryId("")).toBe(0);
});
test("should default to all categories for unknown categories", () => {
expect(resolveCategoryId("unknown")).toBe(0);
expect(resolveCategoryId("")).toBe(0);
});
test("should handle undefined input", () => {
expect(resolveCategoryId(undefined)).toBe(0);
});
});
test("should handle undefined input", () => {
expect(resolveCategoryId(undefined)).toBe(0);
});
});
});
describe("URL Construction", () => {
describe("buildSearchUrl", () => {
test("should build basic search URL", () => {
const url = buildSearchUrl("iphone", {
location: 1700272,
category: 132,
sortBy: 'relevancy',
sortOrder: 'desc',
});
describe("buildSearchUrl", () => {
test("should build basic search URL", () => {
const url = buildSearchUrl("iphone", {
location: 1700272,
category: 132,
sortBy: "relevancy",
sortOrder: "desc",
});
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
expect(url).toContain("sort=relevancyDesc");
expect(url).toContain("order=DESC");
});
expect(url).toContain("b-buy-sell/canada/iphone/k0c132l1700272");
expect(url).toContain("sort=relevancyDesc");
expect(url).toContain("order=DESC");
});
test("should handle pagination", () => {
const url = buildSearchUrl("iphone", {
location: 1700272,
category: 132,
page: 2,
});
test("should handle pagination", () => {
const url = buildSearchUrl("iphone", {
location: 1700272,
category: 132,
page: 2,
});
expect(url).toContain("&page=2");
});
expect(url).toContain("&page=2");
});
test("should handle different sort options", () => {
const dateUrl = buildSearchUrl("iphone", {
sortBy: 'date',
sortOrder: 'asc',
});
expect(dateUrl).toContain("sort=DATE");
expect(dateUrl).toContain("order=ASC");
test("should handle different sort options", () => {
const dateUrl = buildSearchUrl("iphone", {
sortBy: "date",
sortOrder: "asc",
});
expect(dateUrl).toContain("sort=DATE");
expect(dateUrl).toContain("order=ASC");
const priceUrl = buildSearchUrl("iphone", {
sortBy: 'price',
sortOrder: 'desc',
});
expect(priceUrl).toContain("sort=PRICE");
expect(priceUrl).toContain("order=DESC");
});
const priceUrl = buildSearchUrl("iphone", {
sortBy: "price",
sortOrder: "desc",
});
expect(priceUrl).toContain("sort=PRICE");
expect(priceUrl).toContain("order=DESC");
});
test("should handle string location/category inputs", () => {
const url = buildSearchUrl("iphone", {
location: "toronto",
category: "phones",
});
test("should handle string location/category inputs", () => {
const url = buildSearchUrl("iphone", {
location: "toronto",
category: "phones",
});
expect(url).toContain("k0c132l1700273"); // phones + toronto
});
});
expect(url).toContain("k0c132l1700273"); // phones + toronto
});
});
});
describe("Error Classes", () => {
test("HttpError should store status and URL", () => {
const error = new HttpError("Not found", 404, "https://example.com");
expect(error.message).toBe("Not found");
expect(error.status).toBe(404);
expect(error.url).toBe("https://example.com");
expect(error.name).toBe("HttpError");
});
test("HttpError should store status and URL", () => {
const error = new HttpError("Not found", 404, "https://example.com");
expect(error.message).toBe("Not found");
expect(error.status).toBe(404);
expect(error.url).toBe("https://example.com");
expect(error.name).toBe("HttpError");
});
test("NetworkError should store URL and cause", () => {
const cause = new Error("Connection failed");
const error = new NetworkError("Network error", "https://example.com", cause);
expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com");
expect(error.cause).toBe(cause);
expect(error.name).toBe("NetworkError");
});
test("NetworkError should store URL and cause", () => {
const cause = new Error("Connection failed");
const error = new NetworkError(
"Network error",
"https://example.com",
cause,
);
expect(error.message).toBe("Network error");
expect(error.url).toBe("https://example.com");
expect(error.cause).toBe(cause);
expect(error.name).toBe("NetworkError");
});
test("ParseError should store data", () => {
const data = { invalid: "json" };
const error = new ParseError("Invalid JSON", data);
expect(error.message).toBe("Invalid JSON");
expect(error.data).toBe(data);
expect(error.name).toBe("ParseError");
});
test("ParseError should store data", () => {
const data = { invalid: "json" };
const error = new ParseError("Invalid JSON", data);
expect(error.message).toBe("Invalid JSON");
expect(error.data).toBe(data);
expect(error.name).toBe("ParseError");
});
test("RateLimitError should store URL and reset time", () => {
const error = new RateLimitError("Rate limited", "https://example.com", 60);
expect(error.message).toBe("Rate limited");
expect(error.url).toBe("https://example.com");
expect(error.resetTime).toBe(60);
expect(error.name).toBe("RateLimitError");
});
test("RateLimitError should store URL and reset time", () => {
const error = new RateLimitError("Rate limited", "https://example.com", 60);
expect(error.message).toBe("Rate limited");
expect(error.url).toBe("https://example.com");
expect(error.resetTime).toBe(60);
expect(error.name).toBe("RateLimitError");
});
test("ValidationError should work without field", () => {
const error = new ValidationError("Invalid value");
expect(error.message).toBe("Invalid value");
expect(error.name).toBe("ValidationError");
});
test("ValidationError should work without field", () => {
const error = new ValidationError("Invalid value");
expect(error.message).toBe("Invalid value");
expect(error.name).toBe("ValidationError");
});
});

View File

@@ -1,337 +1,363 @@
import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test";
import { extractApolloState, parseSearch, parseDetailedListing } from "../src/kijiji";
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
extractApolloState,
parseDetailedListing,
parseSearch,
} from "../src/kijiji";
// Mock fetch globally
const originalFetch = global.fetch;
describe("HTML Parsing Integration", () => {
beforeEach(() => {
// Mock fetch for all tests
global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests");
});
});
beforeEach(() => {
// Mock fetch for all tests
global.fetch = mock(() => {
throw new Error("fetch should be mocked in individual tests");
});
});
afterEach(() => {
global.fetch = originalFetch;
});
afterEach(() => {
global.fetch = originalFetch;
});
describe("extractApolloState", () => {
test("should extract Apollo state from valid HTML", () => {
const mockHtml = '<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>';
describe("extractApolloState", () => {
test("should extract Apollo state from valid HTML", () => {
const mockHtml =
'<html><head><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"__APOLLO_STATE__":{"ROOT_QUERY":{"test":"value"}}}}}</script></head></html>';
const result = extractApolloState(mockHtml);
expect(result).toEqual({
ROOT_QUERY: { test: "value" }
});
});
const result = extractApolloState(mockHtml);
expect(result).toEqual({
ROOT_QUERY: { test: "value" },
});
});
test("should return null for HTML without Apollo state", () => {
const mockHtml = '<html><body>No data here</body></html>';
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
test("should return null for HTML without Apollo state", () => {
const mockHtml = "<html><body>No data here</body></html>";
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
test("should return null for malformed JSON", () => {
const mockHtml = '<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>';
test("should return null for malformed JSON", () => {
const mockHtml =
'<html><script id="__NEXT_DATA__" type="application/json">{"invalid": json}</script></html>';
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
test("should handle missing __NEXT_DATA__ element", () => {
const mockHtml = '<html><body><div>Content</div></body></html>';
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
});
test("should handle missing __NEXT_DATA__ element", () => {
const mockHtml = "<html><body><div>Content</div></body></html>";
const result = extractApolloState(mockHtml);
expect(result).toBeNull();
});
});
describe("parseSearch", () => {
test("should parse search results from HTML", () => {
const mockHtml = `
describe("parseSearch", () => {
test("should parse search results from HTML", () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone 13 Pro",
},
"Listing:456": {
url: "/v-samsung/k0l0",
title: "Samsung Galaxy",
},
"ROOT_QUERY": { test: "value" }
}
}
}
})}
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone 13 Pro",
},
"Listing:456": {
url: "/v-samsung/k0l0",
title: "Samsung Galaxy",
},
ROOT_QUERY: { test: "value" },
},
},
},
})}
</script>
</html>
`;
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toHaveLength(2);
expect(results[0]).toEqual({
name: "iPhone 13 Pro",
listingLink: "https://www.kijiji.ca/v-iphone/k0l0"
});
expect(results[1]).toEqual({
name: "Samsung Galaxy",
listingLink: "https://www.kijiji.ca/v-samsung/k0l0"
});
});
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toHaveLength(2);
expect(results[0]).toEqual({
name: "iPhone 13 Pro",
listingLink: "https://www.kijiji.ca/v-iphone/k0l0",
});
expect(results[1]).toEqual({
name: "Samsung Galaxy",
listingLink: "https://www.kijiji.ca/v-samsung/k0l0",
});
});
test("should handle absolute URLs", () => {
const mockHtml = `
test("should handle absolute URLs", () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13 Pro",
}
}
}
}
})}
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13 Pro",
},
},
},
},
})}
</script>
</html>
`;
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results[0].listingLink).toBe("https://www.kijiji.ca/v-iphone/k0l0");
});
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results[0].listingLink).toBe(
"https://www.kijiji.ca/v-iphone/k0l0",
);
});
test("should filter out invalid listings", () => {
const mockHtml = `
test("should filter out invalid listings", () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone 13 Pro",
},
"Listing:456": {
url: "/v-samsung/k0l0",
// Missing title
},
"Other:789": {
url: "/v-other/k0l0",
title: "Other Item",
}
}
}
}
})}
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone 13 Pro",
},
"Listing:456": {
url: "/v-samsung/k0l0",
// Missing title
},
"Other:789": {
url: "/v-other/k0l0",
title: "Other Item",
},
},
},
},
})}
</script>
</html>
`;
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toHaveLength(1);
expect(results[0].name).toBe("iPhone 13 Pro");
});
const results = parseSearch(mockHtml, "https://www.kijiji.ca");
expect(results).toHaveLength(1);
expect(results[0].name).toBe("iPhone 13 Pro");
});
test("should return empty array for invalid HTML", () => {
const results = parseSearch("<html><body>Invalid</body></html>", "https://www.kijiji.ca");
expect(results).toEqual([]);
});
});
test("should return empty array for invalid HTML", () => {
const results = parseSearch(
"<html><body>Invalid</body></html>",
"https://www.kijiji.ca",
);
expect(results).toEqual([]);
});
});
describe("parseDetailedListing", () => {
test("should parse detailed listing with all fields", async () => {
const mockHtml = `
describe("parseDetailedListing", () => {
test("should parse detailed listing with all fields", async () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone-13-pro/k0l0",
title: "iPhone 13 Pro 256GB",
description: "Excellent condition iPhone 13 Pro",
price: {
amount: 80000,
currency: "CAD",
type: "FIXED"
},
type: "OFFER",
status: "ACTIVE",
activationDate: "2024-01-15T10:00:00.000Z",
endDate: "2025-01-15T10:00:00.000Z",
metrics: { views: 150 },
location: {
address: "Toronto, ON",
id: 1700273,
name: "Toronto",
coordinates: {
latitude: 43.6532,
longitude: -79.3832
}
},
imageUrls: [
"https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg"
],
imageCount: 2,
categoryId: 132,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: true
},
posterInfo: {
posterId: "user123",
rating: 4.8
},
attributes: [
{ canonicalName: "forsaleby", canonicalValues: ["ownr"] },
{ canonicalName: "phonecarrier", canonicalValues: ["unlocked"] }
]
}
}
}
}
})}
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone-13-pro/k0l0",
title: "iPhone 13 Pro 256GB",
description: "Excellent condition iPhone 13 Pro",
price: {
amount: 80000,
currency: "CAD",
type: "FIXED",
},
type: "OFFER",
status: "ACTIVE",
activationDate: "2024-01-15T10:00:00.000Z",
endDate: "2025-01-15T10:00:00.000Z",
metrics: { views: 150 },
location: {
address: "Toronto, ON",
id: 1700273,
name: "Toronto",
coordinates: {
latitude: 43.6532,
longitude: -79.3832,
},
},
imageUrls: [
"https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg",
],
imageCount: 2,
categoryId: 132,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: true,
},
posterInfo: {
posterId: "user123",
rating: 4.8,
},
attributes: [
{
canonicalName: "forsaleby",
canonicalValues: ["ownr"],
},
{
canonicalName: "phonecarrier",
canonicalValues: ["unlocked"],
},
],
},
},
},
},
})}
</script>
</html>
`;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca");
expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0",
title: "iPhone 13 Pro 256GB",
description: "Excellent condition iPhone 13 Pro",
listingPrice: {
amountFormatted: "$800.00",
cents: 80000,
currency: "CAD"
},
listingType: "OFFER",
listingStatus: "ACTIVE",
creationDate: "2024-01-15T10:00:00.000Z",
endDate: "2025-01-15T10:00:00.000Z",
numberOfViews: 150,
address: "Toronto, ON",
images: [
"https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg"
],
categoryId: 132,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: true
},
attributes: {
forsaleby: ["ownr"],
phonecarrier: ["unlocked"]
},
location: {
id: 1700273,
name: "Toronto",
coordinates: {
latitude: 43.6532,
longitude: -79.3832
}
},
sellerInfo: {
posterId: "user123",
rating: 4.8
}
});
});
const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone-13-pro/k0l0",
title: "iPhone 13 Pro 256GB",
description: "Excellent condition iPhone 13 Pro",
listingPrice: {
amountFormatted: "$800.00",
cents: 80000,
currency: "CAD",
},
listingType: "OFFER",
listingStatus: "ACTIVE",
creationDate: "2024-01-15T10:00:00.000Z",
endDate: "2025-01-15T10:00:00.000Z",
numberOfViews: 150,
address: "Toronto, ON",
images: [
"https://media.kijiji.ca/api/v1/image1.jpg",
"https://media.kijiji.ca/api/v1/image2.jpg",
],
categoryId: 132,
adSource: "ORGANIC",
flags: {
topAd: false,
priceDrop: true,
},
attributes: {
forsaleby: ["ownr"],
phonecarrier: ["unlocked"],
},
location: {
id: 1700273,
name: "Toronto",
coordinates: {
latitude: 43.6532,
longitude: -79.3832,
},
},
sellerInfo: {
posterId: "user123",
rating: 4.8,
},
});
});
test("should return null for contact-based pricing", async () => {
const mockHtml = `
test("should return null for contact-based pricing", async () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone for Sale",
price: {
type: "CONTACT",
amount: null
}
}
}
}
}
})}
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone for Sale",
price: {
type: "CONTACT",
amount: null,
},
},
},
},
},
})}
</script>
</html>
`;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca");
expect(result).toBeNull();
});
const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toBeNull();
});
test("should handle missing optional fields", async () => {
const mockHtml = `
test("should handle missing optional fields", async () => {
const mockHtml = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone 13",
price: { amount: 50000 }
}
}
}
}
})}
props: {
pageProps: {
__APOLLO_STATE__: {
"Listing:123": {
url: "/v-iphone/k0l0",
title: "iPhone 13",
price: { amount: 50000 },
},
},
},
},
})}
</script>
</html>
`;
const result = await parseDetailedListing(mockHtml, "https://www.kijiji.ca");
expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13",
description: undefined,
listingPrice: {
amountFormatted: "$500.00",
cents: 50000,
currency: undefined
},
listingType: undefined,
listingStatus: undefined,
creationDate: undefined,
endDate: undefined,
numberOfViews: undefined,
address: null,
images: [],
categoryId: 0,
adSource: "UNKNOWN",
flags: {
topAd: false,
priceDrop: false
},
attributes: {},
location: {
id: 0,
name: "Unknown",
coordinates: undefined
},
sellerInfo: undefined
});
});
});
const result = await parseDetailedListing(
mockHtml,
"https://www.kijiji.ca",
);
expect(result).toEqual({
url: "https://www.kijiji.ca/v-iphone/k0l0",
title: "iPhone 13",
description: undefined,
listingPrice: {
amountFormatted: "$500.00",
cents: 50000,
currency: undefined,
},
listingType: undefined,
listingStatus: undefined,
creationDate: undefined,
endDate: undefined,
numberOfViews: undefined,
address: null,
images: [],
categoryId: 0,
adSource: "UNKNOWN",
flags: {
topAd: false,
priceDrop: false,
},
attributes: {},
location: {
id: 0,
name: "Unknown",
coordinates: undefined,
},
sellerInfo: undefined,
});
});
});
});

View File

@@ -1,54 +1,54 @@
import { describe, test, expect, beforeEach, afterEach } from "bun:test";
import { slugify, formatCentsToCurrency } from "../src/kijiji";
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { formatCentsToCurrency, slugify } from "../src/kijiji";
describe("Utility Functions", () => {
describe("slugify", () => {
test("should convert basic strings to slugs", () => {
expect(slugify("Hello World")).toBe("hello-world");
expect(slugify("iPhone 13 Pro")).toBe("iphone-13-pro");
});
describe("slugify", () => {
test("should convert basic strings to slugs", () => {
expect(slugify("Hello World")).toBe("hello-world");
expect(slugify("iPhone 13 Pro")).toBe("iphone-13-pro");
});
test("should handle special characters", () => {
expect(slugify("Café & Restaurant")).toBe("cafe-restaurant");
expect(slugify("100% New")).toBe("100-new");
});
test("should handle special characters", () => {
expect(slugify("Café & Restaurant")).toBe("cafe-restaurant");
expect(slugify("100% New")).toBe("100-new");
});
test("should handle empty and edge cases", () => {
expect(slugify("")).toBe("");
expect(slugify(" ")).toBe("-");
expect(slugify("---")).toBe("-");
});
test("should handle empty and edge cases", () => {
expect(slugify("")).toBe("");
expect(slugify(" ")).toBe("-");
expect(slugify("---")).toBe("-");
});
test("should preserve numbers and valid characters", () => {
expect(slugify("iPhone 13")).toBe("iphone-13");
expect(slugify("item123")).toBe("item123");
});
});
test("should preserve numbers and valid characters", () => {
expect(slugify("iPhone 13")).toBe("iphone-13");
expect(slugify("item123")).toBe("item123");
});
});
describe("formatCentsToCurrency", () => {
test("should format valid cent values", () => {
expect(formatCentsToCurrency(100)).toBe("$1.00");
expect(formatCentsToCurrency(1999)).toBe("$19.99");
expect(formatCentsToCurrency(0)).toBe("$0.00");
});
describe("formatCentsToCurrency", () => {
test("should format valid cent values", () => {
expect(formatCentsToCurrency(100)).toBe("$1.00");
expect(formatCentsToCurrency(1999)).toBe("$19.99");
expect(formatCentsToCurrency(0)).toBe("$0.00");
});
test("should handle string inputs", () => {
expect(formatCentsToCurrency("100")).toBe("$1.00");
expect(formatCentsToCurrency("1999")).toBe("$19.99");
});
test("should handle string inputs", () => {
expect(formatCentsToCurrency("100")).toBe("$1.00");
expect(formatCentsToCurrency("1999")).toBe("$19.99");
});
test("should handle null/undefined inputs", () => {
expect(formatCentsToCurrency(null)).toBe("");
expect(formatCentsToCurrency(undefined)).toBe("");
});
test("should handle null/undefined inputs", () => {
expect(formatCentsToCurrency(null)).toBe("");
expect(formatCentsToCurrency(undefined)).toBe("");
});
test("should handle invalid inputs", () => {
expect(formatCentsToCurrency("invalid")).toBe("");
expect(formatCentsToCurrency(Number.NaN)).toBe("");
});
test("should handle invalid inputs", () => {
expect(formatCentsToCurrency("invalid")).toBe("");
expect(formatCentsToCurrency(Number.NaN)).toBe("");
});
test("should use en-US locale formatting", () => {
expect(formatCentsToCurrency(123456)).toBe("$1,234.56");
});
});
test("should use en-US locale formatting", () => {
expect(formatCentsToCurrency(123456)).toBe("$1,234.56");
});
});
});

View File

@@ -5,8 +5,10 @@ import { expect } from "bun:test";
// This file is loaded before any tests run due to bunfig.toml preload
// Mock fetch globally for tests
global.fetch = global.fetch || (() => {
throw new Error('fetch is not available in test environment');
});
global.fetch =
global.fetch ||
(() => {
throw new Error("fetch is not available in test environment");
});
// Add any global test utilities here

View File

@@ -7,25 +7,21 @@
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
// Bundler mode
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"noEmit": true,
// Best practices
"strict": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noImplicitAny": true,
// Some stricter flags (disabled by default)
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false,
"paths": {
"@/*": ["./src/*"]
}