diff --git a/CLAUDE.md b/CLAUDE.md index 367ab1c..dc5556f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,10 +16,79 @@ This is a lightweight Bun-based API server for scraping marketplace listings fro - **Entry Point (`src/index.ts`)**: Implements a basic HTTP server using `Bun.serve`. Key routes: - `GET /api/status`: Health check returning "OK". - - `POST/GET /api/kijiji`: Accepts a search query via header (`query`) or param (`q`), scrapes Kijiji for up to 5 results (configurable), and returns JSON with listing details (title, price, description, etc.). - - `POST/GET /api/facebook`: Similar to Kijiji, but for Facebook Marketplace. Optional `location` param (default "toronto"). Note: Requires authentication cookies for full access. + - `GET /api/kijiji?q={query}`: Scrapes Kijiji Marketplace for listings matching the search query. Returns JSON array of listing objects. + - `GET /api/facebook?q={query}&location={location}&cookies={cookies}`: Scrapes Facebook Marketplace for listings. Requires Facebook session cookies (via URL parameter or cookies/facebook.json file). Optional `location` param (default "toronto"). Returns JSON array of listing objects. - Fallback: 404 for unmatched routes. +## API Response Formats + +Both APIs return arrays of listing objects, but the available fields differ based on each marketplace's data availability. 
+ +### Kijiji API Response Object +```json +{ + "url": "https://www.kijiji.ca/v-laptops/city-of-toronto/...", + "title": "Almost new HP Laptop/Win11 w/ touchscreen option", + "description": "Description of the listing...", + "listingPrice": { + "amountFormatted": "149.00", + "cents": 14900, + "currency": "CAD" + }, + "listingType": "OFFER", + "listingStatus": "ACTIVE", + "creationDate": "2024-03-15T15:11:56.000Z", + "endDate": "3000-01-01T00:00:00.000Z", + "numberOfViews": 2005, + "address": "SPADINA AVENUE, Toronto, ON, M5T 2H7" +} +``` + +### Facebook API Response Object +```json +{ + "url": "https://www.facebook.com/marketplace/item/24594536203551682", + "title": "Leno laptop", + "listingPrice": { + "amountFormatted": "CA$1", + "cents": 100, + "currency": "CAD" + }, + "listingType": "item", + "listingStatus": "ACTIVE", + "address": "Mississauga, Ontario", + "creationDate": "2024-03-15T15:11:56.000Z", + "categoryId": "1792291877663080", + "imageUrl": "https://scontent-yyz1-1.xx.fbcdn.net/...", + "videoUrl": "https://www.facebook.com/1300609777949414/", + "seller": { + "name": "Joyce Diaz", + "id": "100091799187797" + }, + "deliveryTypes": ["IN_PERSON"] +} +``` + +### Common Fields +- `url`: Full URL to the listing +- `title`: Listing title +- `listingPrice`: Price object with `amountFormatted` (human-readable), `cents` (integer cents), `currency` (e.g., "CAD") +- `address`: Location string (or null if unavailable) + +### Kijiji-Only Fields +- `description`: Detailed description text (Facebook search results don't include descriptions) +- `endDate`: When listing expires (Facebook doesn't have expiration dates in search results) +- `numberOfViews`: View count (Facebook doesn't expose view metrics in search results) + +### Facebook-Only Fields +- `listingStatus`: Also returned by Kijiji (see the example above); for Facebook it is derived from the `is_sold`, `is_pending`, `is_live`, `is_hidden` flags, checked in that order ("SOLD", "PENDING", "ACTIVE", "HIDDEN"), and omitted when none is set +- `creationDate`: Also returned by Kijiji (see the example above); for Facebook, when the listing was posted (omitted when unavailable) +- `categoryId`: Facebook marketplace category 
identifier +- `imageUrl`: Primary listing photo URL +- `videoUrl`: Listing video URL (if video exists) +- `seller`: Object with seller name and Facebook user ID +- `deliveryTypes`: Available delivery options (e.g., ["IN_PERSON", "SHIPPING"]) + - **Kijiji Scraping (`src/kijiji.ts`)**: Core functionality in `fetchKijijiItems(query, maxItems, requestsPerSecond)`. - Slugifies the query using `unidecode` for URL-safe search terms. - Fetches the search page HTML, parses Next.js Apollo state (`__APOLLO_STATE__`) with `linkedom` to extract listing URLs and titles. diff --git a/src/facebook.ts b/src/facebook.ts index 0bce67f..6ba70b9 100644 --- a/src/facebook.ts +++ b/src/facebook.ts @@ -83,6 +83,15 @@ type ListingDetails = { endDate?: string; numberOfViews?: number; address?: string | null; + // Facebook-specific fields + imageUrl?: string; + videoUrl?: string; + seller?: { + name?: string; + id?: string; + }; + categoryId?: string; + deliveryTypes?: string[]; }; // ----------------------------- Utilities ----------------------------- @@ -437,11 +446,33 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] { listing.location?.reverse_geocode?.city_page?.display_name; const address = cityName || null; + // Determine listing status from Facebook flags + let listingStatus: string | undefined = undefined; + if (listing.is_sold) { + listingStatus = "SOLD"; + } else if (listing.is_pending) { + listingStatus = "PENDING"; + } else if (listing.is_live) { + listingStatus = "ACTIVE"; + } else if (listing.is_hidden) { + listingStatus = "HIDDEN"; + } + // Format creation date if available const creationDate = listing.creation_time ? new Date(listing.creation_time * 1000).toISOString() : undefined; + // Extract image and video URLs + const imageUrl = listing.primary_listing_photo?.image?.uri; + const videoUrl = listing.listing_video ? 
`https://www.facebook.com/${listing.listing_video.id}/` : undefined; + + // Extract seller information + const seller = listing.marketplace_listing_seller ? { + name: listing.marketplace_listing_seller.name, + id: listing.marketplace_listing_seller.id + } : undefined; + const listingDetails: ListingDetails = { url, title, @@ -453,6 +484,12 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] { address, creationDate, listingType: "item", // Default type for marketplace listings + listingStatus, + categoryId: listing.marketplace_listing_category_id, + imageUrl, + videoUrl, + seller, + deliveryTypes: listing.delivery_types, }; results.push(listingDetails);