feat(facebook): parse additional listing details like status, images, and seller info
Enhance Facebook scraping to extract listing status (ACTIVE/SOLD/PENDING/HIDDEN), primary image/video URLs, seller name/ID, category ID, and delivery options, improving response completeness.
This commit is contained in:
73
CLAUDE.md
73
CLAUDE.md
@@ -16,10 +16,79 @@ This is a lightweight Bun-based API server for scraping marketplace listings fro
|
|||||||
|
|
||||||
- **Entry Point (`src/index.ts`)**: Implements a basic HTTP server using `Bun.serve`. Key routes:
|
- **Entry Point (`src/index.ts`)**: Implements a basic HTTP server using `Bun.serve`. Key routes:
|
||||||
- `GET /api/status`: Health check returning "OK".
|
- `GET /api/status`: Health check returning "OK".
|
||||||
- `POST/GET /api/kijiji`: Accepts a search query via header (`query`) or param (`q`), scrapes Kijiji for up to 5 results (configurable), and returns JSON with listing details (title, price, description, etc.).
|
- `GET /api/kijiji?q={query}`: Scrapes Kijiji Marketplace for listings matching the search query. Returns JSON array of listing objects.
|
||||||
- `POST/GET /api/facebook`: Similar to Kijiji, but for Facebook Marketplace. Optional `location` param (default "toronto"). Note: Requires authentication cookies for full access.
|
- `GET /api/facebook?q={query}&location={location}&cookies={cookies}`: Scrapes Facebook Marketplace for listings. Requires Facebook session cookies, supplied either via the `cookies` URL parameter or a `cookies/facebook.json` file. Optional `location` param (default "toronto"). Returns a JSON array of listing objects.
|
||||||
- Fallback: 404 for unmatched routes.
|
- Fallback: 404 for unmatched routes.
|
||||||
|
|
||||||
|
## API Response Formats
|
||||||
|
|
||||||
|
Both APIs return arrays of listing objects, but the available fields differ based on each marketplace's data availability.
|
||||||
|
|
||||||
|
### Kijiji API Response Object
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"url": "https://www.kijiji.ca/v-laptops/city-of-toronto/...",
|
||||||
|
"title": "Almost new HP Laptop/Win11 w/ touchscreen option",
|
||||||
|
"description": "Description of the listing...",
|
||||||
|
"listingPrice": {
|
||||||
|
"amountFormatted": "149.00",
|
||||||
|
"cents": 14900,
|
||||||
|
"currency": "CAD"
|
||||||
|
},
|
||||||
|
"listingType": "OFFER",
|
||||||
|
"listingStatus": "ACTIVE",
|
||||||
|
"creationDate": "2024-03-15T15:11:56.000Z",
|
||||||
|
"endDate": "3000-01-01T00:00:00.000Z",
|
||||||
|
"numberOfViews": 2005,
|
||||||
|
"address": "SPADINA AVENUE, Toronto, ON, M5T 2H7"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Facebook API Response Object
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"url": "https://www.facebook.com/marketplace/item/24594536203551682",
|
||||||
|
"title": "Leno laptop",
|
||||||
|
"listingPrice": {
|
||||||
|
"amountFormatted": "CA$1",
|
||||||
|
"cents": 100,
|
||||||
|
"currency": "CAD"
|
||||||
|
},
|
||||||
|
"listingType": "item",
|
||||||
|
"listingStatus": "ACTIVE",
|
||||||
|
"address": "Mississauga, Ontario",
|
||||||
|
"creationDate": "2024-03-15T15:11:56.000Z",
|
||||||
|
"categoryId": "1792291877663080",
|
||||||
|
"imageUrl": "https://scontent-yyz1-1.xx.fbcdn.net/...",
|
||||||
|
"videoUrl": "https://www.facebook.com/1300609777949414/",
|
||||||
|
"seller": {
|
||||||
|
"name": "Joyce Diaz",
|
||||||
|
"id": "100091799187797"
|
||||||
|
},
|
||||||
|
"deliveryTypes": ["IN_PERSON"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Common Fields
|
||||||
|
- `url`: Full URL to the listing
|
||||||
|
- `title`: Listing title
|
||||||
|
- `listingPrice`: Price object with `amountFormatted` (human-readable), `cents` (integer cents), `currency` (e.g., "CAD")
|
||||||
|
- `address`: Location string (or null if unavailable)
|
||||||
|
|
||||||
|
### Kijiji-Only Fields
|
||||||
|
- `description`: Detailed description text (Facebook search results don't include descriptions)
|
||||||
|
- `endDate`: When listing expires (Facebook doesn't have expiration dates in search results)
|
||||||
|
- `numberOfViews`: View count (Facebook doesn't expose view metrics in search results)
|
||||||
|
|
||||||
|
### Facebook-Only Fields
|
||||||
|
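- `listingStatus`: One of "SOLD", "PENDING", "ACTIVE", or "HIDDEN", derived from the `is_sold`, `is_pending`, `is_live`, and `is_hidden` flags, checked in that order (the first flag that is set wins); left undefined when none of them is set. Note that the Kijiji example above also includes a `listingStatus` value.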
|
||||||
|
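- `creationDate`: ISO 8601 timestamp of when the listing was posted; present only when Facebook supplies a creation time. Note that the Kijiji example above also includes `creationDate`.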
|
||||||
|
- `categoryId`: Facebook marketplace category identifier
|
||||||
|
- `imageUrl`: Primary listing photo URL
|
||||||
|
- `videoUrl`: Facebook URL for the listing's video, built from the video's ID (present only if the listing has a video)
|
||||||
|
- `seller`: Object with the seller's `name` and Facebook user `id` (present only when the listing includes seller data)
|
||||||
|
- `deliveryTypes`: Available delivery options (e.g., ["IN_PERSON", "SHIPPING"])
|
||||||
|
|
||||||
- **Kijiji Scraping (`src/kijiji.ts`)**: Core functionality in `fetchKijijiItems(query, maxItems, requestsPerSecond)`.
|
- **Kijiji Scraping (`src/kijiji.ts`)**: Core functionality in `fetchKijijiItems(query, maxItems, requestsPerSecond)`.
|
||||||
- Slugifies the query using `unidecode` for URL-safe search terms.
|
- Slugifies the query using `unidecode` for URL-safe search terms.
|
||||||
- Fetches the search page HTML, parses Next.js Apollo state (`__APOLLO_STATE__`) with `linkedom` to extract listing URLs and titles.
|
- Fetches the search page HTML, parses Next.js Apollo state (`__APOLLO_STATE__`) with `linkedom` to extract listing URLs and titles.
|
||||||
|
|||||||
@@ -83,6 +83,15 @@ type ListingDetails = {
|
|||||||
endDate?: string;
|
endDate?: string;
|
||||||
numberOfViews?: number;
|
numberOfViews?: number;
|
||||||
address?: string | null;
|
address?: string | null;
|
||||||
|
// Facebook-specific fields
|
||||||
|
imageUrl?: string;
|
||||||
|
videoUrl?: string;
|
||||||
|
seller?: {
|
||||||
|
name?: string;
|
||||||
|
id?: string;
|
||||||
|
};
|
||||||
|
categoryId?: string;
|
||||||
|
deliveryTypes?: string[];
|
||||||
};
|
};
|
||||||
|
|
||||||
// ----------------------------- Utilities -----------------------------
|
// ----------------------------- Utilities -----------------------------
|
||||||
@@ -437,11 +446,33 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
|
|||||||
listing.location?.reverse_geocode?.city_page?.display_name;
|
listing.location?.reverse_geocode?.city_page?.display_name;
|
||||||
const address = cityName || null;
|
const address = cityName || null;
|
||||||
|
|
||||||
|
// Determine listing status from Facebook flags
|
||||||
|
let listingStatus: string | undefined = undefined;
|
||||||
|
if (listing.is_sold) {
|
||||||
|
listingStatus = "SOLD";
|
||||||
|
} else if (listing.is_pending) {
|
||||||
|
listingStatus = "PENDING";
|
||||||
|
} else if (listing.is_live) {
|
||||||
|
listingStatus = "ACTIVE";
|
||||||
|
} else if (listing.is_hidden) {
|
||||||
|
listingStatus = "HIDDEN";
|
||||||
|
}
|
||||||
|
|
||||||
// Format creation date if available
|
// Format creation date if available
|
||||||
const creationDate = listing.creation_time
|
const creationDate = listing.creation_time
|
||||||
? new Date(listing.creation_time * 1000).toISOString()
|
? new Date(listing.creation_time * 1000).toISOString()
|
||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
|
// Extract image and video URLs
|
||||||
|
const imageUrl = listing.primary_listing_photo?.image?.uri;
|
||||||
|
const videoUrl = listing.listing_video ? `https://www.facebook.com/${listing.listing_video.id}/` : undefined;
|
||||||
|
|
||||||
|
// Extract seller information
|
||||||
|
const seller = listing.marketplace_listing_seller ? {
|
||||||
|
name: listing.marketplace_listing_seller.name,
|
||||||
|
id: listing.marketplace_listing_seller.id
|
||||||
|
} : undefined;
|
||||||
|
|
||||||
const listingDetails: ListingDetails = {
|
const listingDetails: ListingDetails = {
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
@@ -453,6 +484,12 @@ function parseFacebookAds(ads: FacebookAdNode[]): ListingDetails[] {
|
|||||||
address,
|
address,
|
||||||
creationDate,
|
creationDate,
|
||||||
listingType: "item", // Default type for marketplace listings
|
listingType: "item", // Default type for marketplace listings
|
||||||
|
listingStatus,
|
||||||
|
categoryId: listing.marketplace_listing_category_id,
|
||||||
|
imageUrl,
|
||||||
|
videoUrl,
|
||||||
|
seller,
|
||||||
|
deliveryTypes: listing.delivery_types,
|
||||||
};
|
};
|
||||||
|
|
||||||
results.push(listingDetails);
|
results.push(listingDetails);
|
||||||
|
|||||||
Reference in New Issue
Block a user