docs: add facebook comet rewrite plan
This commit is contained in:
772
docs/superpowers/plans/2026-04-21-facebook-comet-rewrite.md
Normal file
772
docs/superpowers/plans/2026-04-21-facebook-comet-rewrite.md
Normal file
@@ -0,0 +1,772 @@
|
||||
# Facebook Comet Rewrite Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Replace the legacy Facebook Marketplace scraper with a route-aware hybrid Comet-bootstrap parser for both search and item routes.
|
||||
|
||||
**Architecture:** Keep authenticated direct HTTP fetches as the transport. Classify each Facebook response first, then parse route-specific Comet bootstrap/state candidates, and fall back to rendered-HTML extraction only when bootstrap decoding cannot produce the expected search or item shape.
|
||||
|
||||
**Tech Stack:** Bun, TypeScript, `bun:test`, `linkedom`, existing shared cookie/http helpers
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Owns Facebook fetch flow, response classification, bootstrap candidate extraction, search parsing, item parsing, and HTML fallbacks.
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
- Owns unit coverage for response classification, bootstrap parsing, fallback parsing, and route-aware item/search extraction behavior.
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts`
|
||||
- Owns higher-level fetch flow tests, auth/degradation behavior, and result shaping for search/item entrypoints.
|
||||
|
||||
### Task 1: Add Route Classification Coverage
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Add these tests near the Facebook parser tests in `packages/core/test/facebook-core.test.ts`:
|
||||
|
||||
```ts
|
||||
test("classifies Comet search responses", () => {
|
||||
const html = `
|
||||
<html>
|
||||
<head><title>Marketplace</title></head>
|
||||
<body>
|
||||
<script>"XCometMarketplaceSearchController"</script>
|
||||
<script>{"routing_namespace":"fb_comet","use_ssr_state_manager":true}</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
expect(classifyFacebookResponse(html, "https://www.facebook.com/marketplace/toronto/search?query=bike")).toEqual({
|
||||
kind: "search",
|
||||
authGated: false,
|
||||
unavailable: false,
|
||||
});
|
||||
});
|
||||
|
||||
test("classifies Comet item responses", () => {
|
||||
const html = `
|
||||
<html>
|
||||
<body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>{"routing_namespace":"fb_comet"}</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
expect(classifyFacebookResponse(html, "https://www.facebook.com/marketplace/item/123/")).toEqual({
|
||||
kind: "item",
|
||||
authGated: false,
|
||||
unavailable: false,
|
||||
});
|
||||
});
|
||||
|
||||
test("classifies login-gated responses before parsing", () => {
|
||||
const html = `<html><body>You must log in to Facebook</body></html>`;
|
||||
|
||||
expect(classifyFacebookResponse(html, "https://www.facebook.com/login/?next=%2Fmarketplace%2Fitem%2F123%2F")).toEqual({
|
||||
kind: "auth_gated",
|
||||
authGated: true,
|
||||
unavailable: false,
|
||||
});
|
||||
});
|
||||
|
||||
test("classifies unavailable item responses", () => {
|
||||
const html = `<html><body>Marketplace</body></html>`;
|
||||
|
||||
expect(classifyFacebookResponse(html, "https://www.facebook.com/marketplace/toronto/?unavailable_product=1")).toEqual({
|
||||
kind: "unavailable",
|
||||
authGated: false,
|
||||
unavailable: true,
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
|
||||
Expected: FAIL because `classifyFacebookResponse` does not exist yet.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Add this type and function near the parsing section in `packages/core/src/scrapers/facebook.ts`:
|
||||
|
||||
```ts
|
||||
/** Coarse category assigned to a fetched Facebook Marketplace response. */
type FacebookResponseKind = "search" | "item" | "auth_gated" | "unavailable" | "unknown";

/**
 * Classifies a Facebook response before any route-specific parsing is attempted.
 *
 * Checks run in priority order and the first match wins:
 * 1. auth gate — the URL contains `/login/` or the HTML contains the login prompt;
 * 2. unavailable listing — the URL carries `unavailable_product=1`;
 * 3. search route — the HTML names `XCometMarketplaceSearchController`;
 * 4. item route — the HTML names `XCometMarketplacePermalinkController`;
 * 5. otherwise `unknown`.
 *
 * @param htmlString - Raw HTML body of the response.
 * @param responseUrl - URL the response is attributed to; the auth-gate and
 *   unavailable checks inspect this string.
 * @returns The detected kind plus boolean flags that mirror the `auth_gated`
 *   and `unavailable` kinds.
 */
export function classifyFacebookResponse(
  htmlString: HTMLString,
  responseUrl: string,
): { kind: FacebookResponseKind; authGated: boolean; unavailable: boolean } {
  // "You must log in to Facebook" contains "log in to Facebook", so a single
  // substring test covers both phrasings.
  const authGated = responseUrl.includes("/login/") || htmlString.includes("log in to Facebook");
  if (authGated) {
    return { kind: "auth_gated", authGated: true, unavailable: false };
  }

  if (responseUrl.includes("unavailable_product=1")) {
    return { kind: "unavailable", authGated: false, unavailable: true };
  }

  // Route detection keys off the Comet controller name embedded in the page.
  let kind: FacebookResponseKind = "unknown";
  if (htmlString.includes("XCometMarketplaceSearchController")) {
    kind = "search";
  } else if (htmlString.includes("XCometMarketplacePermalinkController")) {
    kind = "item";
  }

  return { kind, authGated: false, unavailable: false };
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "classifies"`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/test/facebook-core.test.ts
|
||||
git commit -m "refactor: add facebook response classification"
|
||||
```
|
||||
|
||||
### Task 2: Add Bootstrap Candidate Extraction
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Add these tests:
|
||||
|
||||
```ts
|
||||
test("extracts Comet bootstrap candidates from script tags", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>{"routing_namespace":"fb_comet"}</script>
|
||||
<script>{"data":{"marketplace_search_bootstrap":{"edges":[{"node":{"listing":{"id":"1"}}}]}}}</script>
|
||||
<script>not json</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
const candidates = extractFacebookBootstrapCandidates(html);
|
||||
expect(candidates).toHaveLength(2);
|
||||
expect(candidates[1]).toEqual({
|
||||
data: {
|
||||
marketplace_search_bootstrap: {
|
||||
edges: [{ node: { listing: { id: "1" } } }],
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test("keeps candidate order stable for later scoring", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>{"marker":"first"}</script>
|
||||
<script>{"marker":"second"}</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
const candidates = extractFacebookBootstrapCandidates(html);
|
||||
expect(candidates.map((candidate) => candidate.marker)).toEqual(["first", "second"]);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates|candidate order"`
|
||||
Expected: FAIL because `extractFacebookBootstrapCandidates` does not exist.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Add this helper near the parser utilities in `packages/core/src/scrapers/facebook.ts`:
|
||||
|
||||
```ts
|
||||
/**
 * Gathers every `<script>` body in the document that decodes to a JSON record.
 *
 * Scripts with empty bodies, bodies that are not valid JSON, and JSON values
 * rejected by `isRecord` are skipped. Accepted candidates are returned in
 * document order.
 *
 * @param htmlString - Raw HTML of the Facebook response.
 * @returns The decoded candidates, possibly empty.
 */
export function extractFacebookBootstrapCandidates(htmlString: HTMLString): Record<string, unknown>[] {
  const { document } = parseHTML(htmlString);
  const found: Record<string, unknown>[] = [];
  const scriptElements = Array.from(document.querySelectorAll("script")) as HTMLScriptElement[];

  scriptElements.forEach((element) => {
    const body = element.textContent?.trim();
    if (!body) {
      return;
    }

    try {
      const decoded = JSON.parse(body);
      if (isRecord(decoded)) {
        found.push(decoded);
      }
    } catch {
      // Ignore non-JSON script bodies.
    }
  });

  return found;
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "bootstrap candidates|candidate order"`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/test/facebook-core.test.ts
|
||||
git commit -m "refactor: add facebook bootstrap candidate extraction"
|
||||
```
|
||||
|
||||
### Task 3: Replace Search Parsing With Candidate Scoring
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts`
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
- Test: `packages/core/test/facebook-integration.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Add a core test for route-aware search extraction:
|
||||
|
||||
```ts
|
||||
test("extracts search results from Comet bootstrap candidates", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplaceSearchController"</script>
|
||||
<script>
|
||||
${JSON.stringify({
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "Bike",
|
||||
listing_price: {
|
||||
amount: "120.00",
|
||||
formatted_amount: "CA$120",
|
||||
currency: "CAD",
|
||||
},
|
||||
location: {
|
||||
reverse_geocode: {
|
||||
city_page: { display_name: "Toronto" },
|
||||
},
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
const ads = extractFacebookMarketplaceData(html);
|
||||
expect(ads).toHaveLength(1);
|
||||
expect(ads?.[0].node.listing.marketplace_listing_title).toBe("Bike");
|
||||
});
|
||||
```
|
||||
|
||||
Replace one integration fixture with a current-shape search fixture:
|
||||
|
||||
```ts
|
||||
const mockSearchHtml = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplaceSearchController"</script>
|
||||
<script>${JSON.stringify({
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "iPhone 13",
|
||||
listing_price: {
|
||||
amount: "500.00",
|
||||
formatted_amount: "CA$500",
|
||||
currency: "CAD",
|
||||
},
|
||||
location: { reverse_geocode: { city_page: { display_name: "Toronto" } } },
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
})}</script>
|
||||
</body></html>
|
||||
`;
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "search results from Comet bootstrap"`
|
||||
Expected: FAIL because the current search extractor only understands legacy `marketplace_search` shapes.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Replace the search extraction internals in `extractFacebookMarketplaceData()` with candidate scoring like this:
|
||||
|
||||
```ts
|
||||
/**
 * Searches a decoded bootstrap candidate, depth first, for marketplace search edges.
 *
 * Two shapes are recognised on any record in the tree:
 * - `feed_units.edges` — returned as-is when it is a non-empty array;
 * - `resultGroups[].edges` — the edges of every group are concatenated in
 *   group order, so results split across several groups are all kept.
 *
 * Empty edge arrays are treated as "not found" so that a later, populated
 * location in the same candidate can still be discovered.
 *
 * @param candidate - Any JSON value taken from a bootstrap script.
 * @returns The first non-empty edge list found, or `null` when there is none.
 */
function findSearchEdges(candidate: unknown): FacebookEdge[] | null {
  if (Array.isArray(candidate)) {
    for (const item of candidate) {
      const result = findSearchEdges(item);
      if (result) return result;
    }
    return null;
  }

  if (!isRecord(candidate)) {
    return null;
  }

  // `candidate.feed_units` is `unknown`; narrow it before reading `.edges`.
  const feedUnits = candidate.feed_units;
  if (isRecord(feedUnits) && Array.isArray(feedUnits.edges) && feedUnits.edges.length > 0) {
    return feedUnits.edges as FacebookEdge[];
  }

  const resultGroups = candidate.resultGroups;
  if (Array.isArray(resultGroups)) {
    const groupedEdges: unknown[] = [];
    for (const group of resultGroups) {
      if (isRecord(group) && Array.isArray(group.edges)) {
        groupedEdges.push(...group.edges);
      }
    }
    if (groupedEdges.length > 0) {
      return groupedEdges as FacebookEdge[];
    }
  }

  // Neither known shape matched at this level: descend into the child values.
  for (const value of Object.values(candidate)) {
    const result = findSearchEdges(value);
    if (result) return result;
  }

  return null;
}
|
||||
|
||||
/**
 * Extracts marketplace search listings from a Facebook search response.
 *
 * Bootstrap candidates are tried in document order; the first one for which
 * `findSearchEdges` yields a non-empty list wins, and each edge is reduced to
 * an object holding only its `node`.
 *
 * @param htmlString - Raw HTML of the search response.
 * @returns The ad nodes, or `null` (after logging a warning) when no candidate
 *   contains search edges.
 */
export function extractFacebookMarketplaceData(htmlString: HTMLString): FacebookAdNode[] | null {
  for (const bootstrap of extractFacebookBootstrapCandidates(htmlString)) {
    const found = findSearchEdges(bootstrap);
    if (!found || found.length === 0) {
      continue;
    }
    return found.map(({ node }) => ({ node }));
  }

  console.warn("No marketplace data found in HTML response");
  return null;
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
|
||||
Expected: PASS for the rewritten search fixtures and existing unaffected tests.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts
|
||||
git commit -m "refactor: rewrite facebook search parser for comet bootstrap"
|
||||
```
|
||||
|
||||
### Task 4: Replace Item Parsing With Candidate Scoring
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Replace one old item fixture with a current-shape item fixture:
|
||||
|
||||
```ts
|
||||
test("extracts item details from Comet permalink bootstrap candidates", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>
|
||||
${JSON.stringify({
|
||||
payload: {
|
||||
listing: {
|
||||
id: "123",
|
||||
__typename: "GroupCommerceProductItem",
|
||||
marketplace_listing_title: "Vintage Chair",
|
||||
formatted_price: { text: "CA$80" },
|
||||
listing_price: { amount: "80.00", currency: "CAD", amount_with_offset: "80.00" },
|
||||
redacted_description: { text: "Solid wood chair" },
|
||||
location_text: { text: "Toronto, ON" },
|
||||
marketplace_listing_seller: { id: "seller-1", name: "Alex" },
|
||||
condition: "USED",
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
const item = extractFacebookItemData(html);
|
||||
expect(item?.id).toBe("123");
|
||||
expect(item?.marketplace_listing_title).toBe("Vintage Chair");
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "Comet permalink bootstrap"`
|
||||
Expected: FAIL because the current item extractor depends on legacy permalink markers.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Replace the item extraction internals with a semantic candidate finder like this:
|
||||
|
||||
```ts
|
||||
/**
 * Walks a decoded bootstrap candidate, depth first, looking for a listing record.
 *
 * A record is accepted when it has a truthy `id`, a truthy
 * `marketplace_listing_title`, and `__typename === "GroupCommerceProductItem"`.
 * Arrays are searched element by element; other records are searched through
 * their values.
 *
 * @param candidate - Any JSON value taken from a bootstrap script.
 * @returns The first matching record, or `null` when none is found.
 */
function findMarketplaceItemCandidate(candidate: unknown): FacebookMarketplaceItem | null {
  let children: unknown[];

  if (Array.isArray(candidate)) {
    children = candidate;
  } else if (isRecord(candidate)) {
    if (
      candidate.id &&
      candidate.__typename === "GroupCommerceProductItem" &&
      candidate.marketplace_listing_title
    ) {
      return candidate as FacebookMarketplaceItem;
    }
    children = Object.values(candidate);
  } else {
    // Primitives and null cannot contain a listing.
    return null;
  }

  for (const child of children) {
    const hit = findMarketplaceItemCandidate(child);
    if (hit) {
      return hit;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Extracts a single marketplace listing from a Facebook item response.
 *
 * Bootstrap candidates are inspected in document order and the first listing
 * located by `findMarketplaceItemCandidate` is returned.
 *
 * @param htmlString - Raw HTML of the item response.
 * @returns The listing record, or `null` when no candidate contains one.
 */
export function extractFacebookItemData(htmlString: HTMLString): FacebookMarketplaceItem | null {
  const bootstraps = extractFacebookBootstrapCandidates(htmlString);

  for (const bootstrap of bootstraps) {
    const listing = findMarketplaceItemCandidate(bootstrap);
    if (listing) return listing;
  }

  return null;
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts`
|
||||
Expected: PASS for current-shape item tests and remaining parser tests.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/test/facebook-core.test.ts
|
||||
git commit -m "refactor: rewrite facebook item parser for comet bootstrap"
|
||||
```
|
||||
|
||||
### Task 5: Add HTML Fallback Extraction
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/core/test/facebook-core.test.ts`
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Test: `packages/core/test/facebook-core.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Add these fallback tests:
|
||||
|
||||
```ts
|
||||
test("falls back to rendered search HTML when bootstrap payloads are undecodable", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplaceSearchController"</script>
|
||||
<a href="https://www.facebook.com/marketplace/item/123/?ref=search">Vintage Lamp</a>
|
||||
<span>CA$45</span>
|
||||
<span>Toronto, ON</span>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
const ads = extractFacebookMarketplaceData(html);
|
||||
const parsed = ads ? parseFacebookAds(ads) : [];
|
||||
expect(parsed[0].title).toBe("Vintage Lamp");
|
||||
expect(parsed[0].listingPrice?.amountFormatted).toBe("CA$45");
|
||||
});
|
||||
|
||||
test("falls back to rendered item HTML when bootstrap payloads are undecodable", () => {
|
||||
const html = `
|
||||
<html><body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<h1>Vintage Desk</h1>
|
||||
<span>CA$120</span>
|
||||
<span>Condition Used - Good</span>
|
||||
<div>Description Solid oak desk.</div>
|
||||
<div>Seller information Jordan</div>
|
||||
</body></html>
|
||||
`;
|
||||
|
||||
const item = extractFacebookItemData(html);
|
||||
expect(item?.marketplace_listing_title).toBe("Vintage Desk");
|
||||
expect(item?.formatted_price?.text).toBe("CA$120");
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
|
||||
Expected: FAIL because the extractor currently returns `null` without a structured candidate.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Add route-specific HTML fallback helpers in `packages/core/src/scrapers/facebook.ts`:
|
||||
|
||||
```ts
|
||||
/**
 * Last-resort search extraction from rendered HTML when no bootstrap payload decodes.
 *
 * Produces at most one listing, built from:
 * - the first `marketplace/item/<id>/…>text<` link, which supplies both the id
 *   and the title so the two always describe the same anchor;
 * - the first `CA$`-prefixed price anywhere in the HTML;
 * - the first `City, XX` pattern anywhere in the HTML (optional).
 *
 * The price and city are not tied to the matched link, and only `CA$` prices
 * are recognised.
 *
 * @param htmlString - Raw HTML of the search response.
 * @returns A single-element list, or `null` when no link with usable text or
 *   no price is present.
 */
function extractSearchFallback(htmlString: HTMLString): FacebookAdNode[] | null {
  // One pattern captures id and link text together; matching them separately
  // could pair the id of one link with the title of another.
  const listingMatch = htmlString.match(/marketplace\/item\/(\d+)\/[^>]*>([^<]+)</);
  const priceMatch = htmlString.match(/CA\$\d+(?:,\d{3})*(?:\.\d{2})?/);
  const cityMatch = htmlString.match(/([A-Z][a-z]+,\s*[A-Z]{2})/);

  if (!listingMatch || !priceMatch) return null;

  const title = listingMatch[2].trim();
  // Whitespace-only link text (e.g. an anchor wrapping nested markup) is not a title.
  if (!title) return null;

  const formattedAmount = priceMatch[0];

  return [
    {
      node: {
        listing: {
          id: listingMatch[1],
          marketplace_listing_title: title,
          listing_price: {
            // Strip the currency prefix and thousands separators: "CA$1,200" -> "1200".
            amount: formattedAmount.replace("CA$", "").replace(/,/g, ""),
            formatted_amount: formattedAmount,
            currency: "CAD",
          },
          location: cityMatch
            ? { reverse_geocode: { city_page: { display_name: cityMatch[1].split(",")[0] } } }
            : undefined,
          is_live: true,
        },
      },
    },
  ];
}
|
||||
|
||||
/**
 * Last-resort item extraction from rendered HTML when no bootstrap payload decodes.
 *
 * The title is the text of the first `<h1>`, the price is the first
 * `CA$`-prefixed amount, and the description is the text that follows the
 * first occurrence of "Description" up to the next `<`. The id is always the
 * placeholder `"fallback-item"`.
 *
 * @param htmlString - Raw HTML of the item response.
 * @returns A synthesized listing, or `null` when the title or price is missing.
 */
function extractItemFallback(htmlString: HTMLString): FacebookMarketplaceItem | null {
  const heading = /<h1[^>]*>([^<]+)<\/h1>/i.exec(htmlString);
  const price = /CA\$\d+(?:,\d{3})*(?:\.\d{2})?/.exec(htmlString);
  if (heading === null || price === null) {
    return null;
  }

  const formattedPrice = price[0];
  // Strip the currency prefix and thousands separators: "CA$1,200" -> "1200".
  const numericAmount = formattedPrice.replace("CA$", "").replace(/,/g, "");

  let description = "";
  if (htmlString.includes("Description")) {
    description = htmlString.split("Description")[1].split("<")[0].trim();
  }

  return {
    id: "fallback-item",
    __typename: "GroupCommerceProductItem",
    marketplace_listing_title: heading[1].trim(),
    formatted_price: { text: formattedPrice },
    listing_price: {
      amount: numericAmount,
      currency: "CAD",
      amount_with_offset: numericAmount,
    },
    redacted_description: { text: description },
    is_live: true,
  };
}
|
||||
```
|
||||
|
||||
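Then call these helpers as the last fallback inside `extractFacebookMarketplaceData()` and `extractFacebookItemData()`: when no bootstrap candidate yields a result, return the helper's result instead of returning `null` directly. In `extractFacebookMarketplaceData()`, log the "No marketplace data found" warning only if the fallback also returns `null`.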
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts --test-name-pattern "falls back"`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/test/facebook-core.test.ts
|
||||
git commit -m "refactor: add facebook html fallbacks"
|
||||
```
|
||||
|
||||
### Task 6: Wire Route-Aware Failures Into Entry Points
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts`
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts`
|
||||
- Test: `packages/core/test/facebook-integration.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing tests**
|
||||
|
||||
Add these integration tests:
|
||||
|
||||
```ts
|
||||
test("returns empty search results for auth-gated search HTML", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
url: "https://www.facebook.com/login/?next=%2Fmarketplace%2Ftoronto%2Fsearch",
|
||||
text: () => Promise.resolve("<html><body>You must log in to Facebook</body></html>"),
|
||||
headers: { get: () => null },
|
||||
}),
|
||||
);
|
||||
|
||||
const results = await fetchFacebookItems("bike", 1, "toronto", 25);
|
||||
expect(results).toEqual([]);
|
||||
});
|
||||
|
||||
test("returns null for unavailable item responses", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
url: "https://www.facebook.com/marketplace/toronto/?unavailable_product=1",
|
||||
text: () => Promise.resolve("<html><body>Marketplace</body></html>"),
|
||||
headers: { get: () => null },
|
||||
}),
|
||||
);
|
||||
|
||||
const item = await fetchFacebookItem("123");
|
||||
expect(item).toBeNull();
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run test to verify it fails**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-integration.test.ts --test-name-pattern "auth-gated|unavailable"`
|
||||
Expected: FAIL because the entrypoints do not yet classify successful HTML responses by route/auth state.
|
||||
|
||||
- [ ] **Step 3: Write minimal implementation**
|
||||
|
||||
Update both entrypoints to classify successful HTML before parsing:
|
||||
|
||||
```ts
|
||||
const responseClass = classifyFacebookResponse(searchHtml, searchUrl);
|
||||
if (responseClass.kind === "auth_gated") {
|
||||
console.warn("Facebook marketplace search is auth-gated. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.");
|
||||
return [];
|
||||
}
|
||||
|
||||
const itemResponseClass = classifyFacebookResponse(itemHtml, itemUrl);
|
||||
if (itemResponseClass.kind === "auth_gated") {
|
||||
console.warn(`Authentication failed for item ${itemId}. Cookies may be expired.`);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (itemResponseClass.kind === "unavailable") {
|
||||
console.warn(`Item ${itemId} appears to be unavailable in the marketplace.`);
|
||||
return null;
|
||||
}
|
||||
```
|
||||
|
||||
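Pass the final response URL (the `url` of the fetch response, as mocked in the tests above) to `classifyFacebookResponse`, not the URL that was requested: the `/login/` and `unavailable_product=1` markers appear in the URL the response ends up at. If `fetchHtml` does not yet return the final URL alongside the HTML, extend it to return both as part of this task.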
|
||||
|
||||
- [ ] **Step 4: Run test to verify it passes**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-integration.test.ts`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/test/facebook-integration.test.ts
|
||||
git commit -m "refactor: handle facebook route-aware failure states"
|
||||
```
|
||||
|
||||
### Task 7: Run Full Verification And Live Probe
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/core/src/scrapers/facebook.ts` if small cleanup is required
|
||||
- Modify: `packages/core/test/facebook-core.test.ts` if small cleanup is required
|
||||
- Modify: `packages/core/test/facebook-integration.test.ts` if small cleanup is required
|
||||
|
||||
- [ ] **Step 1: Run focused Facebook tests**
|
||||
|
||||
Run: `bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 2: Run broader core tests**
|
||||
|
||||
Run: `bun test packages/core/test`
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 3: Run live authenticated Facebook probe**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
set -a && source .env && set +a && bun --eval 'import { fetchFacebookItems, fetchFacebookItem } from "./packages/core/src/index.ts";
|
||||
const results = await fetchFacebookItems("iphone", 1, "toronto", 3);
|
||||
console.log("SEARCH_COUNT=" + results.length);
|
||||
console.log(JSON.stringify(results[0] ?? null));
|
||||
if (results[0]?.url) {
|
||||
const match = results[0].url.match(/\/item\/(\d+)/);
|
||||
if (match) {
|
||||
const item = await fetchFacebookItem(match[1]);
|
||||
console.log(JSON.stringify(item));
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
Expected:
|
||||
|
||||
- search returns at least one result
|
||||
- item fetch returns non-null for the first live result when the route is not stale/unavailable
|
||||
|
||||
- [ ] **Step 4: Make any minimal cleanup needed to keep tests and live probe green**
|
||||
|
||||
If cleanup is needed, keep it limited to naming, dead-code removal caused by the rewrite, or small parser corrections directly exposed by the verification commands.
|
||||
|
||||
- [ ] **Step 5: Re-run verification**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
bun test packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts && bun test packages/core/test
|
||||
```
|
||||
|
||||
Expected: PASS
|
||||
|
||||
- [ ] **Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add packages/core/src/scrapers/facebook.ts packages/core/test/facebook-core.test.ts packages/core/test/facebook-integration.test.ts
|
||||
git commit -m "refactor: complete facebook comet scraper rewrite"
|
||||
```
|
||||
|
||||
## Self-Review
|
||||
|
||||
- Spec coverage: the plan covers classification, route-aware search parsing, route-aware item parsing, HTML fallbacks, explicit failure-state handling, test replacement, and live verification.
|
||||
- Placeholder scan: no `TODO`, `TBD`, or unspecified “handle appropriately” steps remain.
|
||||
- Type consistency: all planned functions and types use the same names across tasks: `classifyFacebookResponse`, `extractFacebookBootstrapCandidates`, `extractFacebookMarketplaceData`, and `extractFacebookItemData`.
|
||||
Reference in New Issue
Block a user