refactor: add facebook response classification

This commit is contained in:
2026-04-21 23:31:45 -04:00
parent 2617afc62f
commit b072599bc6
2 changed files with 167 additions and 13 deletions

View File

@@ -1,5 +1,6 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
classifyFacebookResponse,
ensureFacebookCookies,
extractFacebookItemData,
extractFacebookMarketplaceData,
@@ -571,6 +572,126 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
const result = extractFacebookMarketplaceData(html);
expect(result).toBeNull();
});
// A page carrying the Comet marketplace search controller marker, requested
// from a search URL, is expected to classify as a plain search response.
test("classifies Comet search responses", () => {
  const searchPage = `
<html>
<head><title>Marketplace</title></head>
<body>
<script>"XCometMarketplaceSearchController"</script>
<script>{"routing_namespace":"fb_comet","use_ssr_state_manager":true}</script>
</body>
</html>
`;
  const searchUrl =
    "https://www.facebook.com/marketplace/toronto/search?query=bike";
  const expectedClassification = {
    kind: "search",
    authGated: false,
    unavailable: false,
  };

  const classification = classifyFacebookResponse(searchPage, searchUrl);

  expect(classification).toEqual(expectedClassification);
});
// A page carrying the Comet marketplace permalink controller marker,
// requested from an item URL, is expected to classify as an item response.
test("classifies Comet item responses", () => {
  const itemPage = `
<html>
<body>
<script>"XCometMarketplacePermalinkController"</script>
<script>{"routing_namespace":"fb_comet"}</script>
</body>
</html>
`;
  const itemUrl = "https://www.facebook.com/marketplace/item/123/";
  const expectedClassification = {
    kind: "item",
    authGated: false,
    unavailable: false,
  };

  const classification = classifyFacebookResponse(itemPage, itemUrl);

  expect(classification).toEqual(expectedClassification);
});
// Both the body text and the final URL point at a login wall here; the
// expected classification is auth-gated.
test("classifies login-gated responses before parsing", () => {
  const loginWallPage = `<html><body>You must log in to Facebook</body></html>`;
  const loginUrl =
    "https://www.facebook.com/login/?next=%2Fmarketplace%2Fitem%2F123%2F";
  const expectedClassification = {
    kind: "auth_gated",
    authGated: true,
    unavailable: false,
  };

  const classification = classifyFacebookResponse(loginWallPage, loginUrl);

  expect(classification).toEqual(expectedClassification);
});
// A URL with the `unavailable_product=1` query parameter and a bare
// Marketplace body is expected to classify as unavailable.
test("classifies unavailable item responses", () => {
  const marketplacePage = `<html><body>Marketplace</body></html>`;
  const unavailableUrl =
    "https://www.facebook.com/marketplace/toronto/?unavailable_product=1";
  const expectedClassification = {
    kind: "unavailable",
    authGated: false,
    unavailable: true,
  };

  const classification = classifyFacebookResponse(
    marketplacePage,
    unavailableUrl,
  );

  expect(classification).toEqual(expectedClassification);
});
// A generic body on a plain marketplace URL is expected to fall through
// to the "unknown" kind with both flags off.
test("classifies unknown responses when no signal is present", () => {
  const genericPage = `<html><body>Some random page</body></html>`;
  const marketplaceUrl = "https://www.facebook.com/marketplace/toronto/";
  const expectedClassification = {
    kind: "unknown",
    authGated: false,
    unavailable: false,
  };

  const classification = classifyFacebookResponse(genericPage, marketplaceUrl);

  expect(classification).toEqual(expectedClassification);
});
// Login wording that only appears in a footer, on a normal search URL,
// must not be reported as auth-gated; the expected kind is "unknown".
test("does not false-positive on incidental login text", () => {
  const pageWithFooterLoginText = `<html><body><footer>log in to Facebook to see your notifications</footer></body></html>`;
  const searchUrl =
    "https://www.facebook.com/marketplace/toronto/search?query=bike";
  const expectedClassification = {
    kind: "unknown",
    authGated: false,
    unavailable: false,
  };

  const classification = classifyFacebookResponse(
    pageWithFooterLoginText,
    searchUrl,
  );

  expect(classification).toEqual(expectedClassification);
});
// The body has no login wording; only the final URL is a login page.
// The expected classification is still auth-gated.
test("detects auth gating from URL redirect", () => {
  const redirectPage = `<html><body>Redirecting...</body></html>`;
  const loginRedirectUrl =
    "https://www.facebook.com/login/?next=%2Fmarketplace%2Fitem%2F456%2F";
  const expectedClassification = {
    kind: "auth_gated",
    authGated: true,
    unavailable: false,
  };

  const classification = classifyFacebookResponse(
    redirectPage,
    loginRedirectUrl,
  );

  expect(classification).toEqual(expectedClassification);
});
});
});