refactor: add facebook response classification
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import {
|
||||
classifyFacebookResponse,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookItemData,
|
||||
extractFacebookMarketplaceData,
|
||||
@@ -571,6 +572,126 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
const result = extractFacebookMarketplaceData(html);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
// Checks that a Marketplace search URL whose HTML fixture embeds the
// "XCometMarketplaceSearchController" string and an "fb_comet" routing
// namespace is classified as kind "search", with neither flag set.
// NOTE(review): which of these markers the classifier actually keys on is
// not visible from this test — confirm against the implementation.
test("classifies Comet search responses", () => {
|
||||
const html = `
|
||||
<html>
|
||||
<head><title>Marketplace</title></head>
|
||||
<body>
|
||||
<script>"XCometMarketplaceSearchController"</script>
|
||||
<script>{"routing_namespace":"fb_comet","use_ssr_state_manager":true}</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// The second argument is the URL string passed alongside the HTML fixture.
expect(
|
||||
classifyFacebookResponse(
|
||||
html,
|
||||
"https://www.facebook.com/marketplace/toronto/search?query=bike",
|
||||
),
|
||||
).toEqual({
|
||||
kind: "search",
|
||||
authGated: false,
|
||||
unavailable: false,
|
||||
});
|
||||
});
|
||||
|
||||
// Checks that a Marketplace item URL whose HTML fixture embeds the
// "XCometMarketplacePermalinkController" string and an "fb_comet" routing
// namespace is classified as kind "item", with neither flag set.
// NOTE(review): which of these markers the classifier actually keys on is
// not visible from this test — confirm against the implementation.
test("classifies Comet item responses", () => {
|
||||
const html = `
|
||||
<html>
|
||||
<body>
|
||||
<script>"XCometMarketplacePermalinkController"</script>
|
||||
<script>{"routing_namespace":"fb_comet"}</script>
|
||||
</body>
|
||||
</html>
|
||||
`;
|
||||
|
||||
// The second argument is the URL string passed alongside the HTML fixture.
expect(
|
||||
classifyFacebookResponse(
|
||||
html,
|
||||
"https://www.facebook.com/marketplace/item/123/",
|
||||
),
|
||||
).toEqual({
|
||||
kind: "item",
|
||||
authGated: false,
|
||||
unavailable: false,
|
||||
});
|
||||
});
|
||||
|
||||
// Fixture pairs a /login/ URL (with a `next` parameter pointing at an item)
// with "You must log in" body text; the expected classification is
// "auth_gated" with only the authGated flag set.
test("classifies login-gated responses before parsing", () => {
  const loginWallHtml = `<html><body>You must log in to Facebook</body></html>`;
  const responseUrl =
    "https://www.facebook.com/login/?next=%2Fmarketplace%2Fitem%2F123%2F";

  const classification = classifyFacebookResponse(loginWallHtml, responseUrl);

  const expected = {
    kind: "auth_gated",
    authGated: true,
    unavailable: false,
  };
  expect(classification).toEqual(expected);
});
|
||||
|
||||
// Fixture is a bare Marketplace page served at a URL carrying the
// `unavailable_product=1` query parameter; the expected classification is
// "unavailable" with only the unavailable flag set.
test("classifies unavailable item responses", () => {
  const pageHtml = `<html><body>Marketplace</body></html>`;
  const responseUrl =
    "https://www.facebook.com/marketplace/toronto/?unavailable_product=1";

  const classification = classifyFacebookResponse(pageHtml, responseUrl);

  const expected = {
    kind: "unavailable",
    authGated: false,
    unavailable: true,
  };
  expect(classification).toEqual(expected);
});
|
||||
|
||||
// Fixture has generic body text and a plain Marketplace city URL; the
// expected classification is "unknown" with both flags false.
test("classifies unknown responses when no signal is present", () => {
  const pageHtml = `<html><body>Some random page</body></html>`;
  const responseUrl = "https://www.facebook.com/marketplace/toronto/";

  const classification = classifyFacebookResponse(pageHtml, responseUrl);

  const expected = {
    kind: "unknown",
    authGated: false,
    unavailable: false,
  };
  expect(classification).toEqual(expected);
});
|
||||
|
||||
// Fixture mentions "log in to Facebook" only inside a footer, served at a
// regular search URL; the expected classification is "unknown" and,
// specifically, authGated must stay false.
test("does not false-positive on incidental login text", () => {
  const pageHtml = `<html><body><footer>log in to Facebook to see your notifications</footer></body></html>`;
  const responseUrl =
    "https://www.facebook.com/marketplace/toronto/search?query=bike";

  const classification = classifyFacebookResponse(pageHtml, responseUrl);

  const expected = {
    kind: "unknown",
    authGated: false,
    unavailable: false,
  };
  expect(classification).toEqual(expected);
});
|
||||
|
||||
// Fixture body carries no login wording ("Redirecting..."), so the /login/
// URL is the only login-related input; the expected classification is
// "auth_gated" with only the authGated flag set.
test("detects auth gating from URL redirect", () => {
  const redirectHtml = `<html><body>Redirecting...</body></html>`;
  const responseUrl =
    "https://www.facebook.com/login/?next=%2Fmarketplace%2Fitem%2F456%2F";

  const classification = classifyFacebookResponse(redirectHtml, responseUrl);

  const expected = {
    kind: "auth_gated",
    authGated: true,
    unavailable: false,
  };
  expect(classification).toEqual(expected);
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user