refactor: rewrite facebook item parser for comet bootstrap
This commit is contained in:
@@ -496,6 +496,80 @@ function findSearchEdges(
|
||||
return bestMatch;
|
||||
}
|
||||
|
||||
/**
 * A marketplace item candidate discovered while walking a parsed payload,
 * together with the information used to rank it against other candidates.
 */
interface FacebookMarketplaceItemMatch {
  /** The matched object, typed as a marketplace item. */
  item: FacebookMarketplaceItem;
  /** Ranking computed by `scoreMarketplaceItemPath` for `path`; higher is preferred. */
  score: number;
  /** Object keys traversed to reach `item`; array positions are not recorded. */
  path: string[];
}
|
||||
|
||||
/**
 * Bonus awarded when a path contains the given key as an exact segment.
 * Each bonus is applied at most once, however often the key repeats.
 */
const MARKETPLACE_PATH_SEGMENT_BONUSES: ReadonlyArray<readonly [string, number]> = [
  ["payload", 2],
  ["viewer", 2],
  ["marketplace_product_details_page", 6],
  ["target", 8],
  ["listing", 6],
];

/**
 * Rank the key path at which a marketplace item candidate was found.
 *
 * The score is the sum of the bonuses for well-known keys present in the
 * path, minus 10 if any segment contains "recommend" or "related", minus the
 * path length (so that, all else equal, shallower candidates rank higher).
 *
 * @param path - Object keys leading from the root to the candidate.
 * @returns The ranking score; higher means a more likely primary item.
 */
function scoreMarketplaceItemPath(path: readonly string[]): number {
  let score = 0;

  for (const [segment, bonus] of MARKETPLACE_PATH_SEGMENT_BONUSES) {
    if (path.includes(segment)) {
      score += bonus;
    }
  }

  // Substring match (not exact) so keys such as "related_listings" or
  // "recommended_items" are penalised too.
  const looksLikeSuggestion = path.some(
    (segment) => segment.includes("recommend") || segment.includes("related"),
  );
  if (looksLikeSuggestion) {
    score -= 10;
  }

  return score - path.length;
}
|
||||
|
||||
/**
 * Walk an arbitrary parsed value and collect every object that looks like a
 * marketplace item: a string `id`, `__typename` equal to
 * "GroupCommerceProductItem", and a string `marketplace_listing_title`.
 *
 * Matches are returned in pre-order (an object before its descendants,
 * children in `Object.entries` order). Arrays are transparent: their elements
 * are visited with the parent's path, so array indices never appear in `path`.
 *
 * @param candidate - The value to search; non-object values yield no matches.
 * @param path - Object keys already traversed to reach `candidate`.
 * @returns All matching objects with their score and key path.
 */
function collectMarketplaceItemCandidates(
  candidate: unknown,
  path: string[] = [],
): FacebookMarketplaceItemMatch[] {
  // One shared accumulator: avoids `push(...results)`, which passes every
  // element as a call argument and can throw RangeError on very large result
  // sets, and avoids recopying results at each recursion level.
  const matches: FacebookMarketplaceItemMatch[] = [];

  const visit = (node: unknown, nodePath: string[]): void => {
    if (Array.isArray(node)) {
      for (const element of node) {
        visit(element, nodePath);
      }
      return;
    }

    if (!isRecord(node)) {
      return;
    }

    if (
      typeof node.id === "string" &&
      node.__typename === "GroupCommerceProductItem" &&
      typeof node.marketplace_listing_title === "string"
    ) {
      matches.push({
        item: node as FacebookMarketplaceItem,
        score: scoreMarketplaceItemPath(nodePath),
        path: nodePath,
      });
    }

    // Keep descending even after a match: nested items are candidates too
    // and are ranked against this one by the caller.
    for (const [key, value] of Object.entries(node)) {
      visit(value, [...nodePath, key]);
    }
  };

  visit(candidate, path);
  return matches;
}
|
||||
|
||||
/**
|
||||
Extract marketplace search data from Facebook page script tags
|
||||
*/
|
||||
@@ -531,139 +605,29 @@ export function extractFacebookMarketplaceData(
|
||||
|
||||
/**
|
||||
Extract marketplace item details from Facebook item page HTML
|
||||
Updated for 2026 Facebook Marketplace API structure with multiple extraction paths
|
||||
Updated for 2026 Facebook Marketplace bootstrap candidates
|
||||
*/
|
||||
export function extractFacebookItemData(
|
||||
htmlString: HTMLString,
|
||||
): FacebookMarketplaceItem | null {
|
||||
const { document } = parseHTML(htmlString);
|
||||
const scripts = document.querySelectorAll("script");
|
||||
const candidates = extractFacebookBootstrapCandidates(htmlString);
|
||||
let bestMatch: FacebookMarketplaceItemMatch | null = null;
|
||||
|
||||
for (const script of scripts) {
|
||||
const scriptText = script.textContent;
|
||||
if (!scriptText) continue;
|
||||
for (const candidate of candidates) {
|
||||
const matches = collectMarketplaceItemCandidates(candidate);
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(scriptText);
|
||||
|
||||
// Check for the require structure with marketplace product details
|
||||
if (parsed.require && Array.isArray(parsed.require)) {
|
||||
// Try multiple extraction paths discovered from reverse engineering
|
||||
const extractionPaths = [
|
||||
// Path 1: Primary path from current API structure
|
||||
() =>
|
||||
parsed.require[0][3].__bbox.result.data.viewer
|
||||
.marketplace_product_details_page.target,
|
||||
// Path 2: Alternative path with nested require
|
||||
() =>
|
||||
parsed.require[0][3][0].__bbox.require[3][3][1].__bbox.result.data
|
||||
.viewer.marketplace_product_details_page.target,
|
||||
// Path 3: Variation without the [0] index
|
||||
() =>
|
||||
parsed.require[0][3].__bbox.require[3][3][1].__bbox.result.data
|
||||
.viewer.marketplace_product_details_page.target,
|
||||
// Path 4-5: Additional fallback paths for edge cases
|
||||
() =>
|
||||
parsed.require[0][3][1]?.__bbox?.result?.data?.viewer
|
||||
?.marketplace_product_details_page?.target,
|
||||
() =>
|
||||
parsed.require[0][3][2]?.__bbox?.result?.data?.viewer
|
||||
?.marketplace_product_details_page?.target,
|
||||
];
|
||||
|
||||
let pathIndex = 0;
|
||||
for (const getPath of extractionPaths) {
|
||||
try {
|
||||
const targetData = getPath();
|
||||
if (
|
||||
targetData &&
|
||||
typeof targetData === "object" &&
|
||||
targetData.id &&
|
||||
targetData.marketplace_listing_title &&
|
||||
targetData.__typename === "GroupCommerceProductItem"
|
||||
) {
|
||||
console.log(
|
||||
`Successfully extracted Facebook item data using extraction path ${pathIndex + 1}`,
|
||||
);
|
||||
return targetData as FacebookMarketplaceItem;
|
||||
}
|
||||
} catch {
|
||||
// Path not found or invalid, try next path
|
||||
}
|
||||
pathIndex++;
|
||||
}
|
||||
|
||||
// Fallback: Search recursively for marketplace data in the parsed structure
|
||||
const findMarketplaceData = (
|
||||
obj: unknown,
|
||||
depth = 0,
|
||||
maxDepth = 10,
|
||||
): FacebookMarketplaceItem | null => {
|
||||
if (depth > maxDepth) return null; // Prevent infinite recursion
|
||||
if (isRecord(obj)) {
|
||||
// Check if this object matches the expected marketplace item structure
|
||||
const candidate = obj as Record<string, unknown>;
|
||||
if (
|
||||
candidate.marketplace_listing_title &&
|
||||
candidate.id &&
|
||||
candidate.__typename === "GroupCommerceProductItem" &&
|
||||
candidate.redacted_description
|
||||
) {
|
||||
return candidate as unknown as FacebookMarketplaceItem;
|
||||
}
|
||||
// Recursively search nested objects and arrays
|
||||
for (const key in obj) {
|
||||
const value = obj[key];
|
||||
if (isRecord(value) || Array.isArray(value)) {
|
||||
const result = findMarketplaceData(value, depth + 1, maxDepth);
|
||||
if (result) return result;
|
||||
}
|
||||
}
|
||||
} else if (Array.isArray(obj)) {
|
||||
// Search through arrays
|
||||
for (const item of obj) {
|
||||
const result = findMarketplaceData(item, depth + 1, maxDepth);
|
||||
if (result) return result;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
};
|
||||
|
||||
// Search through the entire require structure
|
||||
const recursiveResult = findMarketplaceData(parsed.require);
|
||||
if (recursiveResult) {
|
||||
console.log(
|
||||
"Successfully extracted Facebook item data using recursive search",
|
||||
);
|
||||
return recursiveResult;
|
||||
}
|
||||
|
||||
// Additional search in other potential locations
|
||||
if (
|
||||
parsed.__bbox?.result?.data?.viewer?.marketplace_product_details_page
|
||||
?.target
|
||||
) {
|
||||
const bboxData =
|
||||
parsed.__bbox.result.data.viewer.marketplace_product_details_page
|
||||
.target;
|
||||
if (
|
||||
bboxData &&
|
||||
typeof bboxData === "object" &&
|
||||
bboxData.id &&
|
||||
bboxData.marketplace_listing_title &&
|
||||
bboxData.__typename === "GroupCommerceProductItem"
|
||||
) {
|
||||
console.log(
|
||||
"Successfully extracted Facebook item data from __bbox structure",
|
||||
);
|
||||
return bboxData as FacebookMarketplaceItem;
|
||||
}
|
||||
}
|
||||
for (const match of matches) {
|
||||
if (
|
||||
!bestMatch ||
|
||||
match.score > bestMatch.score ||
|
||||
(match.score === bestMatch.score && match.path.length < bestMatch.path.length)
|
||||
) {
|
||||
bestMatch = match;
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return bestMatch?.item ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user