feat: port upstream scraper improvements to monorepo
Kijiji improvements:
- Add error classes: NetworkError, ParseError, RateLimitError, ValidationError
- Add exponential backoff with jitter for retries
- Add request timeout (30s abort)
- Add pagination support (SearchOptions.maxPages)
- Add location/category mappings and resolution functions
- Add enhanced DetailedListing interface with images, seller info, attributes
- Add GraphQL client for seller details

Facebook improvements:
- Add parseFacebookCookieString() for parsing cookie strings
- Add ensureFacebookCookies() with env var fallback
- Add extractFacebookItemData() with multiple extraction paths
- Add fetchFacebookItem() for individual item fetching
- Add extraction metrics and API stability monitoring
- Add vehicle-specific field extraction
- Improve error handling with specific guidance for auth errors

Shared utilities:
- Update http.ts with new error classes and improved fetchHtml

Documentation:
- Port KIJIJI.md, FMARKETPLACE.md, AGENTS.md from upstream

Tests:
- Port kijiji-core, kijiji-integration, kijiji-utils tests
- Port facebook-core, facebook-integration tests
- Add test setup file

Scripts:
- Port parse-facebook-cookies.ts script

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
183
packages/core/scripts/parse-facebook-cookies.ts
Normal file
183
packages/core/scripts/parse-facebook-cookies.ts
Normal file
@@ -0,0 +1,183 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
/**
|
||||
* Facebook Cookie Parser CLI
|
||||
*
|
||||
* Parses Facebook cookie strings into JSON format for the marketplace scraper
|
||||
*
|
||||
* Usage:
|
||||
* bun run scripts/parse-facebook-cookies.ts "c_user=123; xs=abc"
|
||||
* bun run scripts/parse-facebook-cookies.ts --input cookies.txt
|
||||
* echo "c_user=123; xs=abc" | bun run scripts/parse-facebook-cookies.ts
|
||||
* bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json
|
||||
*/
|
||||
|
||||
import { parseFacebookCookieString } from "../src/facebook";
|
||||
|
||||
/**
 * One cookie entry as handled by this CLI.
 *
 * An array of these objects is what gets serialized to the output JSON file.
 * Only `name`, `value`, `domain` and `path` are required; the remaining
 * attributes are optional.
 */
interface Cookie {
  /** Cookie name (shown in the post-write summary). */
  name: string;
  /** Cookie value (a truncated preview is shown in the post-write summary). */
  value: string;
  /** Domain attribute of the cookie. */
  domain: string;
  /** Path attribute of the cookie. */
  path: string;
  /** Secure attribute, when present. */
  secure?: boolean;
  /** HttpOnly attribute, when present. */
  httpOnly?: boolean;
  /** SameSite attribute, when present. */
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  /** Expiration timestamp, when present (unit is not specified in this file). */
  expirationDate?: number;
  /** Identifier of the cookie store, when present. */
  storeId?: string;
}
|
||||
|
||||
/**
 * CLI wrapper around `parseFacebookCookieString`.
 *
 * Terminates the process with exit code 1 (after printing an error) when the
 * input is empty/whitespace-only or when parsing yields no cookies.
 *
 * @param cookieString Raw cookie string to parse.
 * @returns The non-empty list of parsed cookies.
 */
function parseFacebookCookieStringCLI(cookieString: string): Cookie[] {
  const isBlank = !cookieString || cookieString.trim() === "";
  if (isBlank) {
    console.error("❌ Error: Empty or invalid cookie string provided");
    process.exit(1);
  }

  const parsed = parseFacebookCookieString(cookieString);
  const foundNothing = parsed.length === 0;

  if (foundNothing) {
    console.error("❌ Error: No valid cookies found in input string");
    console.error('Expected format: "name1=value1; name2=value2;"');
    process.exit(1);
  }

  return parsed;
}
|
||||
|
||||
/**
 * Reads everything available on stdin and returns it as trimmed text.
 *
 * Chunks are decoded with a streaming UTF-8 decoder so that a multi-byte
 * character split across two chunks is not corrupted.
 */
async function readStdinText(): Promise<string> {
  const decoder = new TextDecoder();
  let text = "";
  for await (const chunk of process.stdin) {
    text +=
      typeof chunk === "string"
        ? chunk
        : decoder.decode(chunk, { stream: true });
  }
  text += decoder.decode(); // flush any bytes still buffered in the decoder
  return text.trim();
}

/**
 * Returns the argument that follows the option at `index`.
 *
 * Prints an error plus the help text and exits with code 1 when the option is
 * the last argument (or is followed by an empty string) and so has no value.
 */
function requireOptionValue(args: string[], index: number): string {
  const value = args[index + 1];
  if (value === undefined || value === "") {
    console.error(`❌ Error: Option ${args[index]} requires a value`);
    showHelp();
    process.exit(1);
  }
  return value;
}

/**
 * CLI entry point: collects a cookie string, parses it and writes the result.
 *
 * The cookie string is taken from, in order of precedence:
 *   1. the file given with `--input` / `-i`,
 *   2. the first positional (non-option) argument,
 *   3. stdin, when neither of the above was supplied and stdin is not a TTY.
 *
 * The parsed cookies are written to the path given with `--output` / `-o`,
 * defaulting to `./cookies/facebook.json`. `--help` / `-h` prints usage and
 * returns. Every error path prints a message and exits with code 1.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);

  let cookieString = "";
  let outputPath = "./cookies/facebook.json";
  let inputPath = "";

  // Parse command line arguments
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === "--input" || arg === "-i") {
      inputPath = requireOptionValue(args, i);
      i++; // Skip the option's value
    } else if (arg === "--output" || arg === "-o") {
      outputPath = requireOptionValue(args, i);
      i++; // Skip the option's value
    } else if (arg === "--help" || arg === "-h") {
      showHelp();
      return;
    } else if (!arg.startsWith("-")) {
      // Assume this is the cookie string
      cookieString = arg;
    } else {
      console.error(`❌ Unknown option: ${arg}`);
      showHelp();
      process.exit(1);
    }
  }

  // Read from file if specified
  if (inputPath) {
    try {
      const file = Bun.file(inputPath);
      if (!(await file.exists())) {
        console.error(`❌ Error: Input file not found: ${inputPath}`);
        process.exit(1);
      }
      cookieString = await file.text();
    } catch (error) {
      console.error(`❌ Error reading input file: ${error}`);
      process.exit(1);
    }
  }

  // Fall back to stdin when nothing else supplied a cookie string and stdin is
  // piped or redirected. `isTTY` is `true` for a terminal but may be
  // `undefined` (not `false`) otherwise, so "not a TTY" is tested by
  // falsiness. Doing this after option parsing lets stdin be combined with
  // `--output`.
  if (!cookieString.trim() && !inputPath && !process.stdin.isTTY) {
    cookieString = await readStdinText();

    if (!cookieString) {
      console.error("❌ Error: No input provided via stdin");
      process.exit(1);
    }
  }

  if (!cookieString.trim()) {
    console.error("❌ Error: No cookie string provided");
    console.error(
      "Provide cookie string as argument, --input file, or via stdin",
    );
    showHelp();
    process.exit(1);
  }

  const cookies = parseFacebookCookieStringCLI(cookieString);
  await writeOutput(cookies, outputPath);
}
|
||||
|
||||
/**
 * Serializes the cookies as pretty-printed JSON, writes them to `outputPath`
 * and prints a summary listing each cookie name with a preview of its value
 * (first 20 characters, followed by "..." when the value is longer).
 *
 * Any failure is reported on stderr and terminates the process with exit
 * code 1.
 *
 * @param cookies Cookies to persist.
 * @param outputPath Destination file path.
 */
async function writeOutput(cookies: Cookie[], outputPath: string) {
  try {
    const serialized = JSON.stringify(cookies, null, 2);
    await Bun.write(outputPath, serialized);

    console.log(`✅ Successfully parsed ${cookies.length} Facebook cookies`);
    console.log(`📁 Saved to: ${outputPath}`);

    // Show summary of parsed cookies
    console.log("\n📋 Parsed cookies:");
    cookies.forEach(({ name, value }) => {
      const preview = value.substring(0, 20);
      const ellipsis = value.length > 20 ? "..." : "";
      console.log(` • ${name}: ${preview}${ellipsis}`);
    });
  } catch (error) {
    console.error(`❌ Error writing to output file: ${error}`);
    process.exit(1);
  }
}
|
||||
|
||||
/**
 * Prints the CLI usage text (usage, examples, options, cookie format and
 * output description) to stdout with a single `console.log` call.
 */
function showHelp() {
  // The leading and trailing empty entries reproduce the newline that opens
  // and closes the help text.
  const helpLines = [
    "",
    "Facebook Cookie Parser CLI",
    "",
    "Parses Facebook cookie strings into JSON format for the marketplace scraper.",
    "",
    "USAGE:",
    "bun run scripts/parse-facebook-cookies.ts [OPTIONS] [COOKIE_STRING]",
    "",
    "EXAMPLES:",
    "# Parse from command line argument",
    'bun run scripts/parse-facebook-cookies.ts "c_user=123; xs=abc"',
    "",
    "# Parse from file",
    "bun run scripts/parse-facebook-cookies.ts --input cookies.txt",
    "",
    "# Parse from stdin",
    'echo "c_user=123; xs=abc" | bun run scripts/parse-facebook-cookies.ts',
    "",
    "# Output to custom file",
    'bun run scripts/parse-facebook-cookies.ts "cookie_string" --output my-cookies.json',
    "",
    "OPTIONS:",
    "-i, --input FILE Read cookie string from file",
    "-o, --output FILE Output file path (default: ./cookies/facebook.json)",
    "-h, --help Show this help message",
    "",
    "COOKIE FORMAT:",
    "Semicolon-separated name=value pairs",
    'Example: "c_user=123456789; xs=abcdef123456; fr=xyz789"',
    "",
    "OUTPUT:",
    "JSON array of cookie objects saved to ./cookies/facebook.json",
    "",
  ];

  console.log(helpLines.join("\n"));
}
|
||||
|
||||
// Run the CLI
|
||||
if (import.meta.main) {
  // Any rejection escaping main() is reported and mapped to exit code 1.
  const reportFatal = (error: unknown) => {
    console.error(`❌ Unexpected error: ${error}`);
    process.exit(1);
  };

  main().catch(reportFatal);
}
|
||||
Reference in New Issue
Block a user