Compare commits
10 Commits
ee0fca826d
...
df0c528535
| Author | SHA1 | Date | |
|---|---|---|---|
| df0c528535 | |||
| 2f97d3eafd | |||
| 65eb8d1724 | |||
| f3839aba54 | |||
| 90b98bfb09 | |||
| eb6705df0f | |||
| 72525609ed | |||
| 8b0a65860c | |||
| f9b1c7e096 | |||
| 9edc74cbeb |
9
opencode.jsonc
Normal file
9
opencode.jsonc
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"$schema": "https://opencode.ai/config.json",
|
||||
"mcp": {
|
||||
"marketplace-scrape": {
|
||||
"type": "remote",
|
||||
"url": "http://localhost:4006/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,13 @@
|
||||
"name": "marketplace-scrapers-monorepo",
|
||||
"version": "1.0.0",
|
||||
"scripts": {
|
||||
"ci": "biome ci"
|
||||
"ci": "biome ci",
|
||||
"clean": "rm -rf dist",
|
||||
"build:api": "bun build ./packages/api-server/src/index.ts --target=bun --outdir=./dist/api --minify",
|
||||
"build:mcp": "bun build ./packages/mcp-server/src/index.ts --target=bun --outdir=./dist/mcp --minify",
|
||||
"build:all": "bun run build:api && bun run build:mcp",
|
||||
"build": "bun run clean && bun run build:all",
|
||||
"start": "./scripts/start.sh"
|
||||
},
|
||||
"private": true,
|
||||
"type": "module",
|
||||
|
||||
@@ -46,7 +46,7 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
||||
try {
|
||||
const items = await fetchKijijiItems(
|
||||
SEARCH_QUERY,
|
||||
1,
|
||||
4, // 4 requests per second for faster scraping
|
||||
"https://www.kijiji.ca",
|
||||
searchOptions,
|
||||
{},
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
import { parseHTML } from "linkedom";
|
||||
import { delay } from "../utils/delay";
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { HTMLString } from "../types/common";
|
||||
@@ -842,8 +840,7 @@ export function parseFacebookAds(
|
||||
title,
|
||||
listingPrice: {
|
||||
amountFormatted:
|
||||
priceObj.formatted_amount ||
|
||||
formatCentsToCurrency(cents / 100, "en-CA"),
|
||||
priceObj.formatted_amount || formatCentsToCurrency(cents, "en-CA"),
|
||||
cents,
|
||||
currency: priceObj.currency || "CAD", // Facebook marketplace often uses CAD
|
||||
},
|
||||
@@ -890,8 +887,7 @@ export function parseFacebookItem(
|
||||
if (!Number.isNaN(amount)) {
|
||||
cents = Math.round(amount * 100);
|
||||
amountFormatted =
|
||||
item.formatted_price?.text ||
|
||||
formatCentsToCurrency(cents / 100, "en-CA");
|
||||
item.formatted_price?.text || formatCentsToCurrency(cents, "en-CA");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
@@ -496,7 +494,7 @@ function _parseListing(
|
||||
|
||||
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
||||
const amountFormatted =
|
||||
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;
|
||||
cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
|
||||
|
||||
const numberOfViews =
|
||||
metrics?.views != null ? Number(metrics.views) : undefined;
|
||||
@@ -575,7 +573,7 @@ export async function parseDetailedListing(
|
||||
|
||||
const cents = price?.amount != null ? Number(price.amount) : undefined;
|
||||
const amountFormatted =
|
||||
cents != null ? formatCentsToCurrency(cents / 100, "en-CA") : undefined;
|
||||
cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
|
||||
|
||||
const numberOfViews =
|
||||
metrics?.views != null ? Number(metrics.views) : undefined;
|
||||
@@ -758,51 +756,75 @@ export default async function fetchKijijiItems(
|
||||
`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`,
|
||||
);
|
||||
|
||||
// Fetch details for this page's listings
|
||||
const progressBar = new cliProgress.SingleBar(
|
||||
{},
|
||||
cliProgress.Presets.shades_classic,
|
||||
);
|
||||
// Fetch details for this page's listings with controlled concurrency
|
||||
const isTTY = process.stdout?.isTTY ?? false;
|
||||
const progressBar = isTTY
|
||||
? new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
|
||||
: null;
|
||||
const totalProgress = newListingLinks.length;
|
||||
let currentProgress = 0;
|
||||
progressBar.start(totalProgress, currentProgress);
|
||||
progressBar?.start(totalProgress, currentProgress);
|
||||
|
||||
for (const link of newListingLinks) {
|
||||
try {
|
||||
const html = await fetchHtml(link, DELAY_MS, {
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
const parsed = await parseDetailedListing(
|
||||
html,
|
||||
BASE_URL,
|
||||
finalListingOptions,
|
||||
// Process in batches for controlled concurrency
|
||||
const CONCURRENT_REQUESTS = REQUESTS_PER_SECOND * 2; // 2x rate for faster processing
|
||||
const results: (DetailedListing | null)[] = [];
|
||||
|
||||
for (let i = 0; i < newListingLinks.length; i += CONCURRENT_REQUESTS) {
|
||||
const batch = newListingLinks.slice(i, i + CONCURRENT_REQUESTS);
|
||||
const batchPromises = batch.map(async (link) => {
|
||||
try {
|
||||
const html = await fetchHtml(link, 0, {
|
||||
// No per-request delay, batch handles rate limit
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
console.log(
|
||||
`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
const parsed = await parseDetailedListing(
|
||||
html,
|
||||
BASE_URL,
|
||||
finalListingOptions,
|
||||
);
|
||||
return parsed;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
|
||||
);
|
||||
} else {
|
||||
console.error(
|
||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
|
||||
);
|
||||
}
|
||||
return null;
|
||||
} finally {
|
||||
currentProgress++;
|
||||
progressBar?.update(currentProgress);
|
||||
if (!progressBar) {
|
||||
console.log(`Progress: ${currentProgress}/${totalProgress}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const batchResults = await Promise.all(batchPromises);
|
||||
results.push(...batchResults);
|
||||
|
||||
// Wait between batches to respect rate limit
|
||||
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, DELAY_MS * batch.length),
|
||||
);
|
||||
if (parsed) {
|
||||
allListings.push(parsed);
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
|
||||
);
|
||||
} else {
|
||||
console.error(
|
||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
currentProgress++;
|
||||
progressBar.update(currentProgress);
|
||||
}
|
||||
}
|
||||
|
||||
progressBar.stop();
|
||||
allListings.push(
|
||||
...results.filter((r): r is DetailedListing => r !== null),
|
||||
);
|
||||
|
||||
progressBar?.stop();
|
||||
|
||||
// If we got fewer results than expected (40 per page), we've reached the end
|
||||
if (searchResults.length < 40) {
|
||||
|
||||
@@ -5,7 +5,7 @@ const PORT = process.env.MCP_PORT || 4006;
|
||||
|
||||
const server = Bun.serve({
|
||||
port: PORT as number | string,
|
||||
idleTimeout: 0,
|
||||
idleTimeout: 255, // 255 seconds (max allowed)
|
||||
routes: {
|
||||
// MCP metadata discovery endpoint
|
||||
"/.well-known/mcp/server-card.json": new Response(
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import {
|
||||
fetchEbayItems,
|
||||
fetchFacebookItems,
|
||||
fetchKijijiItems,
|
||||
} from "@marketplace-scrapers/core";
|
||||
import { tools } from "./tools";
|
||||
|
||||
const API_BASE_URL = process.env.API_BASE_URL || "http://localhost:4005/api";
|
||||
const API_TIMEOUT = Number(process.env.API_TIMEOUT) || 180000; // 3 minutes default
|
||||
|
||||
/**
|
||||
* Handle MCP JSON-RPC 2.0 protocol requests
|
||||
*/
|
||||
@@ -105,24 +103,44 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const searchOptions = {
|
||||
location: args.location,
|
||||
category: args.category,
|
||||
keywords: args.keywords,
|
||||
sortBy: args.sortBy,
|
||||
sortOrder: args.sortOrder,
|
||||
maxPages: args.maxPages || 5,
|
||||
priceMin: args.priceMin,
|
||||
priceMax: args.priceMax,
|
||||
};
|
||||
const items = await fetchKijijiItems(
|
||||
query,
|
||||
1,
|
||||
"https://www.kijiji.ca",
|
||||
searchOptions,
|
||||
{},
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.location) params.append("location", args.location);
|
||||
if (args.category) params.append("category", args.category);
|
||||
if (args.keywords) params.append("keywords", args.keywords);
|
||||
if (args.sortBy) params.append("sortBy", args.sortBy);
|
||||
if (args.sortOrder) params.append("sortOrder", args.sortOrder);
|
||||
if (args.maxPages)
|
||||
params.append("maxPages", args.maxPages.toString());
|
||||
if (args.priceMin)
|
||||
params.append("priceMin", args.priceMin.toString());
|
||||
if (args.priceMax)
|
||||
params.append("priceMax", args.priceMax.toString());
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/kijiji?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] Kijiji API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] Kijiji returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
result = items || [];
|
||||
} else if (name === "search_facebook") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
@@ -132,15 +150,37 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchFacebookItems(
|
||||
query,
|
||||
1,
|
||||
args.location || "toronto",
|
||||
args.maxItems || 25,
|
||||
args.cookiesSource,
|
||||
undefined,
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.location) params.append("location", args.location);
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
if (args.cookiesSource) params.append("cookies", args.cookiesSource);
|
||||
|
||||
console.log(
|
||||
`[MCP] Calling Facebook API: ${API_BASE_URL}/facebook?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/facebook?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] Facebook API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] Facebook returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
result = items || [];
|
||||
} else if (name === "search_ebay") {
|
||||
const query = args.query;
|
||||
if (!query) {
|
||||
@@ -150,18 +190,49 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
||||
error: { code: -32602, message: "query parameter is required" },
|
||||
});
|
||||
}
|
||||
const items = await fetchEbayItems(query, 1, {
|
||||
minPrice: args.minPrice,
|
||||
maxPrice: args.maxPrice,
|
||||
strictMode: args.strictMode || false,
|
||||
exclusions: args.exclusions || [],
|
||||
keywords: args.keywords || [query],
|
||||
buyItNowOnly: args.buyItNowOnly !== false,
|
||||
canadaOnly: args.canadaOnly !== false,
|
||||
});
|
||||
const params = new URLSearchParams({ q: query });
|
||||
if (args.minPrice)
|
||||
params.append("minPrice", args.minPrice.toString());
|
||||
if (args.maxPrice)
|
||||
params.append("maxPrice", args.maxPrice.toString());
|
||||
if (args.strictMode !== undefined)
|
||||
params.append("strictMode", args.strictMode.toString());
|
||||
if (args.exclusions?.length)
|
||||
params.append("exclusions", args.exclusions.join(","));
|
||||
if (args.keywords?.length)
|
||||
params.append("keywords", args.keywords.join(","));
|
||||
if (args.buyItNowOnly !== undefined)
|
||||
params.append("buyItNowOnly", args.buyItNowOnly.toString());
|
||||
if (args.canadaOnly !== undefined)
|
||||
params.append("canadaOnly", args.canadaOnly.toString());
|
||||
if (args.maxItems)
|
||||
params.append("maxItems", args.maxItems.toString());
|
||||
|
||||
const results = args.maxItems ? items.slice(0, args.maxItems) : items;
|
||||
result = results || [];
|
||||
console.log(
|
||||
`[MCP] Calling eBay API: ${API_BASE_URL}/ebay?${params.toString()}`,
|
||||
);
|
||||
const response = await Promise.race([
|
||||
fetch(`${API_BASE_URL}/ebay?${params.toString()}`),
|
||||
new Promise<Response>((_, reject) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
reject(new Error(`Request timed out after ${API_TIMEOUT}ms`)),
|
||||
API_TIMEOUT,
|
||||
),
|
||||
),
|
||||
]);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error(
|
||||
`[MCP] eBay API error ${response.status}: ${errorText}`,
|
||||
);
|
||||
throw new Error(`API returned ${response.status}: ${errorText}`);
|
||||
}
|
||||
result = await response.json();
|
||||
console.log(
|
||||
`[MCP] eBay returned ${Array.isArray(result) ? result.length : 0} items`,
|
||||
);
|
||||
} else {
|
||||
return Response.json({
|
||||
jsonrpc: "2.0",
|
||||
|
||||
26
scripts/biome-symlink.sh
Executable file
26
scripts/biome-symlink.sh
Executable file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
#
# Replace every biome executable found under node_modules with a symlink to
# the biome binary that is resolved from PATH.
#
# Exit status: 1 when no biome is available on PATH, 0 otherwise (including
# the case where node_modules contains no biome executables).

# Resolve the system biome executable. `command -v` is a shell builtin, so the
# lookup does not depend on the external `which` tool being installed.
BIOME_PATH=$(command -v biome)

if [ -z "$BIOME_PATH" ]; then
  echo "Error: biome executable not found in PATH" >&2
  exit 1
fi

# Walk the executables fd reports, one path per line. Reading line by line
# (instead of expanding an unquoted variable) keeps paths that contain spaces
# or glob characters intact.
match_count=0
while IFS= read -r file; do
  [ -n "$file" ] || continue
  match_count=$((match_count + 1))

  # If the match already IS the system biome (PATH resolved into node_modules,
  # or a previous run already linked it), replacing it would delete the target
  # and leave a self-referencing or dangling link.
  if [ "$file" -ef "$BIOME_PATH" ]; then
    echo "Skipping $file (already resolves to $BIOME_PATH)"
    continue
  fi

  echo "Replacing $file with symlink to $BIOME_PATH"
  rm -- "$file"
  ln -s -- "$BIOME_PATH" "$file"
done < <(fd biome node_modules --type executable --no-ignore --follow)

if [ "$match_count" -eq 0 ]; then
  echo "No biome executables found in node_modules"
  exit 0
fi

echo "Done."
|
||||
30
scripts/remove-eslint.sh
Executable file
30
scripts/remove-eslint.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env bash
#
# Delete every line matching PATTERN from each regular file that `fd` lists
# under the current directory. sed keeps the pre-edit copy of each modified
# file next to it as "<file>.bak".

PATTERN="eslint"

# Read fd's output one path per line so that paths containing spaces are not
# split into several words.
while IFS= read -r file; do
    [[ -f "$file" ]] || continue

    # This script's own source contains PATTERN; never let it rewrite itself
    # while it is running.
    [[ "$file" -ef "${BASH_SOURCE[0]}" ]] && continue

    # 1. rg prints each matching line as "<line number>:<text>".
    # 2. awk keeps only the line number (field 1).
    # 3. Every number becomes its own `-e Nd` expression. Joining the numbers
    #    with commas (e.g. "3,7d") would make sed treat them as a RANGE and
    #    delete the non-matching lines in between, and three or more numbers
    #    would be a sed syntax error.
    line_numbers=""
    sed_args=()
    while IFS= read -r number; do
        # Ignore anything that is not a plain line number (e.g. notices rg may
        # print instead of matches).
        [[ "$number" =~ ^[0-9]+$ ]] || continue
        sed_args+=(-e "${number}d")
        line_numbers="${line_numbers:+$line_numbers,}$number"
    done < <(rg --line-number --no-filename "$PATTERN" "$file" | awk -F':' '{print $1}')

    if [[ -n "$line_numbers" ]]; then
        echo "Deleting lines $line_numbers from $file..."

        # Delete the collected lines in-place. An attached backup suffix
        # (-i.bak) is the form accepted by both GNU sed (Linux) and BSD sed
        # (macOS).
        sed -i.bak "${sed_args[@]}" "$file"

        # Optional: Remove the backup file created by sed -i.bak
        # rm "${file}.bak"
    else
        echo "$file: No lines matching pattern found."
    fi
done < <(fd .)
|
||||
25
scripts/start.sh
Executable file
25
scripts/start.sh
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
#
# Launch the bundled API server and MCP server as background jobs and keep
# this script in the foreground until they exit. TERM/INT are forwarded to
# both children.
set -e

# Trap SIGTERM and SIGINT for graceful shutdown. The PIDs are expanded when
# the trap fires, so a signal that arrives before a server was started simply
# passes an empty value to kill (its error output is discarded).
trap 'echo "Received shutdown signal, stopping services..."; kill -TERM $API_PID $MCP_PID 2>/dev/null; wait' TERM INT

# Start API Server in background
echo "Starting API Server on port ${API_PORT:-4005}..."
bun dist/api/index.js &
API_PID=$!

# Give API server a moment to initialize
sleep 1

# Start MCP Server in background. The MCP server takes its port from MCP_PORT
# (default 4006), so the message must report that variable, not API_PORT.
echo "Starting MCP Server on port ${MCP_PORT:-4006}..."
bun dist/mcp/index.js &
MCP_PID=$!

echo "Both services started successfully"
echo "API Server PID: $API_PID"
echo "MCP Server PID: $MCP_PID"

# Wait for both processes
wait "$API_PID" "$MCP_PID"
|
||||
Reference in New Issue
Block a user