/** Bucket a sampled review comment is attributed to. */
export type ReviewSamplingBucket = "latest" | "hot" | "negative";

/** Minimal comment shape required by the sampler. */
export interface ReviewSamplingComment {
  /** Identifier used for de-duplication and for tracking already-picked comments. */
  id: string;
  /** Comment text; searched case-insensitively for negative keywords. */
  content: string;
  /** Integer-like score string (read with `Number.parseInt`), or `null` when absent. */
  score: string | null;
  /** Only its presence (truthiness) is used, as a small "hot" bonus. */
  createdAt: string | null;
  /** Only its presence (truthiness) is used, as a "hot" bonus. */
  authorLabel: string | null;
}

/** A selected comment together with the bucket that selected it. */
export interface SampledReviewComment<T extends ReviewSamplingComment = ReviewSamplingComment> {
  bucket: ReviewSamplingBucket;
  comment: T;
}

/** Outcome of {@link sampleReviewComments}. */
export interface ReviewSamplingResult<T extends ReviewSamplingComment = ReviewSamplingComment> {
  /** Requested sample size after normalisation (non-negative integer). */
  targetCount: number;
  /** Number of comments actually selected. */
  actualCount: number;
  /** `true` when fewer comments than `targetCount` could be selected. */
  sampleInsufficient: boolean;
  /** How many selected comments were attributed to each bucket. */
  bucketCounts: Record<ReviewSamplingBucket, number>;
  /** Selected comments in selection order. */
  comments: Array<SampledReviewComment<T>>;
}

/** Share of the target that each bucket receives when every bucket has candidates. */
const BUCKET_WEIGHTS: Record<ReviewSamplingBucket, number> = {
  latest: 0.4,
  hot: 0.3,
  negative: 0.3
};

/** Tie-break order when handing out leftover quota slots. */
const QUOTA_PRIORITY: readonly ReviewSamplingBucket[] = ["latest", "hot", "negative"];

/** Order in which buckets pick their comments. */
const SELECTION_PRIORITY: readonly ReviewSamplingBucket[] = ["negative", "hot", "latest"];

/** Order in which buckets are asked to top up an under-filled sample. */
const FALLBACK_PRIORITY: readonly ReviewSamplingBucket[] = ["latest", "hot", "negative"];

/** Clamps a budget to a non-negative integer; non-finite or non-positive input becomes 0. */
function normalizeBudget(value: number): number {
  if (!Number.isFinite(value) || value <= 0) {
    return 0;
  }

  return Math.floor(value);
}

/**
 * Parses a score string as a base-10 integer.
 *
 * `Number.parseInt` stops at the first non-digit, so "4.5" is read as 4.
 *
 * @returns The parsed integer, or `null` for missing or unparseable input.
 */
function parseScore(score: string | null): number | null {
  if (!score) {
    return null;
  }

  const parsed = Number.parseInt(score, 10);
  return Number.isNaN(parsed) ? null : parsed;
}

/**
 * A comment is negative when its score is 3 or lower, or when its content
 * contains any of the keywords (case-insensitive substring match).
 *
 * Blank keywords are ignored: `"abc".includes("")` is always true, so a stray
 * empty entry would otherwise classify every comment as negative.
 */
function isNegativeComment(
  comment: ReviewSamplingComment,
  negativeKeywords: readonly string[]
): boolean {
  const score = parseScore(comment.score);
  if (score !== null && score <= 3) {
    return true;
  }

  const normalizedContent = comment.content.toLowerCase();
  return negativeKeywords.some(
    (keyword) => keyword.trim() !== "" && normalizedContent.includes(keyword.toLowerCase())
  );
}

/**
 * Heuristic "hotness": content length capped at 160, plus bonuses for a
 * score of 4 or more (16) or exactly 3 (8), an author label (12) and a
 * timestamp (4).
 */
function getHotScore(comment: ReviewSamplingComment): number {
  const score = parseScore(comment.score);
  const scoreBonus = score !== null && score >= 4 ? 16 : score === 3 ? 8 : 0;
  const authorBonus = comment.authorLabel ? 12 : 0;
  const timestampBonus = comment.createdAt ? 4 : 0;

  return Math.min(comment.content.length, 160) + scoreBonus + authorBonus + timestampBonus;
}

/** Removes comments with a repeated `id`, keeping the first occurrence and the input order. */
function dedupeComments<T extends ReviewSamplingComment>(comments: readonly T[]): T[] {
  const firstById = new Map<string, T>();

  for (const comment of comments) {
    if (!firstById.has(comment.id)) {
      firstById.set(comment.id, comment);
    }
  }

  return Array.from(firstById.values());
}

/**
 * Splits `targetCount` across the buckets that have candidates, in proportion
 * to `BUCKET_WEIGHTS` re-normalised over those buckets.
 *
 * Each bucket first receives the floor of its share; leftover slots go one at
 * a time to the buckets with the largest fractional part, ties resolved by
 * `QUOTA_PRIORITY`.
 */
function buildBucketTargets(
  targetCount: number,
  availability: Record<ReviewSamplingBucket, boolean>
): Record<ReviewSamplingBucket, number> {
  const normalizedTarget = normalizeBudget(targetCount);
  const activeBuckets = QUOTA_PRIORITY.filter((bucket) => availability[bucket]);
  const targets: Record<ReviewSamplingBucket, number> = { latest: 0, hot: 0, negative: 0 };

  if (normalizedTarget === 0 || activeBuckets.length === 0) {
    return targets;
  }

  const activeWeight = activeBuckets.reduce((sum, bucket) => sum + BUCKET_WEIGHTS[bucket], 0);
  let allocated = 0;

  const fractions = activeBuckets.map((bucket) => {
    const raw = (normalizedTarget * BUCKET_WEIGHTS[bucket]) / activeWeight;
    const base = Math.floor(raw);
    targets[bucket] = base;
    allocated += base;
    return { bucket, fraction: raw - base };
  });

  let remainder = normalizedTarget - allocated;

  fractions
    .sort(
      (left, right) =>
        right.fraction - left.fraction ||
        QUOTA_PRIORITY.indexOf(left.bucket) - QUOTA_PRIORITY.indexOf(right.bucket)
    )
    .forEach(({ bucket }) => {
      if (remainder <= 0) {
        return;
      }

      targets[bucket] += 1;
      remainder -= 1;
    });

  return targets;
}

/** Whether `pool` still holds a comment whose id is not in `usedIds`. */
function hasUnusedComment<T extends ReviewSamplingComment>(
  pool: readonly T[],
  usedIds: ReadonlySet<string>
): boolean {
  return pool.some((comment) => !usedIds.has(comment.id));
}

/**
 * Takes the first not-yet-used comment from `pool`, records it in `usedIds`,
 * `selected` and `bucketCounts`, and reports whether anything was taken.
 */
function pickNextComment<T extends ReviewSamplingComment>(
  bucket: ReviewSamplingBucket,
  pool: readonly T[],
  usedIds: Set<string>,
  selected: Array<SampledReviewComment<T>>,
  bucketCounts: Record<ReviewSamplingBucket, number>
): boolean {
  for (const comment of pool) {
    if (usedIds.has(comment.id)) {
      continue;
    }

    usedIds.add(comment.id);
    selected.push({ bucket, comment });
    bucketCounts[bucket] += 1;
    return true;
  }

  return false;
}

/**
 * Distributes a task-wide review budget across candidates as evenly as possible.
 *
 * @param candidateIds Candidate identifiers; duplicates are collapsed, first-seen order kept.
 * @param perLinkLimit Maximum budget for a single candidate.
 * @param taskTotalLimit Maximum budget summed over all candidates.
 * @returns A map from every unique candidate id to its budget. All budgets are
 *   0 when there are no candidates or either limit normalises to 0.
 */
export function buildReviewBudgetPlan(
  candidateIds: readonly string[],
  perLinkLimit: number,
  taskTotalLimit: number
): Map<string, number> {
  const uniqueCandidateIds = Array.from(new Set(candidateIds));
  const budgets = new Map<string, number>(
    uniqueCandidateIds.map((candidateId) => [candidateId, 0] as const)
  );
  const normalizedPerLinkLimit = normalizeBudget(perLinkLimit);
  const normalizedTaskTotalLimit = normalizeBudget(taskTotalLimit);

  if (
    uniqueCandidateIds.length === 0 ||
    normalizedPerLinkLimit === 0 ||
    normalizedTaskTotalLimit === 0
  ) {
    return budgets;
  }

  const candidateCount = uniqueCandidateIds.length;
  const totalBudget = Math.min(normalizedTaskTotalLimit, candidateCount * normalizedPerLinkLimit);

  // totalBudget never exceeds candidateCount * perLinkLimit, so the even share
  // is already within the per-link limit and needs no extra clamp.
  const baseAllocation = Math.floor(totalBudget / candidateCount);

  // The remainder is smaller than candidateCount, and whenever it is non-zero
  // the even share is strictly below the per-link limit. Giving one extra unit
  // to each of the first `remainder` candidates therefore cannot break the cap.
  const remainder = totalBudget - baseAllocation * candidateCount;

  uniqueCandidateIds.forEach((candidateId, index) => {
    budgets.set(candidateId, baseAllocation + (index < remainder ? 1 : 0));
  });

  return budgets;
}

/**
 * Picks up to `targetCount` comments, mixing three buckets:
 *
 * - `negative`: comments flagged by {@link isNegativeComment}, in input order;
 * - `hot`: non-negative comments ordered by descending hot score;
 * - `latest`: non-negative comments in input order.
 *
 * Quotas come from {@link buildBucketTargets}. Buckets pick in
 * `SELECTION_PRIORITY` order and a comment is never selected twice. If the
 * quotas cannot be met, remaining comments are used in `FALLBACK_PRIORITY`
 * order until the target is reached or no comments are left.
 *
 * @param comments Candidate comments; repeated ids are dropped (first wins).
 * @param targetCount Desired sample size; normalised to a non-negative integer.
 * @param negativeKeywords Case-insensitive substrings that mark a comment as
 *   negative. Blank entries are ignored.
 */
export function sampleReviewComments<T extends ReviewSamplingComment>(
  comments: readonly T[],
  targetCount: number,
  negativeKeywords: readonly string[]
): ReviewSamplingResult<T> {
  const uniqueComments = dedupeComments(comments);
  const normalizedTarget = normalizeBudget(targetCount);
  const bucketCounts: Record<ReviewSamplingBucket, number> = { latest: 0, hot: 0, negative: 0 };

  if (normalizedTarget === 0 || uniqueComments.length === 0) {
    return {
      targetCount: normalizedTarget,
      actualCount: 0,
      sampleInsufficient: normalizedTarget > 0,
      bucketCounts,
      comments: []
    };
  }

  // Partition once, preserving input order inside each pool.
  const negativePool: T[] = [];
  const latestPool: T[] = [];
  for (const comment of uniqueComments) {
    if (isNegativeComment(comment, negativeKeywords)) {
      negativePool.push(comment);
    } else {
      latestPool.push(comment);
    }
  }

  // Compute each comment's hot score and original position once, so the
  // comparator is O(1) rather than rescanning the pool on every comparison.
  const hotPool = latestPool
    .map((comment, index) => ({ comment, index, heat: getHotScore(comment) }))
    .sort((left, right) => right.heat - left.heat || left.index - right.index)
    .map((entry) => entry.comment);

  const bucketPools: Record<ReviewSamplingBucket, T[]> = {
    latest: latestPool,
    hot: hotPool,
    negative: negativePool
  };
  const bucketTargets = buildBucketTargets(normalizedTarget, {
    latest: latestPool.length > 0,
    hot: hotPool.length > 0,
    negative: negativePool.length > 0
  });
  const selected: Array<SampledReviewComment<T>> = [];
  const usedIds = new Set<string>();

  for (const bucket of SELECTION_PRIORITY) {
    while (
      bucketCounts[bucket] < bucketTargets[bucket] &&
      pickNextComment(bucket, bucketPools[bucket], usedIds, selected, bucketCounts)
    ) {
      // Continue until this bucket reaches its target or runs out of comments.
    }
  }

  // After the pass above every bucket has either met its quota or exhausted
  // its pool, and usedIds only grows. No bucket can be both under quota and
  // still have unused comments, so topping up only needs the fallback order.
  while (selected.length < normalizedTarget) {
    const fallbackBucket = FALLBACK_PRIORITY.find((bucket) =>
      hasUnusedComment(bucketPools[bucket], usedIds)
    );

    if (fallbackBucket === undefined) {
      break;
    }

    if (
      !pickNextComment(fallbackBucket, bucketPools[fallbackBucket], usedIds, selected, bucketCounts)
    ) {
      break;
    }
  }

  return {
    targetCount: normalizedTarget,
    actualCount: selected.length,
    sampleInsufficient: selected.length < normalizedTarget,
    bucketCounts,
    comments: selected
  };
}