// 313 lines · 8.4 KiB · TypeScript

/**
 * Name of a sampling bucket. Every sampled comment is tagged with the bucket
 * it was drawn from, and quotas/counts are tracked per bucket.
 */
export type ReviewSamplingBucket = "latest" | "hot" | "negative";
/**
 * Minimal shape a comment must have to take part in sampling.
 */
export interface ReviewSamplingComment {
/** Identity used for de-duplication and for tracking already-selected comments. */
id: string;
/** Comment text; matched case-insensitively against negative keywords, and its length feeds the hot score. */
content: string;
/** Rating as text; parsed as a base-10 integer, and a parsed value of 3 or lower marks the comment as negative. */
score: string | null;
/** Creation timestamp as text; this module only checks whether it is present. */
createdAt: string | null;
/** Author label as text; this module only checks whether it is present. */
authorLabel: string | null;
}
/**
 * A comment that was picked by the sampler, paired with the bucket it was
 * picked for.
 */
export interface SampledReviewComment<T extends ReviewSamplingComment = ReviewSamplingComment> {
/** Bucket whose pool the comment was taken from. */
bucket: ReviewSamplingBucket;
/** The selected comment, returned as the same object that was passed in. */
comment: T;
}
/**
 * Outcome of a sampling run.
 */
export interface ReviewSamplingResult<T extends ReviewSamplingComment = ReviewSamplingComment> {
/** Requested sample size after normalization (floored; non-finite or non-positive requests become 0). */
targetCount: number;
/** Number of comments actually selected. */
actualCount: number;
/** True when fewer comments were selected than `targetCount` asked for. */
sampleInsufficient: boolean;
/** How many selected comments were attributed to each bucket. */
bucketCounts: Record<ReviewSamplingBucket, number>;
/** Selected comments in the order they were picked. */
comments: Array<SampledReviewComment<T>>;
}
/**
 * Relative share of the sample each bucket should receive. When a bucket has
 * no comments available, the quota calculation re-normalizes over the
 * remaining buckets' weights.
 */
const BUCKET_WEIGHTS: Record<ReviewSamplingBucket, number> = {
latest: 0.4,
hot: 0.3,
negative: 0.3
};
/** Bucket order used when computing quotas; earlier buckets win ties for leftover slots. */
const QUOTA_PRIORITY: ReviewSamplingBucket[] = ["latest", "hot", "negative"];
/** Bucket order used when filling quotas from the pools (negative comments are picked first). */
const SELECTION_PRIORITY: ReviewSamplingBucket[] = ["negative", "hot", "latest"];
/** Bucket order used to top up the sample once quotas can no longer be met. */
const FALLBACK_PRIORITY: ReviewSamplingBucket[] = ["latest", "hot", "negative"];
/**
 * Coerces a requested budget into a non-negative whole number.
 *
 * @param value - Raw budget; may be fractional, negative, NaN or infinite.
 * @returns The value rounded down, or 0 when it is not a finite positive number.
 */
function normalizeBudget(value: number): number {
  const isUsable = Number.isFinite(value) && value > 0;
  return isUsable ? Math.floor(value) : 0;
}
/**
 * Reads a textual score as a base-10 integer.
 *
 * Parsing follows `Number.parseInt`: leading whitespace is skipped and
 * anything after the leading integer is ignored, so "3.9" yields 3.
 *
 * @param score - Score text, or null when the comment has no score.
 * @returns The parsed integer, or null when the text is missing, empty or not numeric.
 */
function parseScore(score: string | null): number | null {
  const numeric = score ? Number.parseInt(score, 10) : Number.NaN;
  if (Number.isNaN(numeric)) {
    return null;
  }
  return numeric;
}
/**
 * Decides whether a comment counts as negative.
 *
 * A comment is negative when its parsed score is 3 or lower, or when its
 * content contains any of the given keywords (case-insensitive substring
 * match).
 *
 * @param comment - Comment to classify.
 * @param negativeKeywords - Keywords that mark a comment as negative. Empty or
 *   whitespace-only entries are ignored.
 * @returns True when the comment is classified as negative.
 */
function isNegativeComment(
  comment: ReviewSamplingComment,
  negativeKeywords: string[]
): boolean {
  const score = parseScore(comment.score);
  if (score !== null && score <= 3) {
    return true;
  }
  const normalizedContent = comment.content.toLowerCase();
  return negativeKeywords.some((keyword) => {
    // Every string "includes" the empty string, so a blank keyword (for
    // example from a trailing comma in a config list) would otherwise flag
    // every comment as negative.
    if (keyword.trim() === "") {
      return false;
    }
    return normalizedContent.includes(keyword.toLowerCase());
  });
}
/**
 * Computes the heuristic "hotness" of a comment.
 *
 * The score is the content length capped at 160, plus 16 for a parsed score
 * of 4 or more (8 for exactly 3), plus 12 when an author label is present,
 * plus 4 when a creation timestamp is present.
 *
 * @param comment - Comment to rate.
 * @returns The hot score; higher means the comment ranks earlier in the hot pool.
 */
function getHotScore(comment: ReviewSamplingComment): number {
  const rating = parseScore(comment.score);
  let total = Math.min(comment.content.length, 160);
  if (rating !== null) {
    if (rating >= 4) {
      total += 16;
    } else if (rating === 3) {
      total += 8;
    }
  }
  if (comment.authorLabel) {
    total += 12;
  }
  if (comment.createdAt) {
    total += 4;
  }
  return total;
}
/**
 * Removes comments whose id has already been seen.
 *
 * @param comments - Comments in their original order.
 * @returns A new array holding the first occurrence of each id, in input order.
 */
function dedupeComments<T extends ReviewSamplingComment>(comments: T[]): T[] {
  const seenIds = new Set<string>();
  return comments.filter((candidate) => {
    if (seenIds.has(candidate.id)) {
      return false;
    }
    seenIds.add(candidate.id);
    return true;
  });
}
/**
 * Splits a target sample size into per-bucket quotas.
 *
 * Only buckets flagged as available receive a quota. Their weights are
 * re-normalized to sum to one, each bucket gets the floor of its exact share,
 * and leftover slots go one at a time to the buckets with the largest
 * fractional part (ties broken by quota priority order).
 *
 * @param targetCount - Requested total sample size; normalized before use.
 * @param availability - Whether each bucket has any comments to offer.
 * @returns Quota per bucket; all zeros when the target is 0 or no bucket is available.
 */
function buildBucketTargets(
  targetCount: number,
  availability: Record<ReviewSamplingBucket, boolean>
): Record<ReviewSamplingBucket, number> {
  const quotas: Record<ReviewSamplingBucket, number> = { latest: 0, hot: 0, negative: 0 };
  const budget = normalizeBudget(targetCount);
  const usableBuckets = QUOTA_PRIORITY.filter((bucket) => availability[bucket]);
  if (budget === 0 || usableBuckets.length === 0) {
    return quotas;
  }

  let weightTotal = 0;
  for (const bucket of usableBuckets) {
    weightTotal += BUCKET_WEIGHTS[bucket];
  }

  // Hand out the whole-number part of every share and remember the fractions.
  const leftovers: Array<{ bucket: ReviewSamplingBucket; fraction: number }> = [];
  let handedOut = 0;
  for (const bucket of usableBuckets) {
    const exactShare = (budget * BUCKET_WEIGHTS[bucket]) / weightTotal;
    const wholeShare = Math.floor(exactShare);
    quotas[bucket] = wholeShare;
    handedOut += wholeShare;
    leftovers.push({ bucket, fraction: exactShare - wholeShare });
  }

  // Largest fractional part first; equal fractions fall back to quota priority.
  leftovers.sort((a, b) => {
    const byFraction = b.fraction - a.fraction;
    const byPriority = QUOTA_PRIORITY.indexOf(a.bucket) - QUOTA_PRIORITY.indexOf(b.bucket);
    return byFraction || byPriority;
  });

  let unassigned = budget - handedOut;
  for (const { bucket } of leftovers) {
    if (unassigned <= 0) {
      break;
    }
    quotas[bucket] += 1;
    unassigned -= 1;
  }
  return quotas;
}
/**
 * Reports whether a pool still holds a comment that has not been selected.
 *
 * @param pool - Candidate comments of one bucket.
 * @param usedIds - Ids of comments that were already selected.
 * @returns True when at least one comment in the pool has an id outside `usedIds`.
 */
function hasUnusedComment<T extends ReviewSamplingComment>(pool: T[], usedIds: Set<string>): boolean {
  for (const candidate of pool) {
    if (!usedIds.has(candidate.id)) {
      return true;
    }
  }
  return false;
}
/**
 * Selects the first not-yet-used comment from a bucket's pool.
 *
 * On success the comment's id is added to `usedIds`, the comment is appended
 * to `selected` tagged with `bucket`, and `bucketCounts[bucket]` is
 * incremented. Nothing is mutated when the pool has no unused comment.
 *
 * @param bucket - Bucket the pick is attributed to.
 * @param pool - Candidates in priority order.
 * @param usedIds - Ids already selected; updated in place.
 * @param selected - Running list of selections; updated in place.
 * @param bucketCounts - Per-bucket selection counters; updated in place.
 * @returns True when a comment was picked, false when the pool is exhausted.
 */
function pickNextComment<T extends ReviewSamplingComment>(
  bucket: ReviewSamplingBucket,
  pool: T[],
  usedIds: Set<string>,
  selected: Array<SampledReviewComment<T>>,
  bucketCounts: Record<ReviewSamplingBucket, number>
): boolean {
  const next = pool.find((candidate) => !usedIds.has(candidate.id));
  if (next === undefined) {
    return false;
  }
  usedIds.add(next.id);
  selected.push({ bucket, comment: next });
  bucketCounts[bucket] += 1;
  return true;
}
/**
 * Distributes a task-wide review budget across candidates.
 *
 * Duplicate candidate ids are collapsed. The distributable total is the task
 * limit capped at `candidates × perLinkLimit`. Every candidate receives the
 * same base share, and any remainder is handed out one unit at a time in
 * candidate order without ever exceeding the per-link limit.
 *
 * @param candidateIds - Candidate identifiers; order determines who gets leftover units first.
 * @param perLinkLimit - Maximum budget for a single candidate; normalized before use.
 * @param taskTotalLimit - Maximum budget for the whole task; normalized before use.
 * @returns Map from each unique candidate id to its budget (all zeros when
 *   either limit normalizes to 0).
 */
export function buildReviewBudgetPlan(
  candidateIds: string[],
  perLinkLimit: number,
  taskTotalLimit: number
): Map<string, number> {
  const ids = [...new Set(candidateIds)];
  const plan = new Map<string, number>();
  for (const id of ids) {
    plan.set(id, 0);
  }

  const perLinkCap = normalizeBudget(perLinkLimit);
  const taskCap = normalizeBudget(taskTotalLimit);
  const nothingToPlan = ids.length === 0 || perLinkCap === 0 || taskCap === 0;
  if (nothingToPlan) {
    return plan;
  }

  const distributable = Math.min(taskCap, ids.length * perLinkCap);
  const evenShare = Math.min(perLinkCap, Math.floor(distributable / ids.length));
  ids.forEach((id) => plan.set(id, evenShare));

  // Round-robin the leftover units; stop once a full pass assigns nothing.
  let leftover = distributable - evenShare * ids.length;
  let madeProgress = true;
  while (leftover > 0 && madeProgress) {
    madeProgress = false;
    for (const id of ids) {
      if (leftover === 0) {
        break;
      }
      const current = plan.get(id) ?? 0;
      if (current < perLinkCap) {
        plan.set(id, current + 1);
        leftover -= 1;
        madeProgress = true;
      }
    }
  }
  return plan;
}
/**
 * Draws a bucketed sample of review comments.
 *
 * Comments are de-duplicated by id and split into a negative pool and a
 * non-negative pool. The non-negative pool serves the "latest" bucket in
 * input order (presumably callers pass comments newest-first — TODO confirm)
 * and the "hot" bucket in descending hot-score order. Per-bucket quotas are
 * derived from the bucket weights, filled in selection-priority order, and
 * any shortfall is topped up from whichever pool still has unused comments.
 *
 * @param comments - Candidate comments; duplicates by id are ignored after the first.
 * @param targetCount - Desired sample size; normalized to a non-negative integer.
 * @param negativeKeywords - Keywords that classify a comment as negative.
 * @returns The selected comments with their bucket tags, per-bucket counts,
 *   and a flag telling whether fewer comments than requested were available.
 */
export function sampleReviewComments<T extends ReviewSamplingComment>(
  comments: T[],
  targetCount: number,
  negativeKeywords: string[]
): ReviewSamplingResult<T> {
  const uniqueComments = dedupeComments(comments);
  const normalizedTarget = normalizeBudget(targetCount);
  const bucketCounts: Record<ReviewSamplingBucket, number> = {
    latest: 0,
    hot: 0,
    negative: 0
  };
  if (normalizedTarget === 0 || uniqueComments.length === 0) {
    return {
      targetCount: normalizedTarget,
      actualCount: 0,
      sampleInsufficient: normalizedTarget > 0,
      bucketCounts,
      comments: []
    };
  }

  // Partition once; ids are unique after de-duplication, so each comment
  // lands in exactly one of the two pools and keeps its input order.
  const negativePool: T[] = [];
  const latestPool: T[] = [];
  for (const comment of uniqueComments) {
    if (isNegativeComment(comment, negativeKeywords)) {
      negativePool.push(comment);
    } else {
      latestPool.push(comment);
    }
  }

  // Compute each hot score and original position once instead of re-deriving
  // them inside the comparator (which cost a linear scan per comparison).
  // Equal scores keep their input order.
  const hotPool = latestPool
    .map((comment, index) => ({ comment, index, hotScore: getHotScore(comment) }))
    .sort((left, right) => right.hotScore - left.hotScore || left.index - right.index)
    .map((entry) => entry.comment);

  const bucketPools: Record<ReviewSamplingBucket, T[]> = {
    latest: latestPool,
    hot: hotPool,
    negative: negativePool
  };
  const bucketTargets = buildBucketTargets(normalizedTarget, {
    latest: latestPool.length > 0,
    hot: hotPool.length > 0,
    negative: negativePool.length > 0
  });
  const selected: Array<SampledReviewComment<T>> = [];
  const usedIds = new Set<string>();

  // First pass: fill each bucket up to its quota, in selection-priority order.
  for (const bucket of SELECTION_PRIORITY) {
    while (
      bucketCounts[bucket] < bucketTargets[bucket] &&
      pickNextComment(bucket, bucketPools[bucket], usedIds, selected, bucketCounts)
    ) {
      // Continue until this bucket reaches its target or runs out of comments.
    }
  }

  // Second pass: top up until the target is met or every pool is exhausted.
  while (selected.length < normalizedTarget) {
    // Prefer a bucket that is still below quota and has something left to give.
    const deficitBucket = SELECTION_PRIORITY.find(
      (bucket) =>
        bucketCounts[bucket] < bucketTargets[bucket] &&
        hasUnusedComment(bucketPools[bucket], usedIds)
    );
    if (deficitBucket) {
      if (pickNextComment(deficitBucket, bucketPools[deficitBucket], usedIds, selected, bucketCounts)) {
        continue;
      }
    }
    // Otherwise take from the first pool (in fallback order) with unused comments.
    const fallbackBucket = FALLBACK_PRIORITY.find((bucket) =>
      hasUnusedComment(bucketPools[bucket], usedIds)
    );
    if (!fallbackBucket) {
      break;
    }
    if (!pickNextComment(fallbackBucket, bucketPools[fallbackBucket], usedIds, selected, bucketCounts)) {
      break;
    }
  }

  return {
    targetCount: normalizedTarget,
    actualCount: selected.length,
    sampleInsufficient: selected.length < normalizedTarget,
    bucketCounts,
    comments: selected
  };
}