313 lines
8.4 KiB
TypeScript
313 lines
8.4 KiB
TypeScript
/** Name of the sampling bucket a selected review comment is attributed to. */
export type ReviewSamplingBucket = "latest" | "hot" | "negative";
|
|
|
|
/**
 * Minimal shape of a review comment that the sampling helpers in this file operate on.
 */
export interface ReviewSamplingComment {
  /** Identifier used to de-duplicate comments and to track which ones were already sampled. */
  id: string;
  /** Comment text; matched case-insensitively against negative keywords, and its length feeds the hot score. */
  content: string;
  /** Rating as a string (parsed as a base-10 integer by this file), or `null` when absent. */
  score: string | null;
  /** Creation timestamp as a string, or `null`; this file only checks whether it is present. */
  createdAt: string | null;
  /** Author label, or `null`; this file only checks whether it is present. */
  authorLabel: string | null;
}
|
|
|
|
/**
 * A comment chosen by the sampler, paired with the bucket it was counted against.
 */
export interface SampledReviewComment<T extends ReviewSamplingComment = ReviewSamplingComment> {
  /** Bucket whose counter was incremented when this comment was picked. */
  bucket: ReviewSamplingBucket;
  /** The original comment object, passed through unchanged. */
  comment: T;
}
|
|
|
|
/**
 * Outcome of one sampling run.
 */
export interface ReviewSamplingResult<T extends ReviewSamplingComment = ReviewSamplingComment> {
  /** Requested sample size after normalization to a non-negative integer. */
  targetCount: number;
  /** Number of comments actually selected. */
  actualCount: number;
  /** `true` when fewer comments were selected than `targetCount` asked for. */
  sampleInsufficient: boolean;
  /** How many of the selected comments were attributed to each bucket. */
  bucketCounts: Record<ReviewSamplingBucket, number>;
  /** Selected comments, in the order they were picked. */
  comments: Array<SampledReviewComment<T>>;
}
|
|
|
|
/**
 * Relative share of the sample that each bucket should receive.
 * Declared read-only because the table is shared by every sampling call.
 */
const BUCKET_WEIGHTS: Readonly<Record<ReviewSamplingBucket, number>> = {
  latest: 0.4,
  hot: 0.3,
  negative: 0.3
};
|
|
|
|
// The three orderings below are shared module-level tables; they are declared
// `readonly` so that an accidental in-place `.sort()`/`.push()` cannot corrupt them.

/** Bucket order used when computing quotas and when breaking ties between equal fractions. */
const QUOTA_PRIORITY: readonly ReviewSamplingBucket[] = ["latest", "hot", "negative"];
/** Bucket order in which quotas are filled during selection. */
const SELECTION_PRIORITY: readonly ReviewSamplingBucket[] = ["negative", "hot", "latest"];
/** Bucket order used to top up the sample once no bucket is below its quota. */
const FALLBACK_PRIORITY: readonly ReviewSamplingBucket[] = ["latest", "hot", "negative"];
|
|
|
|
/**
 * Coerces a requested budget or count into a non-negative integer.
 *
 * @param value - Requested amount; may be fractional, negative, `NaN` or infinite.
 * @returns `0` when `value` is not finite or not positive; otherwise `value` rounded down.
 */
function normalizeBudget(value: number): number {
  const isUsable = Number.isFinite(value) && value > 0;
  return isUsable ? Math.floor(value) : 0;
}
|
|
|
|
/**
 * Parses a textual rating into an integer.
 *
 * Uses `Number.parseInt` with radix 10, so leading whitespace is skipped, trailing
 * non-digit text is ignored, and any fractional part is dropped (`"4.9"` yields `4`).
 *
 * @param score - Raw score text, or `null` when the comment carries no score.
 * @returns The parsed integer, or `null` when `score` is `null`, empty, or does not
 *   start with a number.
 */
function parseScore(score: string | null): number | null {
  if (!score) {
    return null;
  }

  const parsed = Number.parseInt(score, 10);
  if (Number.isNaN(parsed)) {
    return null;
  }
  return parsed;
}
|
|
|
|
/**
 * Decides whether a comment should be treated as negative.
 *
 * A comment is negative when its parsed score is 3 or lower, or when its content
 * contains any of the given keywords (case-insensitive substring match).
 *
 * @param comment - Comment to classify.
 * @param negativeKeywords - Keywords that mark a comment as negative. Empty or
 *   whitespace-only entries are ignored.
 * @returns `true` when the comment is considered negative.
 */
function isNegativeComment(
  comment: ReviewSamplingComment,
  negativeKeywords: string[]
): boolean {
  const score = parseScore(comment.score);
  if (score !== null && score <= 3) {
    return true;
  }

  const haystack = comment.content.toLowerCase();
  return negativeKeywords.some((keyword) => {
    // `"anything".includes("")` is always true, so a blank keyword (for example
    // from splitting "bad,,awful") would otherwise flag every comment as negative.
    if (keyword.trim() === "") {
      return false;
    }
    return haystack.includes(keyword.toLowerCase());
  });
}
|
|
|
|
/**
 * Computes the heuristic used to rank comments for the "hot" bucket.
 *
 * The score is the sum of:
 * - the content length, capped at 160;
 * - 16 when the parsed score is 4 or higher, 8 when it is exactly 3, otherwise 0;
 * - 12 when the comment has a non-empty author label;
 * - 4 when the comment has a non-empty creation timestamp.
 *
 * @param comment - Comment to score.
 * @returns The hot score; higher means the comment is ranked earlier.
 */
function getHotScore(comment: ReviewSamplingComment): number {
  const score = parseScore(comment.score);

  let scoreBonus = 0;
  if (score !== null && score >= 4) {
    scoreBonus = 16;
  } else if (score === 3) {
    scoreBonus = 8;
  }

  const authorBonus = comment.authorLabel ? 12 : 0;
  const timestampBonus = comment.createdAt ? 4 : 0;
  const lengthPoints = Math.min(comment.content.length, 160);

  return lengthPoints + scoreBonus + authorBonus + timestampBonus;
}
|
|
|
|
/**
 * Removes comments whose `id` has already been seen.
 *
 * @param comments - Comments in their original order.
 * @returns A new array that keeps the first occurrence of each `id`, in input order.
 */
function dedupeComments<T extends ReviewSamplingComment>(comments: T[]): T[] {
  const seenIds = new Set<string>();
  const unique: T[] = [];

  for (const comment of comments) {
    if (seenIds.has(comment.id)) {
      continue;
    }
    seenIds.add(comment.id);
    unique.push(comment);
  }

  return unique;
}
|
|
|
|
/**
 * Splits a sample size across the buckets that currently have candidates.
 *
 * Each available bucket receives a share proportional to its `BUCKET_WEIGHTS` entry,
 * re-scaled by the combined weight of the available buckets. Shares are rounded down,
 * and the units lost to rounding are then handed out, at most one per bucket, to the
 * buckets with the largest fractional part (ties resolved by `QUOTA_PRIORITY` order).
 *
 * @param targetCount - Desired total; passed through `normalizeBudget`.
 * @param availability - Whether each bucket has at least one candidate.
 * @returns Per-bucket targets. Unavailable buckets stay at 0, and every bucket is 0
 *   when the normalized target is 0 or no bucket is available.
 */
function buildBucketTargets(
  targetCount: number,
  availability: Record<ReviewSamplingBucket, boolean>
): Record<ReviewSamplingBucket, number> {
  const targets: Record<ReviewSamplingBucket, number> = { latest: 0, hot: 0, negative: 0 };
  const total = normalizeBudget(targetCount);
  const activeBuckets = QUOTA_PRIORITY.filter((bucket) => availability[bucket]);

  if (total === 0 || activeBuckets.length === 0) {
    return targets;
  }

  let weightSum = 0;
  for (const bucket of activeBuckets) {
    weightSum += BUCKET_WEIGHTS[bucket];
  }

  // First pass: give every bucket the whole part of its share and remember the fraction.
  let allocated = 0;
  const leftovers: Array<{ bucket: ReviewSamplingBucket; fraction: number }> = [];
  for (const bucket of activeBuckets) {
    const exactShare = (total * BUCKET_WEIGHTS[bucket]) / weightSum;
    const wholeShare = Math.floor(exactShare);
    targets[bucket] = wholeShare;
    allocated += wholeShare;
    leftovers.push({ bucket, fraction: exactShare - wholeShare });
  }

  // Second pass: largest fraction first, quota priority as the tie-breaker.
  leftovers.sort(
    (left, right) =>
      right.fraction - left.fraction ||
      QUOTA_PRIORITY.indexOf(left.bucket) - QUOTA_PRIORITY.indexOf(right.bucket)
  );

  let remainder = total - allocated;
  for (const { bucket } of leftovers) {
    if (remainder <= 0) {
      break;
    }
    targets[bucket] += 1;
    remainder -= 1;
  }

  return targets;
}
|
|
|
|
/**
 * Reports whether a pool still contains a comment that has not been selected.
 *
 * @param pool - Candidate comments for one bucket.
 * @param usedIds - Ids of comments that were already selected.
 * @returns `true` when at least one comment in `pool` has an id not in `usedIds`.
 */
function hasUnusedComment<T extends ReviewSamplingComment>(pool: T[], usedIds: Set<string>): boolean {
  for (const candidate of pool) {
    if (!usedIds.has(candidate.id)) {
      return true;
    }
  }
  return false;
}
|
|
|
|
/**
 * Selects the first not-yet-used comment from a pool and records it under `bucket`.
 *
 * On success this mutates its arguments: the comment id is added to `usedIds`, the
 * comment is appended to `selected`, and `bucketCounts[bucket]` is incremented.
 *
 * @param bucket - Bucket the pick is attributed to.
 * @param pool - Candidates in priority order.
 * @param usedIds - Ids of comments that were already selected.
 * @param selected - Accumulated selection.
 * @param bucketCounts - Per-bucket counters of selected comments.
 * @returns `true` when a comment was picked, `false` when every comment in `pool`
 *   was already used (nothing is mutated in that case).
 */
function pickNextComment<T extends ReviewSamplingComment>(
  bucket: ReviewSamplingBucket,
  pool: T[],
  usedIds: Set<string>,
  selected: Array<SampledReviewComment<T>>,
  bucketCounts: Record<ReviewSamplingBucket, number>
): boolean {
  const next = pool.find((candidate) => !usedIds.has(candidate.id));
  if (next === undefined) {
    return false;
  }

  usedIds.add(next.id);
  selected.push({ bucket, comment: next });
  bucketCounts[bucket] += 1;
  return true;
}
|
|
|
|
/**
 * Distributes a task-wide review budget across candidates as evenly as possible.
 *
 * Duplicate candidate ids are collapsed (first occurrence keeps its position). The total
 * handed out is the smaller of the task limit and `candidates * perLinkLimit`. Every
 * candidate receives the same base amount, and the units left over after the even split
 * go one each to the earliest candidates.
 *
 * @param candidateIds - Candidate identifiers; order decides who receives leftover units.
 * @param perLinkLimit - Maximum budget for a single candidate; passed through `normalizeBudget`.
 * @param taskTotalLimit - Maximum budget for all candidates together; passed through `normalizeBudget`.
 * @returns A map from each unique candidate id to its budget. All budgets are 0 when
 *   there are no candidates or either normalized limit is 0.
 */
export function buildReviewBudgetPlan(
  candidateIds: string[],
  perLinkLimit: number,
  taskTotalLimit: number
): Map<string, number> {
  const uniqueCandidateIds = Array.from(new Set(candidateIds));
  const budgets = new Map<string, number>(
    uniqueCandidateIds.map((candidateId) => [candidateId, 0] as const)
  );
  const normalizedPerLinkLimit = normalizeBudget(perLinkLimit);
  const normalizedTaskTotalLimit = normalizeBudget(taskTotalLimit);
  const candidateCount = uniqueCandidateIds.length;

  if (candidateCount === 0 || normalizedPerLinkLimit === 0 || normalizedTaskTotalLimit === 0) {
    return budgets;
  }

  const totalBudget = Math.min(normalizedTaskTotalLimit, candidateCount * normalizedPerLinkLimit);

  // totalBudget <= candidateCount * perLinkLimit, so the even share never exceeds the
  // per-link limit and needs no extra clamping.
  const baseAllocation = Math.floor(totalBudget / candidateCount);

  // The leftover is smaller than candidateCount, and a non-zero leftover implies
  // baseAllocation < perLinkLimit, so one extra unit per leading candidate stays within the cap.
  const leftover = totalBudget - baseAllocation * candidateCount;

  uniqueCandidateIds.forEach((candidateId, position) => {
    budgets.set(candidateId, position < leftover ? baseAllocation + 1 : baseAllocation);
  });

  return budgets;
}
|
|
|
|
/**
 * Picks up to `targetCount` comments, balanced across the "negative", "hot" and "latest" buckets.
 *
 * Steps:
 * 1. De-duplicate by id and normalize `targetCount`.
 * 2. Partition comments: those flagged by `isNegativeComment` form the negative pool, the
 *    rest form the latest pool in input order. The hot pool holds the same comments as the
 *    latest pool, ordered by `getHotScore` descending (input order breaks ties).
 * 3. Compute per-bucket quotas with `buildBucketTargets`.
 * 4. Fill quotas in `SELECTION_PRIORITY` order; a comment is never selected twice, so a
 *    comment taken by "hot" is no longer available to "latest".
 * 5. While the sample is still short, top up from any bucket still below quota, then from
 *    any bucket with unused comments in `FALLBACK_PRIORITY` order.
 *
 * NOTE(review): the latest pool is never sorted by `createdAt`; presumably callers pass
 * comments newest-first — confirm against the caller.
 *
 * @param comments - Candidate comments; duplicates by id are ignored after the first.
 * @param targetCount - Desired sample size; passed through `normalizeBudget`.
 * @param negativeKeywords - Keywords forwarded to `isNegativeComment`.
 * @returns The selection, its per-bucket counts, and whether it fell short of the target.
 */
export function sampleReviewComments<T extends ReviewSamplingComment>(
  comments: T[],
  targetCount: number,
  negativeKeywords: string[]
): ReviewSamplingResult<T> {
  const uniqueComments = dedupeComments(comments);
  const normalizedTarget = normalizeBudget(targetCount);
  const bucketCounts: Record<ReviewSamplingBucket, number> = { latest: 0, hot: 0, negative: 0 };

  if (normalizedTarget === 0 || uniqueComments.length === 0) {
    return {
      targetCount: normalizedTarget,
      actualCount: 0,
      sampleInsufficient: normalizedTarget > 0,
      bucketCounts,
      comments: []
    };
  }

  // Single pass so that every comment is classified exactly once.
  const negativePool: T[] = [];
  const latestPool: T[] = [];
  for (const comment of uniqueComments) {
    if (isNegativeComment(comment, negativeKeywords)) {
      negativePool.push(comment);
    } else {
      latestPool.push(comment);
    }
  }

  // Precompute score and position once per comment; looking them up inside the
  // comparator would rescan the pool on every comparison. Ids are unique after
  // de-duplication, so the array position is the same tie-breaker as a lookup by id.
  const hotPool = latestPool
    .map((comment, position) => ({ comment, position, hotScore: getHotScore(comment) }))
    .sort((left, right) => right.hotScore - left.hotScore || left.position - right.position)
    .map((entry) => entry.comment);

  const bucketPools: Record<ReviewSamplingBucket, T[]> = {
    latest: latestPool,
    hot: hotPool,
    negative: negativePool
  };
  const bucketTargets = buildBucketTargets(normalizedTarget, {
    latest: latestPool.length > 0,
    hot: hotPool.length > 0,
    negative: negativePool.length > 0
  });
  const selected: Array<SampledReviewComment<T>> = [];
  const usedIds = new Set<string>();

  // Fill each bucket up to its quota, or until its pool runs out of unused comments.
  for (const bucket of SELECTION_PRIORITY) {
    while (bucketCounts[bucket] < bucketTargets[bucket]) {
      if (!pickNextComment(bucket, bucketPools[bucket], usedIds, selected, bucketCounts)) {
        break;
      }
    }
  }

  // Top up: prefer a bucket that is still below quota, otherwise any bucket with leftovers.
  while (selected.length < normalizedTarget) {
    const nextBucket =
      SELECTION_PRIORITY.find(
        (bucket) =>
          bucketCounts[bucket] < bucketTargets[bucket] &&
          hasUnusedComment(bucketPools[bucket], usedIds)
      ) ?? FALLBACK_PRIORITY.find((bucket) => hasUnusedComment(bucketPools[bucket], usedIds));

    if (nextBucket === undefined) {
      break;
    }

    if (!pickNextComment(nextBucket, bucketPools[nextBucket], usedIds, selected, bucketCounts)) {
      break;
    }
  }

  return {
    targetCount: normalizedTarget,
    actualCount: selected.length,
    sampleInsufficient: selected.length < normalizedTarget,
    bucketCounts,
    comments: selected
  };
}
|