feat: enhance ChangeExtractorService with interval-based scoping

- Implemented deterministic interval scoping in ChangeExtractorService to improve task change retrieval when no specific scope is found.
- Added a new method, extractIntervalScopedChanges, to handle changes based on provided intervals, enhancing the accuracy of change tracking.
- Updated the logic for deriving intervals from task status history, ensuring better handling of task transitions.
- Refactored related code for improved clarity and maintainability, including better type definitions and error handling.
This commit is contained in:
iliya 2026-03-04 15:44:30 +02:00
parent 31c4c7a441
commit bd781aed2f
2 changed files with 227 additions and 44 deletions

View file

@ -139,8 +139,46 @@ export class ChangeExtractorService {
}
}
// Если scope не найден — fallback на весь файл
// If no scope is found, try deterministic interval scoping; otherwise fall back to the whole file
if (allScopes.length === 0) {
const intervals = options?.intervals ?? taskMeta?.intervals;
if (Array.isArray(intervals) && intervals.length > 0) {
const { files, toolUseIds, startTimestamp, endTimestamp } =
await this.extractIntervalScopedChanges(logRefs, intervals, projectPath);
const intervalScope: TaskChangeScope = {
taskId,
memberName: taskMeta?.owner ?? logRefs[0]?.memberName ?? '',
startLine: 0,
endLine: 0,
startTimestamp,
endTimestamp,
toolUseIds,
filePaths: files.map((f) => f.filePath),
confidence: {
tier: 2,
label: 'medium',
reason: 'Scoped by persisted task workIntervals (timestamp-based)',
},
};
return {
teamName,
taskId,
files,
totalLinesAdded: files.reduce((sum, f) => sum + f.linesAdded, 0),
totalLinesRemoved: files.reduce((sum, f) => sum + f.linesRemoved, 0),
totalFiles: files.length,
confidence: 'medium',
computedAt: new Date().toISOString(),
scope: intervalScope,
warnings:
files.length === 0
? ['No file edits found within persisted workIntervals.']
: ['Task boundaries missing — scoped by workIntervals timestamps.'],
};
}
return this.fallbackSingleTaskScope(teamName, taskId, logRefs, projectPath);
}
@ -203,10 +241,46 @@ export class ChangeExtractorService {
typeof (i as Record<string, unknown>).completedAt === 'string')
)
: undefined;
const derivedIntervals = (() => {
if (Array.isArray(intervals) && intervals.length > 0) return intervals;
const rawHistory = parsed.statusHistory;
if (!Array.isArray(rawHistory)) return undefined;
const transitions = rawHistory
.map((h) => (h && typeof h === 'object' ? (h as Record<string, unknown>) : null))
.filter((h): h is Record<string, unknown> => h !== null)
.map((h) => ({
to: typeof h.to === 'string' ? h.to : null,
timestamp: typeof h.timestamp === 'string' ? h.timestamp : null,
}))
.filter(
(t): t is { to: string; timestamp: string } => t.to !== null && t.timestamp !== null
)
.sort((a, b) => Date.parse(a.timestamp) - Date.parse(b.timestamp));
if (transitions.length === 0) return undefined;
const derived: { startedAt: string; completedAt?: string }[] = [];
let currentStart: string | null = null;
for (const t of transitions) {
if (t.to === 'in_progress') {
if (!currentStart) currentStart = t.timestamp;
continue;
}
if (currentStart) {
derived.push({ startedAt: currentStart, completedAt: t.timestamp });
currentStart = null;
}
}
if (currentStart) derived.push({ startedAt: currentStart });
return derived.length > 0 ? derived : undefined;
})();
return {
owner: typeof parsed.owner === 'string' ? parsed.owner : undefined,
status: typeof parsed.status === 'string' ? parsed.status : undefined,
intervals,
intervals: derivedIntervals,
};
} catch {
return null;
@ -223,6 +297,74 @@ export class ChangeExtractorService {
}
}
/**
 * Gathers the snippets from the given logs whose timestamp lies inside at least one
 * of the supplied intervals and aggregates them per file.
 *
 * Intervals whose `startedAt` does not parse to a finite time are discarded. An interval
 * whose `completedAt` is absent or unparseable has no upper bound, so it matches every
 * timestamp at or after its start. Both interval bounds are inclusive.
 *
 * @param logRefs - Log references; each `filePath` is read through `parseJSONLFile`, one at a time.
 * @param intervals - Candidate intervals (`startedAt`, optional `completedAt`).
 * @param projectPath - Passed through unchanged to `aggregateByFile`.
 * @returns `files` as produced by `aggregateByFile` for the kept snippets; `toolUseIds` as the
 *   distinct truthy `toolUseId` values of kept snippets in first-seen order; `startTimestamp`
 *   as the `startedAt` of the earliest valid interval (`''` when none is valid); and
 *   `endTimestamp` as the `completedAt` of the latest-ending bounded interval (`''` when no
 *   interval has a parseable end).
 */
private async extractIntervalScopedChanges(
  logRefs: LogFileRef[],
  intervals: { startedAt: string; completedAt?: string }[],
  projectPath?: string
): Promise<{
  files: FileChangeSummary[];
  toolUseIds: string[];
  startTimestamp: string;
  endTimestamp: string;
}> {
  // Parse every interval once, keep only those with a usable start, order by start time.
  const windows = intervals
    .map((interval) => {
      const parsedEnd =
        typeof interval.completedAt === 'string' ? Date.parse(interval.completedAt) : Number.NaN;
      return {
        startMs: Date.parse(interval.startedAt),
        // null marks an interval without a usable upper bound.
        endMs: Number.isFinite(parsedEnd) ? parsedEnd : null,
        startedAt: interval.startedAt,
        completedAt: interval.completedAt,
      };
    })
    .filter((window) => Number.isFinite(window.startMs))
    .sort((a, b) => a.startMs - b.startMs);

  const startTimestamp = windows[0]?.startedAt ?? '';

  // Track the bounded interval that ends last; unbounded intervals do not contribute.
  let latestEndMs: number | null = null;
  let endTimestamp = '';
  for (const window of windows) {
    if (window.endMs == null || typeof window.completedAt !== 'string') continue;
    if (latestEndMs === null || window.endMs > latestEndMs) {
      latestEndMs = window.endMs;
      endTimestamp = window.completedAt;
    }
  }

  // A timestamp qualifies when it parses and sits inside any window (bounds inclusive).
  const fallsInsideWindow = (timestamp: string): boolean => {
    const at = Date.parse(timestamp);
    return (
      Number.isFinite(at) &&
      windows.some((window) => at >= window.startMs && (window.endMs == null || at <= window.endMs))
    );
  };

  const keptSnippets: SnippetDiff[] = [];
  const seenToolUseIds = new Set<string>();
  for (const logRef of logRefs) {
    const parsedSnippets = await this.parseJSONLFile(logRef.filePath);
    for (const snippet of parsedSnippets) {
      // Errored snippets are never counted, regardless of their timestamp.
      if (snippet.isError || !fallsInsideWindow(snippet.timestamp)) continue;
      keptSnippets.push(snippet);
      if (snippet.toolUseId) seenToolUseIds.add(snippet.toolUseId);
    }
  }

  return {
    files: this.aggregateByFile(keptSnippets, projectPath),
    toolUseIds: Array.from(seenToolUseIds),
    startTimestamp,
    endTimestamp,
  };
}
/**
* Compute a context hash from old/newString for reliable hunksnippet matching.
* Uses first+last 3 lines of both strings as a fingerprint.

View file

@ -472,52 +472,86 @@ export class TeamMemberLogsFinder {
teamName: string,
taskId: string
): Promise<boolean> {
const escaped = taskId.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const numericTaskId = /^\d+$/.test(taskId) ? taskId : null;
const teamEscaped = escapeRegex(teamName);
const teamPatterns: RegExp[] = [
// Team tool inputs often include team_name
new RegExp(`"team_name"\\s*:\\s*"${teamEscaped}"`, 'i'),
// Some variants may use teamName or team
new RegExp(`"teamName"\\s*:\\s*"${teamEscaped}"`, 'i'),
new RegExp(`"team"\\s*:\\s*"${teamEscaped}"`, 'i'),
// CLI usage: node ".../teamctl.js" --team team-alpha task start 9
new RegExp(`\\b--team\\b\\s*(?:=\\s*)?(?:"${teamEscaped}"|${teamEscaped})\\b`, 'i'),
];
const patterns: RegExp[] = [
new RegExp(`"task_id"\\s*:\\s*"${escaped}"`, 'i'),
new RegExp(`"taskId"\\s*:\\s*"${escaped}"`, 'i'),
];
if (numericTaskId) {
patterns.push(
new RegExp(`"task_id"\\s*:\\s*${numericTaskId}\\b`),
new RegExp(`"taskId"\\s*:\\s*${numericTaskId}\\b`),
// Support teamctl command lines (may appear in tool output).
// Example: node ".../teamctl.js" --team "t" task start 10
new RegExp(`\\bteamctl(?:\\.js)?\\b.{0,350}\\b${numericTaskId}\\b`, 'i')
);
}
const teamLower = teamName.trim().toLowerCase();
const taskIdStr = taskId.trim();
const extractTaskIdFromUnknown = (raw: unknown): string | null => {
if (typeof raw === 'string') return raw.trim();
if (typeof raw === 'number' && Number.isFinite(raw)) return String(raw);
return null;
};
const extractTeamFromInput = (input: Record<string, unknown>): string | null => {
const raw =
typeof input.team_name === 'string'
? input.team_name
: typeof input.teamName === 'string'
? input.teamName
: typeof input.team === 'string'
? input.team
: null;
return typeof raw === 'string' ? raw.trim() : null;
};
const matchesTeamctlCommand = (command: string): boolean => {
if (!/\bteamctl(?:\.js)?\b/i.test(command)) return false;
const teamMatch = /\s--team(?:\s+|=)(?:"([^"]+)"|'([^']+)'|([^\s]+))/i.exec(command);
const cmdTeam = (teamMatch?.[1] ?? teamMatch?.[2] ?? teamMatch?.[3])?.trim();
if (cmdTeam?.toLowerCase() !== teamLower) return false;
const taskMatch = /\btask\s+(?:start|complete|set-status)\s+(\d+)\b/i.exec(command);
const cmdTaskId = taskMatch?.[1];
return Boolean(cmdTaskId && cmdTaskId === taskIdStr);
};
try {
const stream = createReadStream(filePath, { encoding: 'utf8' });
const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
let foundTask = false;
let foundTeam = false;
for await (const line of rl) {
// We require BOTH taskId and teamName to avoid cross-team collisions when multiple
// teams share the same projectPath (task IDs are only unique per team).
//
// But they often appear on different lines (e.g. team_name is in Task tool input, while
// taskId appears in a tool result or CLI output). So we track them independently.
if (!foundTask && patterns.some((re) => re.test(line))) {
foundTask = true;
}
if (!foundTeam && teamPatterns.some((re) => re.test(line))) {
foundTeam = true;
}
if (foundTask && foundTeam) {
rl.close();
stream.destroy();
return true;
const trimmed = line.trim();
if (!trimmed) continue;
try {
const entry = JSON.parse(trimmed) as Record<string, unknown>;
const content = this.extractEntryContent(entry);
if (!Array.isArray(content)) continue;
for (const block of content) {
if (!block || typeof block !== 'object') continue;
const b = block as Record<string, unknown>;
if (b.type !== 'tool_use') continue;
const rawName = typeof b.name === 'string' ? b.name : '';
const toolName = rawName.replace(/^proxy_/, '');
const input = b.input as Record<string, unknown> | undefined;
if (!input) continue;
// Deterministic structured match: any tool whose input references this task+team.
const inputTeam = extractTeamFromInput(input);
const rawTaskId = input.taskId ?? input.task_id;
const inputTaskId = extractTaskIdFromUnknown(rawTaskId);
if (
inputTeam?.toLowerCase() === teamLower &&
inputTaskId &&
inputTaskId === taskIdStr
) {
rl.close();
stream.destroy();
return true;
}
// Deterministic CLI match: teamctl command line (Bash tool).
if (toolName === 'Bash') {
const command = typeof input.command === 'string' ? input.command : '';
if (command && matchesTeamctlCommand(command)) {
rl.close();
stream.destroy();
return true;
}
}
}
} catch {
// ignore parse errors
}
}
rl.close();
@ -528,6 +562,13 @@ export class TeamMemberLogsFinder {
return false;
}
/**
 * Picks the content array out of a parsed log entry.
 *
 * `entry.message.content` wins when it is an array; otherwise `entry.content` is used
 * when it is an array.
 *
 * @param entry - Parsed log entry.
 * @returns The content array, or `null` when neither location holds an array.
 */
private extractEntryContent(entry: Record<string, unknown>): unknown[] | null {
  const nestedContent = (entry.message as Record<string, unknown> | undefined)?.content;
  if (Array.isArray(nestedContent)) {
    return nestedContent as unknown[];
  }
  const topLevelContent = entry.content;
  return Array.isArray(topLevelContent) ? (topLevelContent as unknown[]) : null;
}
private async listSessionDirs(projectDir: string): Promise<string[]> {
try {
const dirEntries = await fs.readdir(projectDir, { withFileTypes: true });