// This pass ensures that no token spans multiple lines. Additionally, it marks
// tokens that are at the start or end of their lines. This pass is only
// required for Markdown output.
import type { TextSpan, TokenInfo } from "../types";
import type { TokenInfoPass } from "./TokenInfoPass";
export class SplitByLinePass implements TokenInfoPass {
Process tokens and split any that span multiple lines, while simultaneously marking tokens that are at the start or end of their lines
process(tokens: TokenInfo[]): TokenInfo[] {
const result: TokenInfo[] = [];
// Group all resulting tokens by line as we process them
const tokensByLine = new Map<number, TokenInfo[]>();
for (const token of tokens) {
if (token.span.start.line === token.span.end.line) {
// Single line token - add to its line directly
const lineNumber = token.span.start.line;
if (!tokensByLine.has(lineNumber)) {
tokensByLine.set(lineNumber, []);
}
tokensByLine.get(lineNumber)?.push(token);
} else {
// Multi-line token - split it and add each part to its respective line
const splitTokens = this.splitTokenByLines(token);
for (const splitToken of splitTokens) {
const lineNumber = splitToken.span.start.line;
if (!tokensByLine.has(lineNumber)) {
tokensByLine.set(lineNumber, []);
}
tokensByLine.get(lineNumber)?.push(splitToken);
}
}
}
// Now process each line's tokens and add line position markers
for (const [, lineTokens] of tokensByLine) {
// Sort tokens by start column to determine position
const sortedTokens = [...lineTokens].sort(
(a, b) => a.span.start.column - b.span.start.column,
);
const firstToken = sortedTokens[0];
const lastToken = sortedTokens[sortedTokens.length - 1];
if (lastToken.meta.some((m) => m.type === "endOfFile")) {
result.push(...sortedTokens);
continue;
}
if (!firstToken.meta.some((m) => m.type === "comment")) {
firstToken.meta.push({ type: "startOfLine" });
if (lastToken.meta.some((m) => m.type === "comment")) {
firstToken.meta.push({ type: "endOfLine" });
} else {
lastToken.meta.push({ type: "endOfLine" });
}
}
// Add all processed tokens to the result array
result.push(...sortedTokens);
}
return result;
}
Split a multi-line token into multiple single-line tokens
private splitTokenByLines(token: TokenInfo): TokenInfo[] {
if (token.meta.some((m) => m.type === "comment")) {
return [token];
}
const result: TokenInfo[] = [];
const { span, meta } = token;
let currentLine = span.start.line;
let currentColumn = span.start.column;
// Iterate through each line the token spans
while (currentLine <= span.end.line) {
const isFirstLine = currentLine === span.start.line;
const isLastLine = currentLine === span.end.line;
// Calculate the span for the current line portion
const tokenSpan: TextSpan = {
start: {
line: currentLine,
column: isFirstLine ? currentColumn : 0,
},
end: {
line: currentLine,
column: isLastLine
? span.end.column
: Number.MAX_SAFE_INTEGER,
},
};
// Create new token with the same metadata but adjusted span
const splitToken: TokenInfo = {
span: tokenSpan,
meta: [...meta], // Copy metadata array
};
result.push(splitToken);
// Move to next line
currentLine++;
currentColumn = 0;
}
return result;
}
}