Skip to content

Commit

Permalink
Update MimeReader to use Span<T> for built-in intrinsics
Browse files Browse the repository at this point in the history
It's gotten to a point where .NET Core's built-in SIMD optimizations
in Span<T>.IndexOf() have gotten as good or better than my current
optimization for scanning until a '\n' in ScanContent().

It also allows me to now use Span<T>.IndexOf() in all other locations
that were using `while (*inptr != '\n') inptr++`, so we get improved
performance in those locations for free.

In some ways, it's sad to see the end of an era - but also exciting
because MimeReader is now faster than ever!
  • Loading branch information
jstedfast committed Jan 25, 2025
1 parent 5769bcd commit 5f25f1f
Show file tree
Hide file tree
Showing 2 changed files with 241 additions and 404 deletions.
112 changes: 23 additions & 89 deletions MimeKit/AsyncMimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,7 @@ async Task<bool> StepByteOrderMarkAsync (CancellationToken cancellationToken)
return false;
}

unsafe {
fixed (byte* inbuf = input) {
complete = StepByteOrderMark (inbuf, ref bomIndex);
}
}
complete = StepByteOrderMark (ref bomIndex);
} while (!complete && inputIndex == inputEnd);

return complete;
Expand All @@ -89,11 +85,7 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
return;
}

unsafe {
fixed (byte* inbuf = input) {
complete = StepMboxMarkerStart (inbuf, ref midline);
}
}
complete = StepMboxMarkerStart (ref midline);
} while (!complete);

var mboxMarkerOffset = GetOffset (inputIndex);
Expand All @@ -109,13 +101,8 @@ async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
}

int startIndex = inputIndex;
int count;

unsafe {
fixed (byte* inbuf = input) {
complete = StepMboxMarker (inbuf, out count);
}
}
complete = StepMboxMarker (out int count);

// TODO: Remove beginOffset and lineNumber arguments from OnMboxMarkerReadAsync() in v5.0
await OnMboxMarkerReadAsync (input, startIndex, count, mboxMarkerOffset, mboxMarkerLineNumber, cancellationToken).ConfigureAwait (false);
Expand Down Expand Up @@ -177,41 +164,27 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
}

// Scan ahead a bit to see if this looks like an invalid header.
do {
unsafe {
fixed (byte* inbuf = input) {
if (TryDetectInvalidHeader (inbuf, out invalid, out fieldNameLength, out headerFieldLength))
break;
}
}

while (!TryDetectInvalidHeader (out invalid, out fieldNameLength, out headerFieldLength)) {
int atleast = (inputEnd - inputIndex) + 1;

if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast) {
// Not enough input to even find the ':'... mark as invalid and continue?
invalid = true;
break;
}
} while (true);
}

if (invalid) {
// Figure out why this is an invalid header.

if (input[inputIndex] == (byte) '-') {
// Check for a boundary marker. If the message is properly formatted, this will NEVER happen.
do {
unsafe {
fixed (byte* inbuf = input) {
if (TryCheckBoundaryWithinHeaderBlock (inbuf))
break;
}
}

while (!TryCheckBoundaryWithinHeaderBlock ()) {
int atleast = (inputEnd - inputIndex) + 1;

if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
break;
} while (true);
}

// Note: If a boundary was discovered, then the state will be updated to MimeParserState.Boundary.
if (state == MimeParserState.Boundary)
Expand All @@ -220,19 +193,12 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
// Fall through and act as if we're consuming a header.
} else if (input[inputIndex] == (byte) 'F' || input[inputIndex] == (byte) '>') {
// Check for an mbox-style From-line. Again, if the message is properly formatted and not truncated, this will NEVER happen.
do {
unsafe {
fixed (byte* inbuf = input) {
if (TryCheckMboxMarkerWithinHeaderBlock (inbuf))
break;
}
}

while (!TryCheckMboxMarkerWithinHeaderBlock ()) {
int atleast = (inputEnd - inputIndex) + 1;

if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
break;
} while (true);
}

// state will be one of the following values:
// 1. Complete: This means that we've found an actual mbox marker
Expand Down Expand Up @@ -260,20 +226,13 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
bool midline = true;

// Consume the header value.
do {
unsafe {
fixed (byte* inbuf = input) {
if (StepHeaderValue (inbuf, ref midline))
break;
}
}

while (!StepHeaderValue (ref midline)) {
if (await ReadAheadAsync (1, 0, cancellationToken).ConfigureAwait (false) == 0) {
state = MimeParserState.Content;
eof = true;
break;
}
} while (true);
}

if (toplevel && headerCount == 0 && invalid && !IsMboxMarker (headerBuffer)) {
state = MimeParserState.Error;
Expand All @@ -293,12 +252,8 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
async Task<bool> SkipLineAsync (bool consumeNewLine, CancellationToken cancellationToken)
{
do {
unsafe {
fixed (byte* inbuf = input) {
if (InnerSkipLine (inbuf, consumeNewLine))
return true;
}
}
if (InnerSkipLine (consumeNewLine))
return true;

if (await ReadAheadAsync (ReadAheadSize, 1, cancellationToken).ConfigureAwait (false) <= 0)
return false;
Expand Down Expand Up @@ -346,11 +301,7 @@ async Task<ScanContentResult> ScanContentAsync (ScanContentType type, long begin

int contentIndex = inputIndex;

unsafe {
fixed (byte* inbuf = input) {
ScanContent (inbuf, ref nleft, ref midline, ref formats);
}
}
ScanContent (ref nleft, ref midline, ref formats);

if (contentIndex < inputIndex) {
switch (type) {
Expand Down Expand Up @@ -413,24 +364,11 @@ async Task<int> ConstructMessagePartAsync (int depth, CancellationToken cancella
return 0;
}

unsafe {
fixed (byte* inbuf = input) {
byte* start = inbuf + inputIndex;
byte* inend = inbuf + inputEnd;
byte* inptr = start;

*inend = (byte) '\n';

while (*inptr != (byte) '\n')
inptr++;

// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will
// *never* be an Mbox "From " marker in Entity mode.
if ((boundary = CheckBoundary (inputIndex, start, (int) (inptr - start))) != BoundaryType.None)
return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex));
}
}
// Note: This isn't obvious, but if the "boundary" that was found is an Mbox "From " line, then
// either the current stream offset is >= contentEnd -or- RespectContentLength is false. It will
// *never* be an Mbox "From " marker in Entity mode.
if ((boundary = CheckBoundary ()) != BoundaryType.None)
return GetLineCount (beginLineNumber, beginOffset, GetEndOffset (inputIndex));
}

// Note: When parsing non-toplevel parts, the header parser will never result in the Error state.
Expand Down Expand Up @@ -626,14 +564,10 @@ async Task<int> ConstructMultipartAsync (ContentType contentType, int depth, Can
// We either found the end of the stream or we found a parent's boundary
PopBoundary ();

unsafe {
fixed (byte* inbuf = input) {
if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true))
boundary = BoundaryType.ImmediateEndBoundary;
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false))
boundary = BoundaryType.ImmediateBoundary;
}
}
if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (true))
boundary = BoundaryType.ImmediateEndBoundary;
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (false))
boundary = BoundaryType.ImmediateBoundary;

endOffset = GetEndOffset (inputIndex);

Expand Down
Loading

0 comments on commit 5f25f1f

Please sign in to comment.