diff --git a/concat/__main__.py b/concat/__main__.py index 3d71f3a..077aa14 100644 --- a/concat/__main__.py +++ b/concat/__main__.py @@ -2,7 +2,12 @@ import argparse from concat.transpile import parse, transpile_ast, typecheck -from concat.error_reporting import get_line_at, create_parsing_failure_message +from concat.error_reporting import ( + get_line_at, + create_indentation_error_message, + create_lexical_error_message, + create_parsing_failure_message, +) import concat.execute import concat.lex import concat.parser_combinators @@ -11,7 +16,7 @@ import json import os.path import sys -from typing import Callable, IO, AnyStr +from typing import Callable, IO, AnyStr, assert_never filename = '' @@ -52,28 +57,39 @@ def func(name: str) -> IO[AnyStr]: '--tokenize', action='store_true', default=False, - help='tokenize input from the given file and print the tokens as a JSON array', + help=( + 'tokenize input from the given file and print the tokens as a JSON ' + 'array' + ), ) -# We should pass any unknown args onto the program we're about to run. -# FIXME: There might be a better way to go about this, but I think this is fine -# for now. -args, rest = arg_parser.parse_known_args() -sys.argv = [sys.argv[0], *rest] +def tokenize_printing_errors() -> list[concat.lex.Token]: + token_results = concat.lex.tokenize(args.file.read()) + tokens = list[concat.lex.Token]() + for r in token_results: + if r.type == 'token': + tokens.append(r.token) + elif r.type == 'indent-err': + position = (r.err.lineno or 1, r.err.offset or 0) + message = r.err.msg + print('Indentation error:') + print( + create_indentation_error_message(args.file, position, message) + ) + elif r.type == 'token-err': + position = r.location + message = str(r.err) + print('Lexical error:') + print(create_lexical_error_message(args.file, position, message)) + else: + assert_never(r) + return tokens -if args.tokenize: - code = args.file.read() - tokens = concat.lex.tokenize(code, should_preserve_comments=True) - json.dump(tokens, sys.stdout, cls=concat.lex.TokenEncoder) - sys.exit() -# interactive mode -if args.file.isatty(): - concat.stdlib.repl.repl([], [], args.debug) -else: +def batch_main(): try: - tokens = concat.lex.tokenize(args.file.read()) + tokens = tokenize_printing_errors() concat_ast = parse(tokens) recovered_parsing_failures = concat_ast.parsing_failures for failure in recovered_parsing_failures: @@ -121,3 +137,26 @@ def func(name: str) -> IO[AnyStr]: sys.exit(1) finally: args.file.close() + + +def main(): + # interactive mode + if args.file.isatty(): + concat.stdlib.repl.repl([], [], args.debug) + else: + batch_main() + + +# We should pass any unknown args onto the program we're about to run. +# FIXME: There might be a better way to go about this, but I think this is fine +# for now. 
+args, rest = arg_parser.parse_known_args() +sys.argv = [sys.argv[0], *rest] + +if args.tokenize: + code = args.file.read() + tokens = concat.lex.tokenize(code, should_preserve_comments=True) + json.dump(tokens, sys.stdout, cls=concat.lex.TokenEncoder) + sys.exit() + +main() diff --git a/concat/error_reporting.py b/concat/error_reporting.py index f44b75f..8b1287f 100644 --- a/concat/error_reporting.py +++ b/concat/error_reporting.py @@ -16,7 +16,12 @@ def create_parsing_failure_message( stream: Sequence[concat.lex.Token], failure: concat.parser_combinators.FailureTree, ) -> str: - location = stream[failure.furthest_index].start + if failure.furthest_index < len(stream): + location = stream[failure.furthest_index].start + elif stream: + location = stream[-1].start + else: + location = (1, 0) line = get_line_at(file, location) message = f'Expected {failure.expected} at line {location[0]}, column {location[1] + 1}:\n{line.rstrip()}\n{" " * location[1] + "^"}' if failure.children: @@ -26,3 +31,28 @@ def create_parsing_failure_message( create_parsing_failure_message(file, stream, f), ' ' ) return message + + +def create_lexical_error_message( + file: TextIO, location: concat.astutils.Location, message: str +) -> str: + line = get_line_at(file, location) + message = ( + f'Cannot tokenize file at line {location[0]}, ' + f'column {location[1] + 1}:\n' + f'{line.rstrip()}\n' + f'{' ' * location[1] + '^'}\n' + ) + return message + + +def create_indentation_error_message( + file: TextIO, location: concat.astutils.Location, message: str +) -> str: + line = get_line_at(file, location) + message = ( + f'Malformed indentation at line {location[0]}, ' + f'column {location[1] + 1}:\n' + f'{line.rstrip()}\n' + ) + return message diff --git a/concat/lex.py b/concat/lex.py index e7887c6..849435e 100644 --- a/concat/lex.py +++ b/concat/lex.py @@ -1,9 +1,11 @@ -import concat.astutils +from __future__ import annotations +from concat.astutils import Location, are_on_same_line_and_offset_by import dataclasses import io import json import tokenize as py_tokenize -from typing import Iterator, List, Optional, Tuple, Union +import token +from typing import Iterator, List, Literal, Optional, Tuple, Union @dataclasses.dataclass @@ -19,8 +21,8 @@ class Token: type: str = '' value: str = '' - start: 'concat.astutils.Location' = (0, 0) - end: 'concat.astutils.Location' = (0, 0) + start: Location = (0, 0) + end: Location = (0, 0) is_keyword: bool = False @@ -33,7 +35,10 @@ def default(self, obj): return super().default(obj) -def tokenize(code: str, should_preserve_comments: bool = False) -> List[Token]: +def tokenize( + code: str, + should_preserve_comments: bool = False, +) -> List[Result]: lexer = Lexer() lexer.input(code, should_preserve_comments) tokens = [] @@ -46,10 +51,8 @@ def tokenize(code: str, should_preserve_comments: bool = False) -> List[Token]: TokenTuple = Union[ - Tuple[str, str, 'concat.astutils.Location', 'concat.astutils.Location'], - Tuple[ - str, str, 'concat.astutils.Location', 'concat.astutils.Location', bool - ], + Tuple[str, str, Location, Location], + Tuple[str, str, Location, Location, bool], ] @@ -61,137 +64,197 @@ class Lexer: def __init__(self) -> None: self.data: str - self.tokens: Optional[Iterator[py_tokenize.TokenInfo]] + self.tokens: Iterator[ + py_tokenize.TokenInfo | IndentationErrorResult | TokenErrorResult + ] self.lineno: int self.lexpos: int - self._concat_token_iterator: Iterator['Token'] + self._concat_token_iterator: Iterator[Result] self._should_preserve_comments: bool def 
input(self, data: str, should_preserve_comments: bool = False) -> None: """Initialize the Lexer object with the data to tokenize.""" self.data = data - self.tokens = None + self.tokens = self._py_tokens_handling_errors( + py_tokenize.tokenize( + io.BytesIO(self.data.encode('utf-8')).readline + ) + ) self.lineno = 1 self.lexpos = 0 - self._concat_token_iterator = self._tokens() + self._concat_token_iterator = self._tokens_filtering_nl_and_comments( + self._tokens_glued(self._tokens()) + ) self._should_preserve_comments = should_preserve_comments - def token(self) -> Optional['Token']: + def token(self) -> Optional[Result]: """Return the next token as a Token object.""" return next(self._concat_token_iterator, None) - def _tokens(self) -> Iterator['Token']: - import token - - if self.tokens is None: - self.tokens = py_tokenize.tokenize( - io.BytesIO(self.data.encode('utf-8')).readline - ) + def _py_tokens_handling_errors( + self, tokens: Iterator[py_tokenize.TokenInfo] + ) -> Iterator[ + py_tokenize.TokenInfo | IndentationErrorResult | TokenErrorResult + ]: + while True: + try: + tok = next(tokens) + yield tok + except StopIteration: + return + except IndentationError as e: + yield IndentationErrorResult(e) + except py_tokenize.TokenError as e: + yield TokenErrorResult(e, (self.lineno, self.lexpos)) - glued_token_prefix = None - for token_ in self.tokens: - tok = Token() - _, tok.value, tok.start, tok.end, _ = token_ - tok.type = token.tok_name[token_.exact_type] - tokens_to_massage = [tok] - if glued_token_prefix: - if ( - glued_token_prefix.value == '-' - and tok.value == '-' - and concat.astutils.are_on_same_line_and_offset_by( - glued_token_prefix.start, tok.start, 1 - ) - ): - glued_token_prefix.value = '--' - glued_token_prefix.type = 'MINUSMINUS' - glued_token_prefix.end = tok.end + def _tokens_glued(self, tokens: Iterator[Result]) -> Iterator[Result]: + glued_token_prefix: Token | None = None + for r in tokens: + if r.type == 'token': + tok = r.token + if glued_token_prefix: self._update_position(glued_token_prefix) - yield glued_token_prefix + if tok.value == '-' and are_on_same_line_and_offset_by( + glued_token_prefix.start, tok.start, 1 + ): + glued_token_prefix.value = '--' + glued_token_prefix.type = 'MINUSMINUS' + glued_token_prefix.end = tok.end + yield TokenResult(glued_token_prefix) + glued_token_prefix = None + continue + yield TokenResult(glued_token_prefix) glued_token_prefix = None - continue + if tok.value == '-': + glued_token_prefix = tok else: - tokens_to_massage[:0] = [glued_token_prefix] - glued_token_prefix = None - for tok in tokens_to_massage: - if tok.type in {'NL', 'COMMENT'}: self._update_position(tok) - if ( - self._should_preserve_comments - and tok.type == 'COMMENT' - ): - yield tok - continue - elif tok.type == 'ERRORTOKEN': - if tok.value == ' ': - self._update_position(tok) - continue - elif tok.value == '!': - tok.type = 'EXCLAMATIONMARK' - elif tok.value in {'def', 'import', 'from'}: - tok.type = tok.value.upper() - tok.is_keyword = True - elif tok.value == '$': - tok.type = 'DOLLARSIGN' - elif tok.type != 'NAME' and tok.value in { - '...', - '-', - '**', - '~', - '*', - '*=', - '//', - '/', - '%', - '+', - '<<', - '>>', - '&', - '^', - '|', - '<', - '>', - '==', - '>=', - '<=', - '!=', - 'is', - 'in', - 'or', - 'and', - 'not', - '@', - }: - tok.type = 'NAME' - if tok.value == '-': - glued_token_prefix = tok - continue + yield r + else: + yield r + if glued_token_prefix: + self._update_position(glued_token_prefix) + yield 
TokenResult(glued_token_prefix) + + def _tokens_filtering_nl_and_comments( + self, tokens: Iterator[Result] + ) -> Iterator[Result]: + for r in tokens: + if r.type != 'token' or r.token.type not in ['NL', 'COMMENT']: + yield r + continue + tok = r.token + self._update_position(tok) + if self._should_preserve_comments and tok.type == 'COMMENT': + yield r + def _tokens(self) -> Iterator[Result]: + for token_or_error in self.tokens: + if isinstance( + token_or_error, (IndentationErrorResult, TokenErrorResult) + ): + yield token_or_error + continue + tok = Token() + _, tok.value, tok.start, tok.end, _ = token_or_error + tok.type = token.tok_name[token_or_error.exact_type] + if tok.type == 'ERRORTOKEN' and tok.value == ' ': self._update_position(tok) + continue + if tok.value in {'def', 'import', 'from', 'as', 'class', 'cast'}: + tok.type = tok.value.upper() + tok.is_keyword = True + elif tok.value == '$': + tok.type = 'DOLLARSIGN' + elif tok.type != 'NAME' and tok.value in { + '...', + '-', + '**', + '~', + '*', + '*=', + '//', + '/', + '%', + '+', + '<<', + '>>', + '&', + '^', + '|', + '<', + '>', + '==', + '>=', + '<=', + '!=', + 'is', + 'in', + 'or', + 'and', + 'not', + '@', + }: + tok.type = 'NAME' - if tok.type == 'NAME': - type_map = {'as': 'AS', 'class': 'CLASS', 'cast': 'CAST'} - if tok.value in type_map: - tok.type = type_map[tok.value] - tok.is_keyword = True - elif tok.type == 'STRING' and self.__is_bytes_literal( - tok.value - ): - tok.type = 'BYTES' - elif tok.value == '`': - tok.type = 'BACKTICK' - elif tok.type == 'EXCLAMATION': - tok.type = 'EXCLAMATIONMARK' + self._update_position(tok) - yield tok + if tok.type == 'STRING' and self.__is_bytes_literal(tok.value): + tok.type = 'BYTES' + elif tok.value == '`': + tok.type = 'BACKTICK' + elif tok.value == '!': + tok.type = 'EXCLAMATIONMARK' + + yield TokenResult(tok) def _update_position(self, tok: 'Token') -> None: - self.lexpos += len(tok.value) - if tok.type in {'NEWLINE', 'NL'}: - self.lineno += 1 + self.lineno, self.lexpos = tok.start def __is_bytes_literal(self, literal: str) -> bool: return isinstance(eval(literal), bytes) +@dataclasses.dataclass +class TokenResult: + """Result class for successfully generated tokens.""" + + type: Literal['token'] + token: Token + + def __init__(self, token: Token) -> None: + self.type = 'token' + self.token = token + + +@dataclasses.dataclass +class IndentationErrorResult: + """Result class for IndentationErrors raised by the Python tokenizer.""" + + type: Literal['indent-err'] + err: IndentationError + + def __init__(self, err: IndentationError) -> None: + self.type = 'indent-err' + self.err = err + + +@dataclasses.dataclass +class TokenErrorResult: + """Result class for TokenErrors raised by the Python tokenizer.""" + + type: Literal['token-err'] + err: py_tokenize.TokenError + location: Location + + def __init__(self, err: py_tokenize.TokenError, loc: Location) -> None: + self.type = 'token-err' + self.err = err + self.location = loc + + +type Result = TokenResult | IndentationErrorResult | TokenErrorResult + + def to_tokens(*tokTuples: TokenTuple) -> List[Token]: return [Token(*tuple) for tuple in tokTuples] diff --git a/concat/lsp/__init__.py b/concat/lsp/__init__.py index 2bd3642..cd0e8ec 100644 --- a/concat/lsp/__init__.py +++ b/concat/lsp/__init__.py @@ -1,8 +1,8 @@ from concat.astutils import Location import concat.jsonrpc -from concat.lex import tokenize +from concat.lex import Token, tokenize from concat.logging import ConcatLogger -from concat.parse import ParseError +from 
concat.parser_combinators import ParseError from concat.transpile import parse, typecheck from concat.typecheck import StaticAnalysisError from enum import Enum, IntEnum @@ -10,7 +10,6 @@ import logging from pathlib import Path import re -import tokenize as py_tokenize from typing import ( BinaryIO, Callable, @@ -411,14 +410,26 @@ def diagnose(self) -> None: def _diagnose(self) -> List[_Diagnostic]: text_lines = self._text.splitlines(keepends=True) - try: - tokens = tokenize(self._text) - except py_tokenize.TokenError as e: - message = e.args[0] - position = _Position.from_tokenizer_location(text_lines, e.args[1]) - range_ = _Range(position, position) - return [_Diagnostic(range_, message)] + token_results = tokenize(self._text) diagnostics = [] + tokens = list[Token]() + for r in token_results: + if r.type == 'token': + tokens.append(r.token) + elif r.type == 'indent-err': + position = _Position.from_tokenizer_location( + text_lines, (r.err.lineno or 1, r.err.offset or 0) + ) + range_ = _Range(position, position) + message = r.err.msg + diagnostics.append(_Diagnostic(range_, message)) + elif r.type == 'token-err': + position = _Position.from_tokenizer_location( + text_lines, r.location + ) + range_ = _Range(position, position) + message = str(r.err) + diagnostics.append(_Diagnostic(range_, message)) for token in tokens: if token.type == 'ERRORTOKEN': _logger.debug('error token: {token!r}', token=token) @@ -436,19 +447,21 @@ def _diagnose(self) -> List[_Diagnostic]: diagnostics.append(_Diagnostic(range_, message)) try: ast = parse(tokens) + ast.assert_no_parse_errors() except ParseError as e: - parser_start_position = e.get_start_position() - parser_end_position = e.get_end_position() - range_ = _Range( - _Position.from_tokenizer_location( - text_lines, parser_start_position - ), - _Position.from_tokenizer_location( - text_lines, parser_end_position - ), - ) - message = f'Expected one of: {", ".join(e.expected)}' - diagnostics.append(_Diagnostic(range_, message)) + for failure in e.args[0].failures: + parser_start_position = tokens[failure.furthest_index].start + parser_end_position = parser_start_position + range_ = _Range( + _Position.from_tokenizer_location( + text_lines, parser_start_position + ), + _Position.from_tokenizer_location( + text_lines, parser_end_position + ), + ) + message = f'Expected one of: {failure.expected}' + diagnostics.append(_Diagnostic(range_, message)) return diagnostics try: # https://stackoverflow.com/questions/5977576/is-there-a-convenient-way-to-map-a-file-uri-to-os-path diff --git a/concat/stdlib/repl.py b/concat/stdlib/repl.py index e4d139e..70d8b93 100644 --- a/concat/stdlib/repl.py +++ b/concat/stdlib/repl.py @@ -23,6 +23,10 @@ sys.modules[__name__].__class__ = concat.stdlib.importlib.Module +class _REPLTokenizeError(Exception): + pass + + def _tokenize(code: str) -> List[concat.lex.Token]: lexer = concat.lex.Lexer() lexer.input(code) @@ -31,7 +35,9 @@ def _tokenize(code: str) -> List[concat.lex.Token]: token = lexer.token() if token is None: break - tokens.append(token) + if token.type != 'token': + raise _REPLTokenizeError from token.err + tokens.append(token.token) return tokens @@ -192,6 +198,9 @@ def show_var(stack: List[object], stash: List[object]): except concat.execute.ConcatRuntimeError as e: print('Runtime error:\n') print(e) + except _REPLTokenizeError as e: + print('Lexical error:\n') + print(e) except EOFError: break else: diff --git a/concat/tests/stdlib/test_python_concat_interface.py b/concat/tests/stdlib/test_python_concat_interface.py 
index 7abbb43..50ad8a8 100644 --- a/concat/tests/stdlib/test_python_concat_interface.py +++ b/concat/tests/stdlib/test_python_concat_interface.py @@ -731,7 +731,9 @@ def test_modules_are_callable(self) -> None: token = lexer.token() if token is None: break - tokens.append(token) + if token.type != 'token': + raise token.err + tokens.append(token.token) parser = concat.parse.ParserDict() parser.extend_with(concat.parse.extension) concat_ast = parser.parse(tokens) diff --git a/concat/tests/test_lex.py b/concat/tests/test_lex.py index 161f0e9..f2b5633 100644 --- a/concat/tests/test_lex.py +++ b/concat/tests/test_lex.py @@ -1,5 +1,6 @@ import concat.lex as lex from concat.tests.small_example_programs import examples +import textwrap import unittest @@ -32,6 +33,23 @@ def test_examples(self) -> None: tokens.append(token) self.assertEqual(len(tokens), len(expected_tokens)) - expectationPairs = zip(tokens, expected_tokens) + expectationPairs = zip( + tokens, map(lex.TokenResult, expected_tokens) + ) for actual_token, expected_token in expectationPairs: self.assertEqual(actual_token, expected_token) + + @staticmethod + def test_indentation_error() -> None: + code = textwrap.dedent("""\ + def remove_stack_polymorphism( + f:forall `t *s. (*s i:`t -- *s) -- g:forall `t. (i:`t -- ) + ): + () + dfbfdbff""") + lexer = lex.Lexer() + lexer.input(code) + while True: + token = lexer.token() + if token is None: + break diff --git a/concat/tests/test_typecheck.py b/concat/tests/test_typecheck.py index 8aee6f8..4a1ccd6 100644 --- a/concat/tests/test_typecheck.py +++ b/concat/tests/test_typecheck.py @@ -44,7 +44,7 @@ def lex_string(string: str) -> List[concat.lex.Token]: - return lex.tokenize(string) + return [r.token for r in lex.tokenize(string) if r.type == 'token'] def parse(string: str) -> concat.parse.TopLevelNode: diff --git a/concat/typecheck/__init__.py b/concat/typecheck/__init__.py index dcb6b99..d009499 100644 --- a/concat/typecheck/__init__.py +++ b/concat/typecheck/__init__.py @@ -28,6 +28,7 @@ TYPE_CHECKING, Tuple, Union, + assert_never, cast, ) from concat.typecheck.types import ( @@ -58,7 +59,11 @@ no_return_type, ) import abc -from concat.error_reporting import create_parsing_failure_message +from concat.error_reporting import ( + create_indentation_error_message, + create_lexical_error_message, + create_parsing_failure_message, +) from concat.lex import Token import itertools import pathlib @@ -658,7 +663,24 @@ def _check_stub_resolved_path( raise TypeError(f'Type stubs at {path} do not exist') from e except IOError as e: raise TypeError(f'Failed to read type stubs at {path}') from e - tokens = concat.lex.tokenize(source) + token_results = concat.lex.tokenize(source) + tokens = list[Token]() + with path.open() as f: + for r in token_results: + if r.type == 'token': + tokens.append(r.token) + elif r.type == 'indent-err': + print('Indentation error:') + print( + create_indentation_error_message( + f, (r.err.lineno or 1, r.err.offset or 0), r.err.msg + ) + ) + elif r.type == 'token-err': + print('Lexical error:') + print(create_lexical_error_message(f, r.location, str(r.err))) + else: + assert_never(r) env = initial_env or Environment() from concat.transpile import parse
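
For reference, a minimal sketch (not part of the patch) of how a downstream caller might consume the new Result-based concat.lex.tokenize() API introduced above. Only the identifiers that appear in this diff are assumed (TokenResult and friends, r.type values 'token'/'indent-err'/'token-err', r.token, r.err, r.location); the helper name and the print-based error handling are illustrative only.

    # Sketch, assuming the Result union added in concat/lex.py by this diff.
    from typing import assert_never

    import concat.lex


    def collect_tokens(source: str) -> list[concat.lex.Token]:
        """Collect successful tokens, reporting lexer errors as they occur."""
        tokens: list[concat.lex.Token] = []
        for r in concat.lex.tokenize(source):
            if r.type == 'token':
                tokens.append(r.token)
            elif r.type == 'indent-err':
                # IndentationError carries optional position info.
                position = (r.err.lineno or 1, r.err.offset or 0)
                print(f'indentation error at {position}: {r.err.msg}')
            elif r.type == 'token-err':
                print(f'lexical error at {r.location}: {r.err}')
            else:
                assert_never(r)  # exhaustiveness check over the Result union
        return tokens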