Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bridge: Fix text channels with split UTF8 chars across frames #20628

Merged
merged 7 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ Makefile.in
/src/common/fail.html.c
/src/systemd/cockpit*.service
/src/systemd/cockpit*.socket
/src/systemd/cockpit-tempfiles.conf
/src/systemd/tmpfiles.d/cockpit-ws.conf
/src/tls/cockpit-certificate-helper
/src/ws/cockpit-desktop
/src/ws/cockpit.appdata.xml
Expand Down
10 changes: 5 additions & 5 deletions containers/ws/cockpit-auth-ssh-key
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def usage():


def send_frame(content):
data = json.dumps(content).encode('utf-8')
os.write(1, str(len(data) + 1).encode('utf-8'))
data = json.dumps(content).encode()
os.write(1, str(len(data) + 1).encode())
os.write(1, b"\n\n")
os.write(1, data)

Expand Down Expand Up @@ -109,7 +109,7 @@ def read_frame(fd):
size = size - len(d)
data += d

return data.decode("UTF-8")
return data.decode()


def read_auth_reply():
Expand All @@ -129,7 +129,7 @@ def decode_basic_header(response):
assert response
assert response.startswith(starts), response

val = base64.b64decode(response[len(starts):].encode('utf-8')).decode("utf-8")
val = base64.b64decode(response[len(starts):].encode()).decode()
user, password = val.split(':', 1)
return user, password

Expand All @@ -150,7 +150,7 @@ cat /run/password""")

pass_fd = os.open("/run/password", os.O_CREAT | os.O_EXCL | os.O_WRONLY | os.O_CLOEXEC, mode=0o600)
try:
os.write(pass_fd, password.encode("UTF-8"))
os.write(pass_fd, password.encode())
os.close(pass_fd)

p = subprocess.run(["ssh-add", "-t", "30", fname],
Expand Down
29 changes: 29 additions & 0 deletions pkg/base1/test-http.js
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,35 @@ QUnit.test("streaming", assert => {
});
});

QUnit.test("split UTF8 frames", assert => {
const done = assert.async();
assert.expect(1);

cockpit.http(test_server)
.get("/mock/split-utf8")
.then(resp => assert.equal(resp, "initialfirst half é second halffinal", "correct response"))
.finally(done);
});

QUnit.test("truncated UTF8 frame", assert => {
const done = assert.async();
assert.expect(3);
let received = "";

cockpit.http(test_server)
.get("/mock/truncated-utf8")
.stream(block => { received += block })
.then(() => assert.ok(false, "should not have succeeded"))
// does not include the first byte of é
.catch(ex => {
// does not include the first byte of é
assert.equal(received, "initialfirst half ", "received expected data");
assert.equal(ex.problem, "protocol-error", "error code");
assert.ok(ex.message.includes("unexpected end of data"), ex.message);
})
.finally(done);
});

QUnit.test("close", assert => {
const done = assert.async();
assert.expect(3);
Expand Down
2 changes: 1 addition & 1 deletion pkg/lib/cockpit.js
Original file line number Diff line number Diff line change
Expand Up @@ -3896,7 +3896,7 @@ function factory() {

if (options.problem) {
http_debug("http problem: ", options.problem);
dfd.reject(new BasicError(options.problem));
dfd.reject(new BasicError(options.problem, options.message));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This added line is not executed by any test.

} else {
const body = buffer.squash();

Expand Down
6 changes: 3 additions & 3 deletions pkg/storaged/crypto/luksmeta-monitor-hack.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def b64_decode(data):


def get_clevis_config_from_protected_header(protected_header):
header = b64_decode(protected_header).decode("utf-8")
header = b64_decode(protected_header).decode()
header_object = json.loads(header)
clevis = header_object.get("clevis", None)
if clevis is None:
Expand Down Expand Up @@ -85,7 +85,7 @@ def info(dev):
"-u", "cb6e8904-81ff-40da-a84a-07ab9ab5715e"],
stderr=subprocess.PIPE)
entry["ClevisConfig"] = {
"v": json.dumps(get_clevis_config_from_jwe(luksmeta.decode("utf-8")))
"v": json.dumps(get_clevis_config_from_jwe(luksmeta.decode()))
}
except (subprocess.CalledProcessError, FileNotFoundError):
pass
Expand All @@ -98,7 +98,7 @@ def info(dev):
try:
token = subprocess.check_output(["cryptsetup", "token", "export", dev, "--token-id", match.group(1)],
stderr=subprocess.PIPE)
token_object = json.loads(token.decode("utf-8"))
token_object = json.loads(token.decode())
if token_object.get("type") == "clevis":
config = json.dumps(get_clevis_config_from_protected_header(token_object["jwe"]["protected"]))
for slot_str in token_object.get("keyslots", []):
Expand Down
2 changes: 1 addition & 1 deletion src/build_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def write(filename: str, data: AnyStr) -> None:
def beipack_self(main: str, args: str = '') -> bytes:
from cockpit._vendor.bei import beipack
contents = {name: wheel.read(name) for name in wheel.namelist()}
pack = beipack.pack(contents, main, args=args).encode('utf-8')
pack = beipack.pack(contents, main, args=args).encode()
return lzma.compress(pack, preset=lzma.PRESET_EXTREME)

def write_distinfo(filename: str, lines: Iterable[str]) -> None:
Expand Down
55 changes: 49 additions & 6 deletions src/cockpit/channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import asyncio
import codecs
import json
import logging
import traceback
Expand Down Expand Up @@ -111,6 +112,8 @@ class Channel(Endpoint):
# These get filled in from .do_open()
channel = ''
group = ''
is_binary: bool
decoder: 'codecs.IncrementalDecoder | None'

# input
def do_control(self, command: str, message: JsonObject) -> None:
Expand All @@ -124,6 +127,8 @@ def do_control(self, command: str, message: JsonObject) -> None:
self._send_pings = True
self._ack_bytes = get_enum(message, 'send-acks', ['bytes'], None) is not None
self.group = get_str(message, 'group', 'default')
self.is_binary = get_enum(message, 'binary', ['raw'], None) is not None
self.decoder = None
self.freeze_endpoint()
self.do_open(message)
elif command == 'ready':
Expand Down Expand Up @@ -217,7 +222,17 @@ def ready(self, **kwargs: JsonValue) -> None:
self.thaw_endpoint()
self.send_control(command='ready', **kwargs)

def __decode_frame(self, data: bytes, *, final: bool = False) -> str:
assert self.decoder is not None
try:
return self.decoder.decode(data, final=final)
except UnicodeDecodeError as exc:
raise ChannelError('protocol-error', message=str(exc)) from exc

def done(self) -> None:
# any residue from partial send_data() frames? this is invalid, fail the channel
if self.decoder is not None:
self.__decode_frame(b'', final=True)
self.send_control(command='done')

# tasks and close management
Expand Down Expand Up @@ -263,8 +278,8 @@ def close(self, close_args: 'JsonObject | None' = None) -> None:
if not self._tasks:
self._close_now()

def send_data(self, data: bytes) -> bool:
"""Send data and handle book-keeping for flow control.
def send_bytes(self, data: bytes) -> bool:
"""Send binary data and handle book-keeping for flow control.

The flow control is "advisory". The data is sent immediately, even if
it's larger than the window. In general you should try to send packets
Expand All @@ -273,6 +288,10 @@ def send_data(self, data: bytes) -> bool:
Returns True if there is still room in the window, or False if you
should stop writing for now. In that case, `.do_resume_send()` will be
called later when there is more room.

Be careful with text channels (i.e. without binary="raw"): you are responsible
for ensuring that @data is valid UTF-8. This isn't validated here for
efficiency reasons.
"""
self.send_channel_data(self.channel, data)

Expand All @@ -284,6 +303,34 @@ def send_data(self, data: bytes) -> bool:

return self._out_sequence < self._out_window

def send_data(self, data: bytes) -> bool:
"""Send data and transparently handle UTF-8 for text channels

Use this for channels which can be text, but are not guaranteed to get
valid UTF-8 frames -- i.e. multi-byte characters may be split across
frames. This is expensive, so prefer send_text() or send_bytes() wherever
possible.
"""
if self.is_binary:
return self.send_bytes(data)

# for text channels we must avoid splitting UTF-8 multi-byte characters,
# as these can't be sent to a WebSocket (and are often confusing for text streams as well)
if self.decoder is None:
self.decoder = codecs.getincrementaldecoder('utf-8')(errors='strict')
return self.send_text(self.__decode_frame(data))

def send_text(self, data: str) -> bool:
"""Send UTF-8 string data and handle book-keeping for flow control.

Similar to `send_bytes`, but for text data. The data is sent as UTF-8 encoded bytes.
"""
return self.send_bytes(data.encode())

def send_json(self, _msg: 'JsonObject | None' = None, **kwargs: JsonValue) -> bool:
pretty = self.json_encoder.encode(create_object(_msg, kwargs)) + '\n'
return self.send_text(pretty)

def do_pong(self, message):
if not self._send_pings: # huh?
logger.warning("Got wild pong on channel %s", self.channel)
Expand All @@ -299,10 +346,6 @@ def do_resume_send(self) -> None:

json_encoder: 'ClassVar[json.JSONEncoder]' = json.JSONEncoder(indent=2)

def send_json(self, _msg: 'JsonObject | None' = None, **kwargs: JsonValue) -> bool:
pretty = self.json_encoder.encode(create_object(_msg, kwargs)) + '\n'
return self.send_data(pretty.encode())

def send_control(self, command: str, **kwargs: JsonValue) -> None:
self.send_channel_control(self.channel, command, None, **kwargs)

Expand Down
8 changes: 3 additions & 5 deletions src/cockpit/channels/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
JsonError,
JsonObject,
get_bool,
get_enum,
get_int,
get_str,
get_strv,
Expand Down Expand Up @@ -123,7 +122,6 @@ class FsReadChannel(GeneratorChannel):

def do_yield_data(self, options: JsonObject) -> Generator[bytes, None, JsonObject]:
path = get_str(options, 'path')
binary = get_enum(options, 'binary', ['raw'], None) is not None
max_read_size = get_int(options, 'max_read_size', None)

logger.debug('Opening file "%s" for reading', path)
Expand All @@ -134,7 +132,7 @@ def do_yield_data(self, options: JsonObject) -> Generator[bytes, None, JsonObjec
if max_read_size is not None and buf.st_size > max_read_size:
raise ChannelError('too-large')

if binary and stat.S_ISREG(buf.st_mode):
if self.is_binary and stat.S_ISREG(buf.st_mode):
self.ready(size_hint=buf.st_size)
else:
self.ready()
Expand All @@ -144,8 +142,8 @@ def do_yield_data(self, options: JsonObject) -> Generator[bytes, None, JsonObjec
if data == b'':
break
logger.debug(' ...sending %d bytes', len(data))
if not binary:
data = data.replace(b'\0', b'').decode('utf-8', errors='ignore').encode('utf-8')
if not self.is_binary:
data = data.replace(b'\0', b'').decode(errors='ignore').encode()
yield data

return {'tag': tag_from_stat(buf)}
Expand Down
5 changes: 2 additions & 3 deletions src/cockpit/channels/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import ssl

from ..channel import AsyncChannel, ChannelError
from ..jsonutil import JsonObject, get_dict, get_enum, get_int, get_object, get_str, typechecked
from ..jsonutil import JsonObject, get_dict, get_int, get_object, get_str, typechecked

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -97,7 +97,6 @@ def request(
async def run(self, options: JsonObject) -> None:
logger.debug('open %s', options)

binary = get_enum(options, 'binary', ['raw'], None) is not None
method = get_str(options, 'method')
path = get_str(options, 'path')
headers = get_object(options, 'headers', lambda d: {k: typechecked(v, str) for k, v in d.items()}, None)
Expand Down Expand Up @@ -133,7 +132,7 @@ async def run(self, options: JsonObject) -> None:
self.send_control(command='response',
status=response.status,
reason=response.reason,
headers=self.get_headers(response, binary=binary))
headers=self.get_headers(response, binary=self.is_binary))

# Receive the body and finish up
try:
Expand Down
2 changes: 1 addition & 1 deletion src/cockpit/channels/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def send_updates(self, samples: Samples, last_samples: Samples):
self.send_meta(samples, timestamp)

self.last_timestamp = self.next_timestamp
self.send_data(json.dumps([data]).encode())
martinpitt marked this conversation as resolved.
Show resolved Hide resolved
self.send_text(json.dumps([data]))

async def run(self, options):
self.metrics = []
Expand Down
2 changes: 1 addition & 1 deletion src/cockpit/channels/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class PackagesChannel(AsyncChannel):
def http_error(self, status: int, message: str) -> None:
template = read_cockpit_data_file('fail.html')
self.send_json(status=status, reason='ERROR', headers={'Content-Type': 'text/html; charset=utf-8'})
self.send_data(template.replace(b'@@message@@', message.encode('utf-8')))
self.send_data(template.replace(b'@@message@@', message.encode()))
self.done()
self.close()

Expand Down
4 changes: 2 additions & 2 deletions src/cockpit/channels/trivial.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class EchoChannel(Channel):
def do_open(self, options):
self.ready()

def do_data(self, data):
self.send_data(data)
def do_data(self, data: bytes) -> None:
self.send_bytes(data)

def do_done(self):
self.done()
Expand Down
2 changes: 1 addition & 1 deletion src/cockpit/misc/print.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def data(self, channel: str, /, data: bytes) -> None:

def json(self, channel: str, /, **kwargs: object) -> None:
"""Send a json message (built from **kwargs) on a channel"""
self.data(channel, json.dumps(kwargs, indent=2).encode('utf-8') + b'\n')
self.data(channel, json.dumps(kwargs, indent=2).encode() + b'\n')

def control(self, command: str, **kwargs: Any) -> None:
"""Send a control message, build from **kwargs"""
Expand Down
Loading