From bfacb48c00a5e7ceac040515702486738720e8ea Mon Sep 17 00:00:00 2001 From: Josef Haider Date: Wed, 25 Feb 2026 09:47:49 +0100 Subject: [PATCH] Fix handling of nested objects when running with object_hook or object_pairs_hook in CallScannerNode --- .../src/tests/test_json.py | 10 ++++++- .../modules/json/JSONScannerBuiltins.java | 26 ++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_json.py b/graalpython/com.oracle.graal.python.test/src/tests/test_json.py index a074a5bb72..6c6d5b1337 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_json.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_json.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # The Universal Permissive License (UPL), Version 1.0 @@ -88,3 +88,11 @@ def test_encode_surrogate(self): assert s == '{"foo": "\\uda6a"}' s = json.dumps({'foo': "\uda6a"}, ensure_ascii=False) assert s == '{"foo": "\uda6a"}' + + def test_object_hook_nested(self): + def hook(obj): + return "hooked" + + assert json.loads('{"outer": {"inner": {"leaf": 1}}}', object_hook=hook) == "hooked" + assert json.loads('{"outer": {"inner": {"leaf": 1}}}', object_pairs_hook=hook) == "hooked" + diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java index b1829a6411..5bcd916cbb 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java @@ -1,4 +1,4 @@ -/* Copyright (c) 2020, 
2025, Oracle and/or its affiliates. +/* Copyright (c) 2020, 2026, Oracle and/or its affiliates. * Copyright (C) 1996-2020 Python Software Foundation * * Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 @@ -445,6 +445,7 @@ private Object scanOnceUnicode(VirtualFrame frame, BoundaryCallData boundaryCall final Object value; ScannerState nextState = null; if (state == ScannerState.dict) { + /* scanner is currently inside a dictionary */ int c; if (idx >= length || (c = codePointAtIndexNode.execute(string, idx)) != '"' && (c != '}')) { throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, idx, ErrorMessages.EXPECTING_PROP_NAME_ECLOSED_IN_DBL_QUOTES); @@ -457,6 +458,17 @@ private Object scanOnceUnicode(VirtualFrame frame, BoundaryCallData boundaryCall Object topOfStack = stack.pop(); TruffleString parentKey = null; final Object dict; + /* + * If no hooks are present, the stack contains only the parent dicts or + * lists the current object is nested in. If an objectHook or objectPairsHook + * is present, the stack also stores the property keys of nested dicts. For + * example, when parsing the innermost dict of '{"a":{"b":{"c":"d"}}}', the + * stack will contain (from bottom to top) [<dict>, <dict>, + * "a", <dict>, "b"]. This is necessary so after finishing parsing a + * dict, we can call the hook and replace it with the hook's return value in + * the parent dict, i.e. when in the previous example we finish parsing + * '{"c":"d"}', we effectively execute parent["b"] = objectHook({"c":"d"}). + */ if (hasPairsHook) { if (topOfStack instanceof TruffleString) { parentKey = (TruffleString) topOfStack; @@ -484,6 +496,9 @@ private Object scanOnceUnicode(VirtualFrame frame, BoundaryCallData boundaryCall nextState = parentKey == null ? 
ScannerState.list : ScannerState.dict; if (nextState == ScannerState.dict) { Object parent = stack.peek(); + if (parent instanceof TruffleString) { + parent = stack.get(stack.size() - 2); + } if (hasPairsHook) { currentPairsStorage = (ObjectSequenceStorage) parent; currentPairsStorage.setObjectItemNormalized(currentPairsStorage.length() - 1, PFactory.createTuple(language, new Object[]{parentKey, dict})); @@ -518,6 +533,7 @@ private Object scanOnceUnicode(VirtualFrame frame, BoundaryCallData boundaryCall substringByteIndexNode, appendCodePointNode, appendSubstringByteIndexNode, builderToStringNode); + /* force hash computation */ hashCodeNode.execute(newKey, TS_ENCODING); TruffleString key = memoPutIfAbsent(memo, newKey); if (key == null) { @@ -624,12 +640,20 @@ private Object scanOnceUnicode(VirtualFrame frame, BoundaryCallData boundaryCall assert nextState == ScannerState.dict; currentPairsStorage = (ObjectSequenceStorage) value; if (state == ScannerState.dict) { + /* + * save the associated propertyKey so we can replace the current + * value with the hook's result later + */ stack.add(propertyKey); } } else { assert nextState == ScannerState.dict; currentDictStorage = (EconomicMapStorage) ((PDict) value).getDictStorage(); if (hasObjectHook && state == ScannerState.dict) { + /* + * save the associated propertyKey so we can replace the current + * value with the hook's result later + */ stack.add(propertyKey); } }