<!doctype html>
<html lang="en" class="no-js">
<head>
  <meta charset="utf-8">
  <!-- begin SEO -->
  <title>Belief-Based Offline Reinforcement Learning for Delay-Robust Policy Optimization - Home</title>
  <!-- The description summarises this page's paper (DT-CORL); keep it in sync with the og:description below. -->
  <meta name="description" content="This paper introduces DT-CORL, an offline reinforcement learning framework that learns delay-robust policies from static, delay-free data by jointly training a transformer-based belief model with a constrained policy objective.">
  <meta property="article:published_time" content="2026-01-23T00:00:00+00:00">
  <link rel="canonical" href="https://simon-zhan.com/publication/2026-iclr-dt-corl.md">
  <script type="application/ld+json"> { "@context" : "http://schema.org", "@type" : "Person", "name" : "Simon Zhan", "url" : "https://simon-zhan.com", "sameAs" : null } </script>
  <!-- end SEO -->
  <!-- Open Graph protocol data (https://ogp.me/), used by social media -->
  <meta property="og:locale" content="en-US">
  <meta property="og:site_name" content="Home">
  <meta property="og:title" content="Belief-Based Offline Reinforcement Learning for Delay-Robust Policy Optimization">
  <meta property="og:type" content="article">
  <meta property="og:description" name="description" content="This paper introduces DT-CORL, an offline reinforcement learning framework that learns delay-robust policies from static, delay-free data by jointly training a transformer-based belief model with a constrained policy objective.">
  <meta property="og:url" content="https://simon-zhan.com/publication/2026-iclr-dt-corl.md">
  <!-- end Open Graph protocol -->
  <link href="https://simon-zhan.com/feed.xml" type="application/atom+xml" rel="alternate" title="Home Feed">
  <!-- http://t.co/dKP3o1e -->
  <meta name="HandheldFriendly" content="True">
  <meta name="MobileOptimized" content="320">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!-- Replace the "no-js" class on <html> with "js" once scripting runs. The statement must stay on a single line: a line break inside the quoted string is a syntax error. -->
  <script> document.documentElement.className = document.documentElement.className.replace(/\bno-js\b/g, '') + ' js '; </script>
  <!-- For all browsers -->
  <link rel="stylesheet" href="https://simon-zhan.com/assets/css/main.css">
  <meta http-equiv="cleartype" content="on">
  <!-- start custom head snippets -->
  <!-- Support for Academicons -->
  <link rel="stylesheet" href="https://simon-zhan.com/assets/css/academicons.css"/>
  <!-- favicon from https://commons.wikimedia.org/wiki/File:OOjs_UI_icon_academic-progressive.svg -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://simon-zhan.com/images/apple-touch-icon-180x180.png"/>
  <link rel="icon" type="image/svg+xml" href="https://simon-zhan.com/images/favicon.svg"/>
  <link rel="icon" type="image/png" href="https://simon-zhan.com/images/favicon-32x32.png" sizes="32x32"/>
  <link rel="icon" type="image/png" href="https://simon-zhan.com/images/favicon-192x192.png" sizes="192x192"/>
  <link rel="manifest" href="https://simon-zhan.com/images/manifest.json"/>
  <link rel="icon" href="/images/favicon.ico"/>
  <meta name="theme-color" content="#ffffff"/>
  <!-- end custom head snippets -->
</head>
<body>
<!--[if lt IE 9]><div class="notice--danger align-center" style="margin: 0;">You are using an <strong>outdated</strong> browser.
Please <a href="http://browsehappy.com/">upgrade your browser</a> to improve your experience.</div><![endif]--><div class="masthead"><div class="masthead__inner-wrap"><div class="masthead__menu"><nav id="site-nav" class="greedy-nav"> <button><div class="navicon"></div></button><ul class="visible-links"><li class="masthead__menu-item masthead__menu-item--lg persist"><a href="https://simon-zhan.com/">Home</a></li><li class="masthead__menu-item"><a href="https://simon-zhan.com/publications/">Publications</a></li><li class="masthead__menu-item"><a href="https://simon-zhan.com/mentoring/">Mentoring</a></li><li class="masthead__menu-item"><a href="https://simon-zhan.com/year-archive/">Blog Posts</a></li><li class="masthead__menu-item"><a href="https://simon-zhan.com/cv/">CV</a></li><li id="theme-toggle" class="masthead__menu-item persist tail"> <a role="button" aria-labelledby="theme-icon"><i id="theme-icon" class="fa-solid fa-sun" aria-hidden="true" title="toggle theme"></i></a></li></ul><ul class="hidden-links hidden"></ul></nav></div></div></div><div id="main" role="main"><div class="sidebar sticky"><div itemscope itemtype="http://schema.org/Person"><div class="author__avatar"> <img src="https://simon-zhan.com/images/simonzhan.jpg" class="author__avatar" alt="Sinong (Simon) Zhan" fetchpriority="high" /></div><div class="author__content"><h3 class="author__name">Sinong (Simon) Zhan</h3><p class="author__bio">PhD student at Northwestern University</p></div><div class="author__urls-wrapper"> <button class="btn btn--inverse">Follow</button><ul class="author__urls social-icons"> <!-- Font Awesome icons / Biographic information --><li class="author__desktop"><i class="fas fa-fw fa-location-dot icon-pad-right" aria-hidden="true"></i>Evanston, IL</li><li class="author__desktop"><i class="fas fa-fw fa-building-columns icon-pad-right" aria-hidden="true"></i>Northwestern University</li><li><a href="mailto:SinongZhan2028@u.northwestern.edu"><i class="fas fa-fw fa-envelope 
icon-pad-right" aria-hidden="true"></i>Email</a></li><!-- Font Awesome and Academicons icons / Academic websites --><li><a href="https://scholar.google.com/citations?user=uO4dG0wAAAAJ&hl=en"><i class="ai ai-google-scholar ai-fw icon-pad-right"></i>Google Scholar</a></li><!-- Font Awesome icons / Repositories and software development --><li><a href="https://github.com/SimonZhan-code"><i class="fab fa-fw fa-github icon-pad-right" aria-hidden="true"></i>GitHub</a></li><!-- Font Awesome icons / Social media --><li><a href="https://twitter.com/SimonZHAN7"><i class="fab fa-fw fa-x-twitter icon-pad-right" aria-hidden="true"></i>X (formerly Twitter)</a></li></ul></div></div></div><article class="page" itemscope itemtype="http://schema.org/CreativeWork"><meta itemprop="headline" content="Belief-Based Offline Reinforcement Learning for Delay-Robust Policy Optimization"><meta itemprop="description" content="This paper introduces DT-CORL, an offline reinforcement learning framework that learns delay-robust policies from static, delay-free data by jointly training a transformer-based belief model with a constrained policy objective."><meta itemprop="datePublished" content="January 23, 2026"><div class="page__inner-wrap"><header><h1 class="page__title" itemprop="headline">Belief-Based Offline Reinforcement Learning for Delay-Robust Policy Optimization</h1><p>Published in <i>ICLR 2026</i>, 2026</p></header><section class="page__content" itemprop="text"><p>Offline–to–online deployment of reinforcement learning (RL) agents often stumbles over two fundamental gaps: (1) the sim-to-real gap, where real-world systems exhibit latency and other physical imperfections not captured in simulation; and (2) the interaction gap, where policies trained purely offline face out-of-distribution (OOD) issues during online execution, as collecting new interaction data is costly or risky. 
As a result, agents must generalize from static, delay-free datasets to dynamic, delay-prone environments. In this work, we propose DT-CORL (Delay-Transformer belief policy Constrained Offline RL), a novel framework for learning delay-resilient policies solely from static, delay-free offline data. DT-CORL introduces a transformer-based belief model to infer latent states from delayed observations and jointly trains this belief with a constrained policy objective, ensuring that value estimation and belief representation remain aligned throughout learning. Crucially, our method does not require access to delayed transitions during training and outperforms naive history-augmented baselines, SOTA delayed RL methods, and existing belief-based approaches. Empirically, we demonstrate that DT-CORL achieves strong delay-robust generalization across both locomotion and goal-conditioned tasks in the D4RL benchmark under varying delay regimes. Our results highlight that joint belief-policy optimization is essential for bridging the sim-to-real latency gap and achieving stable performance in delayed environments.</p><p><strong>Authors:</strong> Simon Sinong Zhan, Qingyuan Wu, Philip Wang, Frank Yang, Xiangyu Shi, Chao Huang, Qi Zhu</p><div class="citation-section"><h3>Citation</h3><div class="citation-container"> <button class="copy-btn" onclick="copyBibtex()" title="Copy BibTeX to clipboard"> <i class="fas fa-copy"></i> Copy BibTeX </button><pre class="bibtex-code" id="bibtex-content"><code>@article{zhan2025adapting, title={Adapting Offline Reinforcement Learning with Online Delays}, author={Zhan, Simon Sinong and Wu, Qingyuan and Yang, Frank and Shi, Xiangyu and Huang, Chao and Zhu, Qi}, journal={arXiv preprint arXiv:2506.00131}, year={2025} }</code></pre></div></div><div class="download-links"> <a href="https://arxiv.org/pdf/2506.00131" class="btn btn--primary"> <i class="fas fa-file-pdf"></i> Download Paper </a></div></section><footer class="page__meta"></footer><section 
class="page__share"><h4 class="page__share-title">Share on</h4><a href="https://bsky.app/intent/compose?text=https://simon-zhan.com/publication/2026-iclr-dt-corl.md" class="btn btn--bluesky" title="Share on Bluesky"><i class="fab fa-bluesky" aria-hidden="true"></i><span> Bluesky</span></a> <a href="https://www.facebook.com/sharer/sharer.php?u=https://simon-zhan.com/publication/2026-iclr-dt-corl.md" class="btn btn--facebook" title="Share on Facebook"><i class="fab fa-facebook" aria-hidden="true"></i><span> Facebook</span></a> <a href="https://www.linkedin.com/shareArticle?mini=true&url=https://simon-zhan.com/publication/2026-iclr-dt-corl.md" class="btn btn--linkedin" title="Share on LinkedIn"><i class="fab fa-linkedin" aria-hidden="true"></i><span> LinkedIn</span></a> <a href="https://x.com/intent/post?text=https://simon-zhan.com/publication/2026-iclr-dt-corl.md" class="btn btn--x" title="Share on X"><i class="fab fa-x-twitter" aria-hidden="true"></i><span> X (formerly Twitter)</span></a></section><nav class="pagination"> <a href="https://simon-zhan.com/publication/2025-shop-r1-llm-shopping" class="pagination--pager" title="Shop-R1: Rewarding LLMs to Simulate Human Behavior in Online Shopping via Reinforcement Learning ">Previous</a> <a href="https://simon-zhan.com/publication/2026-l4dc-model-based-irl" class="pagination--pager" title="Enhancing Inverse Reinforcement Learning through Encoding Dynamic Information in Reward Shaping ">Next</a></nav></div></article></div>
<script>
  /*
   * copyBibtex: copy the text of #bibtex-content to the clipboard and give
   * brief visual feedback on the .copy-btn button.
   *
   * Invoked from the button's inline onclick handler; takes no arguments and
   * returns nothing.
   *
   * Only block comments are used in this script on purpose: the page is
   * served whitespace-compressed, and a line comment would swallow every
   * statement that ends up after it on the same physical line.
   */
  function copyBibtex() {
    const bibtexContent = document.getElementById('bibtex-content').innerText;
    const btn = document.querySelector('.copy-btn');

    /* Show "Copied!" on the button for two seconds, then restore its label. */
    function showCopied() {
      /* A second click while the confirmation is visible must not capture
         "Copied!" as the label to restore. */
      if (btn.dataset.copied === 'true') {
        return;
      }
      const originalText = btn.innerHTML;
      btn.dataset.copied = 'true';
      btn.innerHTML = '<i class="fas fa-check"></i> Copied!';
      btn.style.backgroundColor = '#28a745';
      setTimeout(function() {
        btn.innerHTML = originalText;
        btn.style.backgroundColor = '';
        delete btn.dataset.copied;
      }, 2000);
    }

    /* Fallback for older browsers: select the text in a temporary textarea
       and use document.execCommand('copy'). */
    function legacyCopy() {
      const textArea = document.createElement('textarea');
      textArea.value = bibtexContent;
      document.body.appendChild(textArea);
      textArea.select();
      document.execCommand('copy');
      document.body.removeChild(textArea);
      showCopied();
    }

    /* navigator.clipboard is undefined outside secure contexts and in older
       browsers; calling writeText on it would throw before any fallback ran. */
    if (navigator.clipboard && navigator.clipboard.writeText) {
      navigator.clipboard.writeText(bibtexContent).then(showCopied, legacyCopy);
    } else {
      legacyCopy();
    }
  }
</script>
<div class="page__footer"><footer> <!-- start custom footer snippets --> <a href="/sitemap/">Sitemap</a> <!-- Support for MathJax --> <script defer src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script> <script defer src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" id="MathJax-script"></script> <!-- Support for Plotly --> <script defer src='https://cdnjs.cloudflare.com/ajax/libs/plotly.js/3.0.1/plotly.min.js'></script> <!-- Support for Mermaid --> <script type="module"> import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs'; mermaid.initialize({startOnLoad:true, theme:'default'}); await mermaid.run({querySelector:'code.language-mermaid'}); </script> <!-- end custom footer snippets --><div class="page__footer-follow"><ul class="social-icons"><li><strong>Follow:</strong></li><li><a href="http://github.com/SimonZhan-code"><i class="fab fa-github" aria-hidden="true"></i> GitHub</a></li><li><a href="https://simon-zhan.com/feed.xml"><i class="fa fa-fw fa-rss-square" aria-hidden="true"></i> Feed</a></li></ul></div><div class="page__footer-copyright"> &copy; 2026 Simon Zhan, Powered by <a href="http://jekyllrb.com" rel="nofollow">Jekyll</a> &amp; <a href="https://github.com/academicpages/academicpages.github.io">AcademicPages</a>, a fork of <a href="https://mademistakes.com/work/minimal-mistakes-jekyll-theme/" rel="nofollow">Minimal Mistakes</a>.<br /> Site last updated 
2026-03-09</div></footer></div><script type="module" src="https://simon-zhan.com/assets/js/main.min.js"></script></body></html>
