 {"id":521091,"date":"2026-06-17T20:43:27","date_gmt":"2026-06-18T03:43:27","guid":{"rendered":"https:\/\/jorgep.com\/blog\/?p=521091"},"modified":"2026-06-17T21:02:56","modified_gmt":"2026-06-18T04:02:56","slug":"beyond-openrouter-what-the-rest-of-the-market-has-to-offer","status":"publish","type":"post","link":"https:\/\/jorgep.com\/blog\/beyond-openrouter-what-the-rest-of-the-market-has-to-offer\/","title":{"rendered":"Beyond OpenRouter: What the rest of the market has to offer"},"content":{"rendered":"\n<div class=\"wp-block-columns has-theme-palette-7-background-color has-background is-layout-flex wp-container-core-columns-is-layout-5dc627e1 wp-block-columns-is-layout-flex\" style=\"margin-top:0;margin-bottom:0\">\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:80%\">\n<p class=\"wp-block-paragraph\">Part of: <strong> <a href=\"https:\/\/jorgep.com\/blog\/series-ai-learnings\/\">AI Learning Series Here<\/a><\/strong><\/p>\n\n\n<style>.kadence-column395113_97b87a-23 > .kt-inside-inner-col,.kadence-column395113_97b87a-23 > .kt-inside-inner-col:before{border-top-left-radius:0px;border-top-right-radius:0px;border-bottom-right-radius:0px;border-bottom-left-radius:0px;}.kadence-column395113_97b87a-23 > .kt-inside-inner-col{column-gap:var(--global-kb-gap-sm, 1rem);}.kadence-column395113_97b87a-23 > .kt-inside-inner-col{flex-direction:column;}.kadence-column395113_97b87a-23 > .kt-inside-inner-col > .aligncenter{width:100%;}.kadence-column395113_97b87a-23 > .kt-inside-inner-col:before{opacity:0.3;}.kadence-column395113_97b87a-23{position:relative;}@media all and (max-width: 1024px){.kadence-column395113_97b87a-23 > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}@media all and (max-width: 767px){.kadence-column395113_97b87a-23 > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}<\/style>\n<div class=\"wp-block-kadence-column kadence-column395113_97b87a-23\"><div 
class=\"kt-inside-inner-col\"><style>.kadence-column510545_44b637-b5 > .kt-inside-inner-col{padding-top:var(--global-kb-spacing-xs, 1rem);padding-bottom:var(--global-kb-spacing-xs, 1rem);}.kadence-column510545_44b637-b5 > .kt-inside-inner-col,.kadence-column510545_44b637-b5 > .kt-inside-inner-col:before{border-top-left-radius:0px;border-top-right-radius:0px;border-bottom-right-radius:0px;border-bottom-left-radius:0px;}.kadence-column510545_44b637-b5 > .kt-inside-inner-col{column-gap:var(--global-kb-gap-sm, 1rem);}.kadence-column510545_44b637-b5 > .kt-inside-inner-col{flex-direction:column;}.kadence-column510545_44b637-b5 > .kt-inside-inner-col > .aligncenter{width:100%;}.kadence-column510545_44b637-b5 > .kt-inside-inner-col{background-color:var(--global-palette7, #EDF2F7);}.kadence-column510545_44b637-b5:hover > .kt-inside-inner-col{background-color:var(--global-palette8, #F7FAFC);background-image:none;}.kadence-column510545_44b637-b5 > .kt-inside-inner-col:before{opacity:0.3;}.kadence-column510545_44b637-b5{position:relative;}@media all and (max-width: 1024px){.kadence-column510545_44b637-b5 > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}@media all and (max-width: 767px){.kadence-column510545_44b637-b5 > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}<\/style>\n<div class=\"wp-block-kadence-column kadence-column510545_44b637-b5\"><div class=\"kt-inside-inner-col\"><style>.wp-block-kadence-advancedheading.kt-adv-heading510545_f7c5f8-ed, .wp-block-kadence-advancedheading.kt-adv-heading510545_f7c5f8-ed[data-kb-block=\"kb-adv-heading510545_f7c5f8-ed\"]{text-align:center;font-size:var(--global-kb-font-size-md, 1.25rem);font-style:normal;}.wp-block-kadence-advancedheading.kt-adv-heading510545_f7c5f8-ed mark.kt-highlight, .wp-block-kadence-advancedheading.kt-adv-heading510545_f7c5f8-ed[data-kb-block=\"kb-adv-heading510545_f7c5f8-ed\"] 
mark.kt-highlight{font-style:normal;color:#f76a0c;-webkit-box-decoration-break:clone;box-decoration-break:clone;padding-top:0px;padding-right:0px;padding-bottom:0px;padding-left:0px;}.wp-block-kadence-advancedheading.kt-adv-heading510545_f7c5f8-ed img.kb-inline-image, .wp-block-kadence-advancedheading.kt-adv-heading510545_f7c5f8-ed[data-kb-block=\"kb-adv-heading510545_f7c5f8-ed\"] img.kb-inline-image{width:150px;vertical-align:baseline;}<\/style>\n<p class=\"kt-adv-heading510545_f7c5f8-ed wp-block-kadence-advancedheading\" data-kb-block=\"kb-adv-heading510545_f7c5f8-ed\">Quick Links:&nbsp;<a href=\"https:\/\/jorgep.com\/blog\/resources-for-learning-ai\/\">Resources for Learning AI<\/a> | <a href=\"https:\/\/jorgep.com\/blog\/keeping-up-with-ai\/\">Keep up with AI<\/a> | <a href=\"https:\/\/jorgep.com\/blog\/list-of-ai-tools\/\" data-type=\"post\" data-id=\"402818\">List of AI Tools<\/a> | <a href=\"https:\/\/jorgep.com\/blog\/local-ai-series\/\" data-type=\"page\" data-id=\"519365\">Local AI<\/a> | <a href=\"https:\/\/jorgep.com\/blog\/tag\/ai-agents\/\" data-type=\"post_tag\" data-id=\"941\">AI Agents<\/a> |  <a href=\"https:\/\/jorgep.com\/blog\/work-beyond-tomorrow-series\/\" data-type=\"page\" data-id=\"365001\">Future of Work<\/a><\/p>\n<\/div><\/div>\n<\/div><\/div>\n\n\n<style>.kb-row-layout-id395113_d73e95-0d > .kt-row-column-wrap{align-content:start;}:where(.kb-row-layout-id395113_d73e95-0d > .kt-row-column-wrap) > .wp-block-kadence-column{justify-content:start;}.kb-row-layout-id395113_d73e95-0d > .kt-row-column-wrap{column-gap:var(--global-kb-gap-md, 2rem);row-gap:var(--global-kb-gap-none, 0rem );padding-top:var(--global-kb-spacing-xxs, 0.5rem);padding-bottom:var(--global-kb-spacing-xxs, 0.5rem);grid-template-columns:repeat(2, minmax(0, 1fr));}.kb-row-layout-id395113_d73e95-0d > .kt-row-layout-overlay{opacity:0.30;}@media all and (max-width: 1024px){.kb-row-layout-id395113_d73e95-0d > .kt-row-column-wrap{grid-template-columns:repeat(2, minmax(0, 
1fr));}}@media all and (max-width: 767px){.kb-row-layout-id395113_d73e95-0d > .kt-row-column-wrap{grid-template-columns:minmax(0, 1fr);}}<\/style><div class=\"kb-row-layout-wrap kb-row-layout-id395113_d73e95-0d alignnone wp-block-kadence-rowlayout\"><div class=\"kt-row-column-wrap kt-has-2-columns kt-row-layout-equal kt-tab-layout-inherit kt-mobile-layout-row kt-row-valign-top\">\n<style>.kadence-column395113_df36f9-de > .kt-inside-inner-col,.kadence-column395113_df36f9-de > .kt-inside-inner-col:before{border-top-left-radius:0px;border-top-right-radius:0px;border-bottom-right-radius:0px;border-bottom-left-radius:0px;}.kadence-column395113_df36f9-de > .kt-inside-inner-col{column-gap:var(--global-kb-gap-sm, 1rem);}.kadence-column395113_df36f9-de > .kt-inside-inner-col{flex-direction:column;}.kadence-column395113_df36f9-de > .kt-inside-inner-col > .aligncenter{width:100%;}.kadence-column395113_df36f9-de > .kt-inside-inner-col:before{opacity:0.3;}.kadence-column395113_df36f9-de{position:relative;}@media all and (max-width: 1024px){.kadence-column395113_df36f9-de > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}@media all and (max-width: 767px){.kadence-column395113_df36f9-de > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}<\/style>\n<div class=\"wp-block-kadence-column kadence-column395113_df36f9-de\"><div class=\"kt-inside-inner-col\"><style>.wp-block-kadence-advancedheading.kt-adv-heading395113_b3212c-b9, .wp-block-kadence-advancedheading.kt-adv-heading395113_b3212c-b9[data-kb-block=\"kb-adv-heading395113_b3212c-b9\"]{text-align:center;font-size:var(--global-kb-font-size-sm, 0.9rem);line-height:60px;font-style:normal;background-color:#f5a511;}.wp-block-kadence-advancedheading.kt-adv-heading395113_b3212c-b9 mark.kt-highlight, .wp-block-kadence-advancedheading.kt-adv-heading395113_b3212c-b9[data-kb-block=\"kb-adv-heading395113_b3212c-b9\"] 
mark.kt-highlight{font-style:normal;color:#f76a0c;-webkit-box-decoration-break:clone;box-decoration-break:clone;padding-top:0px;padding-right:0px;padding-bottom:0px;padding-left:0px;}.wp-block-kadence-advancedheading.kt-adv-heading395113_b3212c-b9 img.kb-inline-image, .wp-block-kadence-advancedheading.kt-adv-heading395113_b3212c-b9[data-kb-block=\"kb-adv-heading395113_b3212c-b9\"] img.kb-inline-image{width:150px;vertical-align:baseline;}<\/style>\n<p class=\"kt-adv-heading395113_b3212c-b9 wp-block-kadence-advancedheading\" data-kb-block=\"kb-adv-heading395113_b3212c-b9\">Subscribe to <a href=\"https:\/\/go.35s.be\/jtb\" target=\"_blank\" rel=\"noreferrer noopener\"><strong>JorgeTechBits  newsletter<\/strong><\/a><\/p>\n<\/div><\/div>\n\n\n<style>.kadence-column395113_4b4b81-29 > .kt-inside-inner-col,.kadence-column395113_4b4b81-29 > .kt-inside-inner-col:before{border-top-left-radius:0px;border-top-right-radius:0px;border-bottom-right-radius:0px;border-bottom-left-radius:0px;}.kadence-column395113_4b4b81-29 > .kt-inside-inner-col{column-gap:var(--global-kb-gap-sm, 1rem);}.kadence-column395113_4b4b81-29 > .kt-inside-inner-col{flex-direction:column;}.kadence-column395113_4b4b81-29 > .kt-inside-inner-col > .aligncenter{width:100%;}.kadence-column395113_4b4b81-29 > .kt-inside-inner-col:before{opacity:0.3;}.kadence-column395113_4b4b81-29{position:relative;}@media all and (max-width: 1024px){.kadence-column395113_4b4b81-29 > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}@media all and (max-width: 767px){.kadence-column395113_4b4b81-29 > .kt-inside-inner-col{flex-direction:column;justify-content:center;}}<\/style>\n<div class=\"wp-block-kadence-column kadence-column395113_4b4b81-29\"><div class=\"kt-inside-inner-col\"><\/div><\/div>\n\n<\/div><\/div><\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\"><div class=\"wp-block-image\">\n<figure class=\"aligncenter size-large is-resized\"><a 
href=\"https:\/\/jorgep.com\/blog\/book-dont-just-chat-delegate\/\"><img loading=\"lazy\" decoding=\"async\" width=\"640\" height=\"1024\" src=\"https:\/\/jorgep.com\/blog\/wp-content\/uploads\/CoverBook-01-640x1024.jpg\" alt=\"\" class=\"wp-image-520234\" style=\"aspect-ratio:0.6250142320391666;width:98px;height:auto\" srcset=\"https:\/\/jorgep.com\/blog\/wp-content\/uploads\/CoverBook-01-640x1024.jpg 640w, https:\/\/jorgep.com\/blog\/wp-content\/uploads\/CoverBook-01-188x300.jpg 188w, https:\/\/jorgep.com\/blog\/wp-content\/uploads\/CoverBook-01-768x1229.jpg 768w, https:\/\/jorgep.com\/blog\/wp-content\/uploads\/CoverBook-01-960x1536.jpg 960w, https:\/\/jorgep.com\/blog\/wp-content\/uploads\/CoverBook-01-1280x2048.jpg 1280w, https:\/\/jorgep.com\/blog\/wp-content\/uploads\/CoverBook-01.jpg 1600w\" sizes=\"auto, (max-width: 640px) 100vw, 640px\" \/><\/a><figcaption class=\"wp-element-caption\"><a href=\"https:\/\/jorgep.com\/blog\/book-series-ai-dont-just-chat\/\" data-type=\"page\" data-id=\"520242\">Check out the Book Series<\/a><\/figcaption><\/figure>\n<\/div><\/div>\n<\/div>\n\n\n<style>.wp-block-kadence-advancedheading.kt-adv-heading519190_b33a00-c9, .wp-block-kadence-advancedheading.kt-adv-heading519190_b33a00-c9[data-kb-block=\"kb-adv-heading519190_b33a00-c9\"]{font-size:var(--global-kb-font-size-sm, 0.9rem);font-style:normal;}.wp-block-kadence-advancedheading.kt-adv-heading519190_b33a00-c9 mark.kt-highlight, .wp-block-kadence-advancedheading.kt-adv-heading519190_b33a00-c9[data-kb-block=\"kb-adv-heading519190_b33a00-c9\"] mark.kt-highlight{font-style:normal;color:#f76a0c;-webkit-box-decoration-break:clone;box-decoration-break:clone;padding-top:0px;padding-right:0px;padding-bottom:0px;padding-left:0px;}.wp-block-kadence-advancedheading.kt-adv-heading519190_b33a00-c9 img.kb-inline-image, .wp-block-kadence-advancedheading.kt-adv-heading519190_b33a00-c9[data-kb-block=\"kb-adv-heading519190_b33a00-c9\"] 
img.kb-inline-image{width:150px;vertical-align:baseline;}<\/style>\n<p class=\"kt-adv-heading519190_b33a00-c9 wp-block-kadence-advancedheading\" data-kb-block=\"kb-adv-heading519190_b33a00-c9\"><strong>Disclaimer:<\/strong> <strong>I create this content entirely on my own time, and the views expressed here are mine alone (not my employer&#8217;s)<\/strong>. Because I love leveraging new tech, I use AI tools like Gemini, NotebookLM, Claude, Perplexity and others as a &#8220;digital team&#8221; to help research and polish these articles so I can share the best possible insights with you!<\/p>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"p-rc_0a589c126be062d4-72\">Time to revisit my <a href=\"https:\/\/jorgep.com\/blog\/the-rise-of-the-enterprise-token-broker\/\" data-type=\"post\" data-id=\"520724\">The Rise of the Enterprise Token Broker<\/a> blog post and its subject: the <strong>AI Gateway<\/strong>\u2014the centralized &#8220;Token Broker&#8221;.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\" id=\"p-rc_0a589c126be062d4-72\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">I&#8217;ll be honest: writing this post feels a little like breaking up with someone you genuinely like. OpenRouter has been part of my daily workflow for two and a half years. It solved a real problem, it did it elegantly, and I recommended it to probably a dozen people along the way. This isn&#8217;t a hit piece.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">But as my usage matured and my projects got more serious, I started noticing the edges. And when you start noticing the edges, it&#8217;s usually time to see what else is out there. 
So I spent a few weeks doing exactly that \u2014 cataloguing every serious alternative I could find, testing the ones worth testing, and organizing the whole picture into something useful.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">Here&#8217;s what I found.<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">A love letter (with a few footnotes)<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">When I first started using OpenRouter in late 2023, it solved an immediate and annoying problem. I was juggling API keys for Anthropic, OpenAI, Mistral, and a couple of smaller providers, and the context-switching cost was real. OpenRouter collapsed all of that into a single OpenAI-compatible endpoint, and I was productive again within an afternoon.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">For two and a half years, that was the deal. I&#8217;d spin up a new project, point it at OpenRouter, and immediately have access to essentially every model worth caring about. The model catalog was \u2014 and still is \u2014 unmatched. Switching from <code>claude-sonnet-4-6<\/code> to <code>gpt-4o<\/code> to <code>mistral-large<\/code> was a one-line config change. The latency was fine. The pricing was transparent. It just worked.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">But a few things had been quietly nagging at me.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">The <strong>5.5% fee on every credit purchase<\/strong> doesn&#8217;t sound like much until you do the math. It can be pricey! There&#8217;s <strong>no self-hosting option<\/strong>, which matters more now that some of my projects have data residency requirements. Some say the <strong>observability is limited<\/strong>, but I have not really found this to be an issue &#8211; I have several projects on separate &#8220;spaces&#8221; utilizing several models, and I can run reports with no problem!<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">None of these are dealbreakers for a solo developer prototyping on evenings and weekends. 
But they start to matter when you&#8217;re shipping something real.<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\"><em>&#8220;For fast experimentation with many models and minimal setup, nothing beats OpenRouter. But as usage matures, certain gaps become harder to ignore.&#8221;<\/em><\/p>\n<\/blockquote>\n\n\n\n<h2 class=\"wp-block-heading\">Background First: <\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">An <strong>AI gateway<\/strong> (also called an LLM router or model gateway) is a layer that sits between your application and the various AI model providers \u2014 OpenAI, Anthropic, Google, Mistral, and so on. Instead of your app talking directly to each provider&#8217;s API, it talks to the gateway, and the gateway handles the routing, fallbacks, logging, and cost tracking on your behalf.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><strong>OpenRouter<\/strong> is the most well-known example. It gives you a single API endpoint and a single API key, and behind the scenes it connects to hundreds of models across dozens of providers. Want to switch from Claude to GPT-4o to Llama? Change one line of config. No new accounts, no new SDKs, no separate billing relationships to manage.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">The services in the table below all solve a variation of the same core problem, but from different angles. Some, like <strong>LiteLLM<\/strong> and <strong>Bifrost<\/strong>, are open-source tools you host yourself \u2014 you get the same unified API experience but with full control over your infrastructure and no platform fees. 
Others, like <strong>Portkey<\/strong> and <strong>Helicone<\/strong>, are managed products that add a layer of observability and governance on top of whichever providers you&#8217;re already using, giving you per-request logging, cost breakdowns, and guardrails. Then there are <strong>inference providers<\/strong> like <strong>Together AI<\/strong> and <strong>Fireworks AI<\/strong>, which skip the aggregation layer entirely and actually run the models themselves on their own GPU clusters. And on the edges of the category you have tools like <strong>LangChain<\/strong> and <strong>Ray Serve<\/strong>, which are more like full application frameworks where multi-provider routing is one feature among many.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">What ties all of them together is the same underlying insight: the AI model market is fragmented, no single provider is best at everything, and switching costs are high \u2014 so anything that reduces the friction of working across providers has real value.<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">So I did the research<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">The market has matured considerably. 
The &#8220;OpenRouter alternative&#8221; space is no longer just a few scrappy proxies \u2014 it&#8217;s a genuine ecosystem of tools, each with a distinct philosophy about what an AI gateway should be.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">The landscape breaks down into a few clearly distinct categories:<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>Self-Hosting<\/strong> \u2014 run the gateway yourself, zero markup, full data control<\/li>\n\n\n\n<li><strong>Enterprise Gateways<\/strong> \u2014 managed services with production-grade governance<\/li>\n\n\n\n<li><strong>Observability &amp; Analytics<\/strong> \u2014 add tracing and cost intelligence to any stack<\/li>\n\n\n\n<li><strong>Inference Providers<\/strong> \u2014 actually run the models, no middleman<\/li>\n\n\n\n<li><strong>Development Frameworks<\/strong> \u2014 build with LLMs, routing included<\/li>\n\n\n\n<li><strong>Edge &amp; Ecosystem Tools<\/strong> \u2014 purpose-built for specific platforms (Cloudflare, Vercel)<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">The full list <\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">I have not tried them all, but here they are!<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">The dot (\u25cf) marks services that appear most commonly on &#8220;OpenRouter alternatives&#8221; lists. The rest are worth knowing about even if they don&#8217;t show up in every roundup.<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table><thead><tr><th>Service<\/th><th>Description<\/th><th>Category<\/th><\/tr><\/thead><tbody><tr><td><a href=\"https:\/\/litellm.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">LiteLLM<\/a> \u25cf<\/td><td>Open-source self-hosted proxy routing to 100+ LLM providers via a unified OpenAI-compatible API. 
Free to self-host; Enterprise tier adds SSO and dedicated support.<\/td><td>Self-Hosting<\/td><\/tr><tr><td><a href=\"https:\/\/portkey.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Portkey<\/a> \u25cf<\/td><td>Production AI gateway with caching, automatic retries, guardrails, PII redaction, and 1,600+ model support. Strong governance and compliance features. From $49\/mo.<\/td><td>Enterprise Gateway<\/td><\/tr><tr><td><a href=\"https:\/\/developers.cloudflare.com\/ai-gateway\/\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Cloudflare AI Gateway<\/a> \u25cf<\/td><td>Edge-native managed gateway. Analytics, caching, rate limiting, and A\/B model testing built in. Best for teams already on Cloudflare.<\/td><td>Enterprise Gateway<\/td><\/tr><tr><td><a href=\"https:\/\/helicone.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Helicone<\/a> \u25cf<\/td><td>Observability-first proxy with one-line integration. Semantic caching, cost analytics, and token tracking layered onto any LLM provider.<\/td><td>Observability<\/td><\/tr><tr><td><a href=\"https:\/\/vercel.com\/ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Vercel AI Gateway<\/a> \u25cf<\/td><td>Routing layer tightly integrated with the Vercel AI SDK and Next.js. Supports fallback, observability, and model switching within the Vercel ecosystem.<\/td><td>Ecosystem<\/td><\/tr><tr><td><a href=\"https:\/\/truefoundry.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">TrueFoundry<\/a> \u25cf<\/td><td>Enterprise LLM routing and deployment platform with governance, compliance, cost controls, and Kubernetes-native infrastructure. Recognized in Gartner Hype Cycle 2026.<\/td><td>MLOps Platform<\/td><\/tr><tr><td><a href=\"https:\/\/konghq.com\/products\/kong-ai-gateway\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Kong AI Gateway<\/a> \u25cf<\/td><td>AI layer built on the Kong API platform. 
Enterprise-grade policy enforcement, authentication, traffic control, and RBAC for LLM traffic.<\/td><td>Enterprise Gateway<\/td><\/tr><tr><td><a href=\"https:\/\/requesty.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Requesty<\/a> \u25cf<\/td><td>Lightweight gateway for simple multi-provider LLM routing with minimal setup. Free plan includes $6 in credits; Pro is pay-as-you-go at a 5% markup.<\/td><td>Lightweight Routing<\/td><\/tr><tr><td><a href=\"https:\/\/together.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Together AI<\/a> \u25cf<\/td><td>Full-stack inference platform for open-source models. Batch inference (50% discount), dedicated GPU endpoints, fine-tuning, and multi-modal support.<\/td><td>Inference Provider<\/td><\/tr><tr><td><a href=\"https:\/\/edenai.co\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Eden AI<\/a> \u25cf<\/td><td>Aggregates 500+ models across LLMs, OCR, translation, speech, and moderation into a single API. EU-based with GDPR-native data residency. 
Pay-as-you-go.<\/td><td>Multi-Model Aggregation<\/td><\/tr><tr><td><a href=\"https:\/\/langchain.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">LangChain<\/a><\/td><td>Framework for building LLM-powered applications with composable chains, memory, agents, and flexible provider routing.<\/td><td>Framework<\/td><\/tr><tr><td><a href=\"https:\/\/docs.ray.io\/en\/latest\/serve\/index.html\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Ray Serve<\/a><\/td><td>Scalable model-serving framework designed for distributed, high-throughput production inference workloads.<\/td><td>Self-Hosting<\/td><\/tr><tr><td><a href=\"https:\/\/assemblyai.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">AssemblyAI<\/a><\/td><td>Specialized API for speech recognition, transcription, audio intelligence, and real-time audio processing.<\/td><td>Specialized Audio<\/td><\/tr><tr><td><a href=\"https:\/\/octoml.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">OctoML<\/a><\/td><td>Automated model deployment and optimization platform focused on improving inference performance and efficiency.<\/td><td>Model Optimization<\/td><\/tr><tr><td><a href=\"https:\/\/algorithmia.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Algorithmia<\/a><\/td><td>AI model deployment and microservices management with a marketplace for sharing and consuming ML algorithms.<\/td><td>Model Deployment<\/td><\/tr><tr><td><a href=\"https:\/\/github.com\/maximhq\/bifrost\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Bifrost<\/a><\/td><td>High-performance open-source AI gateway in Go. Connects 23+ providers, adds just 11\u00b5s overhead at 5,000 RPS. Self-hosted or in-VPC with RBAC and audit logs.<\/td><td>Self-Hosting<\/td><\/tr><tr><td><a href=\"https:\/\/ngrok.com\/ai-gateway\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">ngrok AI Gateway<\/a><\/td><td>Treats AI routing as part of a broader networking layer. 
Ideal when local model access and network policy need to share a single control plane.<\/td><td>Enterprise Gateway<\/td><\/tr><tr><td><a href=\"https:\/\/orq.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Orq.ai<\/a><\/td><td>Collaborative platform for shipping LLM features \u2014 prompt versioning, RAG knowledge management, deployment gating, and built-in observability in one workspace.<\/td><td>Observability<\/td><\/tr><tr><td><a href=\"https:\/\/fireworks.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Fireworks AI<\/a><\/td><td>Inference provider running models on its own GPU clusters with no middleman markup. Fast inference on popular open-source models.<\/td><td>Inference Provider<\/td><\/tr><tr><td><a href=\"https:\/\/puter.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Puter.js<\/a><\/td><td>Frontend-focused library for adding AI features to web apps with zero backend or API costs. User-pays model ideal for client-side integrations.<\/td><td>Framework<\/td><\/tr><tr><td><a href=\"https:\/\/replicate.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Replicate<\/a><\/td><td>Infrastructure-first platform for running AI models via API. Strong for image, audio, and specialized models. Currently being acquired by Cloudflare.<\/td><td>Inference Provider<\/td><\/tr><tr><td><a href=\"https:\/\/modelz.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">ModelZ<\/a><\/td><td>Unified API to access and route between various AI models from different providers, with a focus on simplicity and developer experience.<\/td><td>Model Routing<\/td><\/tr><tr><td><a href=\"https:\/\/openpipe.ai\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">OpenPipe<\/a><\/td><td>Focused on fine-tuning and routing for production LLM applications. 
Lets you collect real request data, fine-tune a cheaper model on it, and route traffic accordingly.<\/td><td>Fine-Tuning &amp; Routing<\/td><\/tr><tr><td><a href=\"https:\/\/bentoml.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">BentoML<\/a><\/td><td>Open-source framework for packaging, serving, and routing ML and LLM models in production. Supports multi-model deployments and custom inference pipelines.<\/td><td>Model Serving &amp; Routing<\/td><\/tr><tr><td><a href=\"https:\/\/anyscale.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Anyscale Endpoints<\/a><\/td><td>Managed LLM API built on Ray, offering access to open-source models with production-grade scaling, routing, and dedicated endpoints.<\/td><td>Managed LLM Endpoints<\/td><\/tr><tr><td><a href=\"https:\/\/predibase.com\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">Predibase<\/a><\/td><td>Platform for fine-tuning, serving, and routing LLMs with a focus on enterprise deployment. Specializes in LoRA-based fine-tuning at scale.<\/td><td>Enterprise LLM Platform<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<h2 class=\"wp-block-heading\">Five that genuinely surprised me<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">I have not yet tried them all, but I went into this research expecting most of these to be slight variations on the same theme. A few stood out as genuinely different in approach.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Bifrost \u2014 the performance case<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">If raw throughput matters to you, Bifrost&#8217;s numbers are hard to ignore: 11 microseconds of overhead at 5,000 requests per second, versus 25\u201340ms for a managed service like OpenRouter. It&#8217;s open-source, self-hostable as a single Go binary or Docker container, and connects to 23+ providers. 
The zero-markup model means you pay providers at list rate with no platform surcharge. Worth a serious look if you&#8217;re running agentic workloads where latency compounds.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Eden AI \u2014 the compliance case<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Most LLM gateways are US-based by default, which creates friction for teams with GDPR or data residency requirements. Eden AI is headquartered in France and is GDPR-native with EU data residency out of the box \u2014 not an add-on. It also goes well beyond LLM routing: one API gives access to OCR, translation, speech, and moderation services. If you&#8217;re building something that touches multiple AI modalities and you need EU compliance, this is the obvious shortlist candidate.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Portkey \u2014 the production ops case<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">Portkey is what OpenRouter would be if it had been designed from day one for teams rather than individual developers. The observability is genuinely impressive: granular logs, per-user cost tracking, prompt versioning, PII redaction, jailbreak detection, and full audit trails. The 1,600+ model support is almost beside the point \u2014 the real value is the control plane. Free tier available; production starts at $49\/month.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Together AI \u2014 the inference case<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">OpenRouter is an aggregator; Together AI actually runs the models. That distinction matters when you need batch inference (at a 50% discount versus real-time pricing), dedicated GPU endpoints, or fine-tuning capabilities. The catch is that it&#8217;s essentially open-source models only \u2014 no GPT-4, no Claude. 
But for teams building on Llama, Mistral, or Qwen variants, this is a more direct path than routing through an aggregation layer.<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">TrueFoundry \u2014 the MLOps case<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">If you have ML engineers and existing model pipelines, TrueFoundry&#8217;s angle is different from all of the above. It&#8217;s not primarily a gateway \u2014 it&#8217;s an MLOps platform where the gateway is one component among autoscaling, model registry, experiment tracking, and Kubernetes-native deployment. The recent Gartner Hype Cycle recognition suggests it&#8217;s landing well with enterprise platform teams.<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">What I&#8217;m actually doing<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">I&#8217;m not leaving OpenRouter entirely. For rapid prototyping and early-stage projects, it&#8217;s still the fastest way to get multi-model access. But I&#8217;m layering <strong>Helicone<\/strong> on top for observability \u2014 one URL change, and the semantic caching has already cut some of my repeat-query costs noticeably.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">For the one project where latency actually matters, I&#8217;m running a <strong>Bifrost<\/strong> instance internally.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">For anything that needs to go to production seriously, <strong>Portkey<\/strong> is where I&#8217;m leaning. The governance features are genuinely useful once you&#8217;re managing more than one person&#8217;s API access and need to track costs by team or project.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">For my local AI machines, I have set up LiteLLM and am using it directly with some LLM vendors.<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\"><em>&#8220;There&#8217;s no single winner for everyone. 
The right tool is the one that matches the level of control, simplicity, and operational ownership your team actually needs.&#8221;<\/em><\/p>\n<\/blockquote>\n\n\n\n<h2 class=\"wp-block-heading\">Key takeaways<\/h2>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>If you need\u2026<\/th><th>Consider\u2026<\/th><\/tr><\/thead><tbody><tr><td>Fast prototyping, widest model catalog<\/td><td>OpenRouter (stay)<\/td><\/tr><tr><td>Self-hosting, zero markup<\/td><td>LiteLLM or Bifrost<\/td><\/tr><tr><td>Observability with minimal effort<\/td><td>Helicone<\/td><\/tr><tr><td>EU data residency, multi-modal AI<\/td><td>Eden AI<\/td><\/tr><tr><td>Production governance, team management<\/td><td>Portkey<\/td><\/tr><tr><td>Open-source model fine-tuning or batch jobs<\/td><td>Together AI<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">The space moves fast. If I&#8217;ve missed something worth including, or if your experience with any of these differs from what I&#8217;ve described \u2014 I&#8217;d genuinely like to know.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><em>Pricing details reflect public documentation as of June 2026 and may change.<\/em><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><br><\/p>\n\n\n<style>.wp-block-kadence-advancedheading.kt-adv-heading407818_afcbba-c7, .wp-block-kadence-advancedheading.kt-adv-heading407818_afcbba-c7[data-kb-block=\"kb-adv-heading407818_afcbba-c7\"]{font-size:var(--global-kb-font-size-sm, 0.9rem);font-style:normal;}.wp-block-kadence-advancedheading.kt-adv-heading407818_afcbba-c7 mark.kt-highlight, .wp-block-kadence-advancedheading.kt-adv-heading407818_afcbba-c7[data-kb-block=\"kb-adv-heading407818_afcbba-c7\"] 
mark.kt-highlight{font-style:normal;color:#f76a0c;-webkit-box-decoration-break:clone;box-decoration-break:clone;padding-top:0px;padding-right:0px;padding-bottom:0px;padding-left:0px;}.wp-block-kadence-advancedheading.kt-adv-heading407818_afcbba-c7 img.kb-inline-image, .wp-block-kadence-advancedheading.kt-adv-heading407818_afcbba-c7[data-kb-block=\"kb-adv-heading407818_afcbba-c7\"] img.kb-inline-image{width:150px;vertical-align:baseline;}<\/style>\n<p class=\"kt-adv-heading407818_afcbba-c7 wp-block-kadence-advancedheading\" data-kb-block=\"kb-adv-heading407818_afcbba-c7\">Have questions, ideas to share, or just want to connect? I\u2019d love to hear from you! Check out my <a href=\"https:\/\/jorgep.com\/blog\/about\/\">About Page<\/a> to learn more about me or connect with me.<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">Do you have ideas or comments? <\/h2>\n\n\n<style>.wp-block-kadence-advancedheading.kt-adv-heading517671_7a57f6-b7, .wp-block-kadence-advancedheading.kt-adv-heading517671_7a57f6-b7[data-kb-block=\"kb-adv-heading517671_7a57f6-b7\"]{font-size:var(--global-kb-font-size-md, 1.25rem);font-style:normal;}.wp-block-kadence-advancedheading.kt-adv-heading517671_7a57f6-b7 mark.kt-highlight, .wp-block-kadence-advancedheading.kt-adv-heading517671_7a57f6-b7[data-kb-block=\"kb-adv-heading517671_7a57f6-b7\"] mark.kt-highlight{font-style:normal;color:#f76a0c;-webkit-box-decoration-break:clone;box-decoration-break:clone;padding-top:0px;padding-right:0px;padding-bottom:0px;padding-left:0px;}.wp-block-kadence-advancedheading.kt-adv-heading517671_7a57f6-b7 img.kb-inline-image, .wp-block-kadence-advancedheading.kt-adv-heading517671_7a57f6-b7[data-kb-block=\"kb-adv-heading517671_7a57f6-b7\"] img.kb-inline-image{width:150px;vertical-align:baseline;}<\/style>\n<p class=\"kt-adv-heading517671_7a57f6-b7 wp-block-kadence-advancedheading\" data-kb-block=\"kb-adv-heading517671_7a57f6-b7\">Please let me know!   
Send me a note to my X account: <a href=\"https:\/\/x.com\/jorper98\" data-type=\"link\" data-id=\"https:\/\/x.com\/jorper98\" target=\"_blank\" rel=\"noopener noreferrer nofollow\">@jorper98<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>Time to revisit The Rise of the Enterprise Token Broker blog post The AI Gateway\u2014the centralized &#8220;Token Broker&#8221;. I&#8217;ll be honest: writing this post feels a little like breaking up with someone you genuinely like. OpenRouter has been part of my daily workflow for two and a half years. It solved a real problem, it&#8230;<\/p>\n","protected":false},"author":2,"featured_media":427864,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_kad_blocks_custom_css":"","_kad_blocks_head_custom_js":"","_kad_blocks_body_custom_js":"","_kad_blocks_footer_custom_js":"","ngg_post_thumbnail":0,"episode_type":"","audio_file":"","podmotor_file_id":"","podmotor_episode_id":"","cover_image":"","cover_image_id":"","duration":"","filesize":"","filesize_raw":"","date_recorded":"","explicit":"","block":"","itunes_episode_number":"","itunes_title":"","itunes_season_number":"","itunes_episode_type":"","_kad_post_transparent":"","_kad_post_title":"","_kad_post_layout":"","_kad_post_sidebar_id":"","_kad_post_content_style":"","_kad_post_vertical_padding":"","_kad_post_feature":"","_kad_post_feature_position":"","_kad_post_header":false,"_kad_post_footer":false,"_kad_post_classname":"","footnotes":""},"categories":[1031,441],"tags":[471,941,930,986],"class_list":["post-521091","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-ai-learnings-series","category-tech-talk","tag-ai","tag-ai-agents","tag-ai-series","tag-local-ai"],"taxonomy_info":{"category":[{"value":1031,"label":"AI Learnings Series"},{"value":441,"label":"Tech Talk"}],"post_tag":[{"value":471,"label":"AI"},{"value":941,"label":"AI Agents"},{"value":930,"label":"AI 
Series"},{"value":986,"label":"Local AI"}]},"featured_image_src_large":["https:\/\/jorgep.com\/blog\/wp-content\/uploads\/FeaturedImage-Topic-AI-1024x512.png",1024,512,true],"author_info":{"display_name":"Jorge Pereira","author_link":"https:\/\/jorgep.com\/blog\/author\/jorge\/"},"comment_info":0,"category_info":[{"term_id":1031,"name":"AI Learnings Series","slug":"ai-learnings-series","term_group":0,"term_taxonomy_id":1041,"taxonomy":"category","description":"","parent":0,"count":32,"filter":"raw","cat_ID":1031,"category_count":32,"category_description":"","cat_name":"AI Learnings Series","category_nicename":"ai-learnings-series","category_parent":0},{"term_id":441,"name":"Tech Talk","slug":"tech-talk","term_group":0,"term_taxonomy_id":451,"taxonomy":"category","description":"","parent":0,"count":728,"filter":"raw","cat_ID":441,"category_count":728,"category_description":"","cat_name":"Tech Talk","category_nicename":"tech-talk","category_parent":0}],"tag_info":[{"term_id":471,"name":"AI","slug":"ai","term_group":0,"term_taxonomy_id":481,"taxonomy":"post_tag","description":"","parent":0,"count":183,"filter":"raw"},{"term_id":941,"name":"AI Agents","slug":"ai-agents","term_group":0,"term_taxonomy_id":951,"taxonomy":"post_tag","description":"","parent":0,"count":142,"filter":"raw"},{"term_id":930,"name":"AI Series","slug":"ai-series","term_group":0,"term_taxonomy_id":940,"taxonomy":"post_tag","description":"","parent":0,"count":191,"filter":"raw"},{"term_id":986,"name":"Local 
AI","slug":"local-ai","term_group":0,"term_taxonomy_id":996,"taxonomy":"post_tag","description":"","parent":0,"count":52,"filter":"raw"}],"_links":{"self":[{"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/posts\/521091","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/comments?post=521091"}],"version-history":[{"count":4,"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/posts\/521091\/revisions"}],"predecessor-version":[{"id":521097,"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/posts\/521091\/revisions\/521097"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/media\/427864"}],"wp:attachment":[{"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/media?parent=521091"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/categories?post=521091"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/jorgep.com\/blog\/wp-json\/wp\/v2\/tags?post=521091"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}