[{"data":1,"prerenderedAt":455},["ShallowReactive",2],{"navigation":3,"\u002Fproviders\u002Fdeepseek":99,"\u002Fproviders\u002Fdeepseek-surround":450},[4,23,44,78],{"title":5,"path":6,"stem":7,"children":8,"icon":22},"Getting Started","\u002Fgetting-started","1.getting-started\u002F1.index",[9,12,17],{"title":10,"path":6,"stem":7,"icon":11},"Introduction","i-lucide-house",{"title":13,"path":14,"stem":15,"icon":16},"Installation","\u002Fgetting-started\u002Finstallation","1.getting-started\u002F2.installation","i-lucide-download",{"title":18,"path":19,"stem":20,"icon":21},"MCP Setup","\u002Fgetting-started\u002Fmcp-setup","1.getting-started\u002F3.mcp-setup","i-lucide-plug","i-lucide-rocket",{"title":24,"icon":25,"path":26,"stem":27,"children":28,"page":43},"CLI","i-lucide-terminal","\u002Fcli","2.cli",[29,33,38],{"title":30,"path":31,"stem":32,"icon":25},"Usage","\u002Fcli\u002Fusage","2.cli\u002F1.usage",{"title":34,"path":35,"stem":36,"icon":37},"Options","\u002Fcli\u002Foptions","2.cli\u002F2.options","i-lucide-sliders-horizontal",{"title":39,"path":40,"stem":41,"icon":42},"Batch & JSON","\u002Fcli\u002Fbatch-json","2.cli\u002F3.batch-json","i-lucide-package",false,{"title":45,"icon":46,"path":47,"stem":48,"children":49,"page":43},"Providers","i-lucide-cpu","\u002Fproviders","3.providers",[50,55,60,65,70,74],{"title":51,"path":52,"stem":53,"icon":54},"Claude (Area-Based)","\u002Fproviders\u002Fclaude","3.providers\u002F1.claude","i-lucide-square",{"title":56,"path":57,"stem":58,"icon":59},"GPT-4o & GPT-5 (Tiling)","\u002Fproviders\u002Fgpt","3.providers\u002F2.gpt","i-lucide-grid-2x2",{"title":61,"path":62,"stem":63,"icon":64},"Gemini (Large Tiles)","\u002Fproviders\u002Fgemini","3.providers\u002F3.gemini","i-lucide-grid-3x3",{"title":66,"path":67,"stem":68,"icon":69},"Llama Vision (Tiles)","\u002Fproviders\u002Fllama","3.providers\u002F4.llama","i-simple-icons-meta",{"title":71,"path":72,"stem":73,"icon":64},"Qwen-VL (Patch Grid)","\u002Fproviders\u002Fqwen","3.providers\u002F5.qwen",{"title":75,"path":76,"stem":77,"icon":59},"DeepSeek-VL (Open Weights)","\u002Fproviders\u002Fdeepseek","3.providers\u002F6.deepseek",{"title":79,"icon":80,"path":81,"stem":82,"children":83,"page":43},"Guides","i-lucide-book-open","\u002Fguides","4.guides",[84,89,94],{"title":85,"path":86,"stem":87,"icon":88},"Python Bindings","\u002Fguides\u002Fpython-bindings","4.guides\u002F1.python-bindings","i-lucide-file-code",{"title":90,"path":91,"stem":92,"icon":93},"Sandbox (Think in Code)","\u002Fguides\u002Fsandbox","4.guides\u002F2.sandbox","i-lucide-flask-conical",{"title":95,"path":96,"stem":97,"icon":98},"Crawler Integration","\u002Fguides\u002Fcrawler-integration","4.guides\u002F3.crawler-integration","i-lucide-globe",{"id":100,"title":75,"body":101,"description":443,"extension":444,"links":445,"meta":446,"navigation":447,"path":76,"seo":448,"stem":77,"__hash__":449},"docs\u002F3.providers\u002F6.deepseek.md",{"type":102,"value":103,"toc":435},"minimark",[104,130,135,158,165,175,179,244,251,255,262,289,307,311,314,376,382,386,431],[105,106,109],"callout",{"color":107,"icon":108},"warning","i-lucide-triangle-alert",[110,111,112,116,117,121,122,125,126,129],"p",{},[113,114,115],"strong",{},"Open-weights, not a billed API."," At time of writing DeepSeek's public API (",[118,119,120],"code",{},"deepseek-chat"," \u002F ",[118,123,124],{},"deepseek-reasoner",") is text-only — DeepSeek-VL2 ships as open weights. So optimizing here saves your ",[113,127,128],{},"local-inference context budget and latency",", not API dollars. Verify if\u002Fwhen a vision endpoint ships.",[131,132,134],"h2",{"id":133},"how-deepseek-vl2-processes-images","How DeepSeek-VL2 processes images",[110,136,137,138,141,142,145,146,149,150,153,154,157],{},"DeepSeek-VL2 uses a ",[113,139,140],{},"384×384 global view"," plus ",[113,143,144],{},"dynamic local tiles"," on an anyres canvas of ",[118,147,148],{},"(m·384, n·384)"," with ",[118,151,152],{},"m·n ≤ 9",". The encoder is SigLIP-SO400M-384 (14px patch) with a 2× downsample, giving a per-view grid side of ",[118,155,156],{},"h = ⌈(384\u002F14)\u002F2⌉ = 14",".",[110,159,160,161,164],{},"The exact token count (from ",[118,162,163],{},"tokenize_with_images","):",[166,167,172],"pre",{"className":168,"code":170,"language":171},[169],"language-text","h          = 14\nglobal view = h·(h+1) = 210          # +1 per row = line separator\nseparator   = 1\nlocal tiles = (nh·h)·(nw·h + 1)      # nw·nh ≤ 9\ntokens      = 210 + 1 + local\n","text",[118,173,170],{"__ignoreMap":174},"",[131,176,178],{"id":177},"the-384px-boundary","The 384px boundary",[180,181,182,198],"table",{},[183,184,185],"thead",{},[186,187,188,192,195],"tr",{},[189,190,191],"th",{},"Image",[189,193,194],{},"nw × nh",[189,196,197],{},"Tokens",[199,200,201,216,230],"tbody",{},[186,202,203,207,210],{},[204,205,206],"td",{},"≤ 384×384",[204,208,209],{},"1×1",[204,211,212,213],{},"211 + 210 = ",[113,214,215],{},"421",[186,217,218,221,224],{},[204,219,220],{},"768×768",[204,222,223],{},"2×2",[204,225,226,227],{},"211 + 28·29 = ",[113,228,229],{},"1,023",[186,231,232,235,238],{},[204,233,234],{},"1152×1152",[204,236,237],{},"3×3",[204,239,240,241],{},"211 + 42·43 = ",[113,242,243],{},"2,017",[110,245,246,247,250],{},"Snapping each side down to the 384px grid keeps ",[118,248,249],{},"nw·nh"," (and the token bill) minimal.",[131,252,254],{"id":253},"optimization-strategy","Optimization strategy",[110,256,257,258,261],{},"≤384px stays a single tile; otherwise snap each side ",[113,259,260],{},"down"," to the 384px grid.",[166,263,268],{"className":264,"code":265,"filename":266,"language":267,"meta":174,"style":174},"language-bash shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","vision-squeezer image.png --model deepseek\n","Terminal","bash",[118,269,270],{"__ignoreMap":174},[271,272,275,279,283,286],"span",{"class":273,"line":274},"line",1,[271,276,278],{"class":277},"sBMFI","vision-squeezer",[271,280,282],{"class":281},"sfazB"," image.png",[271,284,285],{"class":281}," --model",[271,287,288],{"class":281}," deepseek\n",[110,290,291,292,295,296,299,300,303,304,157],{},"CLI aliases: ",[118,293,294],{},"deepseek",", ",[118,297,298],{},"deepseek-vl",". MCP ",[118,301,302],{},"target_model",": ",[118,305,306],{},"\"deepseek\"",[131,308,310],{"id":309},"token-savings","Token savings",[110,312,313],{},"Crossing a 384px boundary adds a whole tile row or column — snapping back undoes it.",[180,315,316,332],{},[183,317,318],{},[186,319,320,323,326,329],{},[189,321,322],{},"Scenario",[189,324,325],{},"Before",[189,327,328],{},"After",[189,330,331],{},"Saved",[199,333,334,356],{},[186,335,336,339,345,351],{},[204,337,338],{},"800×768 → snap to 768×768",[204,340,341,342],{},"3×2 tiles · ",[113,343,344],{},"1,415 tok",[204,346,347,348],{},"2×2 tiles · ",[113,349,350],{},"1,023 tok",[204,352,353],{},[113,354,355],{},"−28%",[186,357,358,361,366,371],{},[204,359,360],{},"385×384 (1px over a tile) → 384×384",[204,362,363],{},[113,364,365],{},"617 tok",[204,367,368],{},[113,369,370],{},"421 tok",[204,372,373],{},[113,374,375],{},"−32%",[110,377,378,379,381],{},"A single pixel past a 384px edge can cost ~30% more. Snapping each side down to the grid keeps ",[118,380,249],{}," minimal.",[131,383,385],{"id":384},"source","Source",[110,387,388,389,392,393,397,398,405,406,409,410,413,414,295,417,295,420,423,424,427,428,157],{},"Formula taken verbatim from the ",[113,390,391],{},"DeepSeek-VL2"," technical report, §2 ",[394,395,396],"em",{},"Model Architecture"," (",[399,400,404],"a",{"href":401,"rel":402},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.10302",[403],"nofollow","arXiv:2412.10302",", submitted ",[113,407,408],{},"13 Dec 2024",") and the reference implementation ",[118,411,412],{},"processing_deepseek_vl_v2.py",". Grid constants (",[118,415,416],{},"patch_size 14",[118,418,419],{},"siglip_so400m_patch14_384",[118,421,422],{},"candidate_resolutions"," up to 1152×1152) cross-checked against the model ",[118,425,426],{},"config.json",". Verified ",[113,429,430],{},"2026-06-11",[432,433,434],"style",{},"html pre.shiki code .sBMFI, html code.shiki .sBMFI{--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":174,"searchDepth":274,"depth":436,"links":437},2,[438,439,440,441,442],{"id":133,"depth":436,"text":134},{"id":177,"depth":436,"text":178},{"id":253,"depth":436,"text":254},{"id":309,"depth":436,"text":310},{"id":384,"depth":436,"text":385},"How DeepSeek-VL2 tiles images at 384px and why the win is local-inference context, not API billing.","md",null,{},{"icon":59},{"title":75,"description":443},"rR9ZPv6Dm0PEZaZMEcTgpdP5kWl6ZD7eAzcXHvvfm8w",[451,453],{"title":71,"path":72,"stem":73,"description":452,"icon":64,"children":-1},"How Alibaba Qwen2-VL \u002F Qwen2.5-VL tokenizes images on a 28px grid and how VisionSqueezer snaps to it.",{"title":85,"path":86,"stem":87,"description":454,"icon":88,"children":-1},"Use VisionSqueezer from Python via native pyo3 wheels.",1782053692263]