---
# Deployment "test-regular-gpu": runs one replica of the vllm-openai image on
# a fixed node with a single NVIDIA GPU. Model files are cached on a hostPath
# volume and /dev/shm is backed by an in-memory emptyDir.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-regular-gpu
  namespace: default
  labels:
    app: test-regular
spec:
  # Recreate terminates the existing pod before the replacement is created.
  # NOTE(review): presumably chosen because old and new pods would otherwise
  # compete for the same GPU on the pinned node - confirm.
  strategy:
    type: Recreate
  replicas: 1
  selector:
    matchLabels:
      app: test-regular
  template:
    metadata:
      labels:
        app: test-regular
    spec:
      volumes:
        # Node-local directory; created on the node if it does not exist.
        - name: workspace
          hostPath:
            path: /mnt/nvme/regular
            type: DirectoryOrCreate
        # RAM-backed scratch space mounted at /dev/shm, capped at 10Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: "10Gi"
      containers:
        ## Update the image to your own container repository
        - image: iad.ocir.io/idxzjcdxqj/vllm/vllm-openai:v0.11.0-regular
          name: test
          # Arguments passed to the image's entrypoint: the model identifier
          # followed by the --max-model-len option and its value.
          args:
            - cpatonn/Qwen3-30B-A3B-Instruct-2507-AWQ-4bit
            - --max-model-len
            - "8192"
          env:
            # Points HF_HOME at the hostPath mount below.
            # NOTE(review): presumably so downloaded model files persist on
            # the node across pod restarts - confirm.
            - name: HF_HOME
              value: "/workspace"
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1
          securityContext:
            capabilities:
              add:
                - IPC_LOCK
          volumeMounts:
            - mountPath: /workspace/
              name: workspace
            - mountPath: /dev/shm
              name: shm
      ## Replace the nodeName with the name of the node where stargz-store is configured.
      # Quoted so the IP-shaped node name is always read as a string.
      nodeName: "10.140.34.52"
      # Tolerate the NoSchedule taint keyed nvidia.com/gpu, whatever its value.
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule