---
# Deployment that runs the vllm-openai container image with one NVIDIA GPU.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-regular-gpu
  namespace: default
  # Label on the Deployment object itself; the pod template carries the same
  # app value, which is what the selector matches.
  labels:
    app: test-regular
spec:
  # A single pod; its container requests one whole GPU.
  replicas: 1
  # Must stay in sync with the pod template labels below.
  selector:
    matchLabels:
      app: test-regular
  # Recreate: the existing pod is removed before its replacement is started.
  # NOTE(review): presumably chosen so an old and a new pod never compete for
  # the same GPU on the pinned node — confirm.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: test-regular
    spec:
      volumes:
      # Directory on the node's filesystem, created if it does not exist yet
      # (DirectoryOrCreate). Mounted into the container at /workspace/.
      - name: workspace
        hostPath:
          path: /mnt/nvme/regular
          type: DirectoryOrCreate
      # Memory-backed emptyDir capped at 10Gi. Mounted at /dev/shm.
      - name: shm
        emptyDir:
          sizeLimit: 10Gi
          medium: Memory
      containers:
      - name: test
        # Update the image to your own container repository.
        image: iad.ocir.io/idxzjcdxqj/vllm/vllm-openai:v0.11.0-regular
        # Arguments handed to the image's entrypoint. The first one is
        # presumably the model id to serve — confirm against the image.
        args:
        - cpatonn/Qwen3-30B-A3B-Instruct-2507-AWQ-4bit
        - --max-model-len
        - "8192"
        env:
        # Same path as the workspace volume mount, so anything written under
        # HF_HOME lands on the node's hostPath directory.
        - name: HF_HOME
          value: "/workspace"
        # One GPU, with request equal to limit.
        resources:
          requests:
            nvidia.com/gpu: 1
          limits:
            nvidia.com/gpu: 1
        securityContext:
          capabilities:
            # NOTE(review): IPC_LOCK permits locking memory; presumably needed
            # by the serving process for pinned buffers — confirm.
            add:
            - IPC_LOCK
        volumeMounts:
        - name: workspace
          mountPath: /workspace/
        # Backs /dev/shm with the memory-medium emptyDir volume.
        - name: shm
          mountPath: /dev/shm
      # Replace the nodeName with the name of the node where stargz-store is
      # configured. Quoted so the dotted-numeric value is always read as a
      # string.
      nodeName: "10.140.34.52"
      # Tolerate the nvidia.com/gpu NoSchedule taint regardless of its value.
      tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule