[ PROMPT_NODE_22926 ]
Prompt Engineering Outlines 后端
[ SKILL_DOCUMENTATION ]
# 后端配置指南
配置 Outlines 以适配不同模型后端的完整指南。
## 目录
- 本地模型 (Transformers, llama.cpp, vLLM)
- API 模型 (OpenAI)
- 性能对比
- 配置示例
- 生产环境部署
## Transformers (Hugging Face)
### 基础设置
```python
import outlines

# 从 Hugging Face 加载模型
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")

# 配合生成器使用
generator = outlines.generate.json(model, YourModel)
result = generator("Your prompt")
```
### GPU 配置
```python
# 使用 CUDA GPU
model = outlines.models.transformers(
    "microsoft/Phi-3-mini-4k-instruct",
    device="cuda"
)

# 使用特定 GPU
model = outlines.models.transformers(
    "microsoft/Phi-3-mini-4k-instruct",
    device="cuda:0"  # GPU 0
)

# 使用 CPU
model = outlines.models.transformers(
    "microsoft/Phi-3-mini-4k-instruct",
    device="cpu"
)

# 使用 Apple Silicon MPS
model = outlines.models.transformers(
    "microsoft/Phi-3-mini-4k-instruct",
    device="mps"
)
```
### 高级配置
```python
# FP16 以实现更快的推理
model = outlines.models.transformers(
    "microsoft/Phi-3-mini-4k-instruct",
    device="cuda",
    model_kwargs={
        "torch_dtype": "float16"
    }
)

# 8-bit 量化 (更少内存占用)
model = outlines.models.transformers(
    "microsoft/Phi-3-mini-4k-instruct",
    device="cuda",
    model_kwargs={
        "load_in_8bit": True,
        "device_map": "auto"
    }
)

# 4-bit 量化 (极低内存占用)
model = outlines.models.transformers(
    "meta-llama/Llama-3.1-70B-Instruct",
    device="cuda",
    model_kwargs={
        "load_in_4bit": True,
        "device_map": "auto",
        "bnb_4bit_compute_dtype": "float16"
    }
)

# 多 GPU
model = outlines.models.transformers(
    "meta-llama/Llama-3.1-70B-Instruct",
    device="cuda",
    model_kwargs={
        "device_map": "auto",  # 自动 GPU 分配
        "max_memory": {0: "40GB", 1: "40GB"}  # 单 GPU 限制
    }
)
```
### 热门模型
python
# Phi 系列 (Microsoft)
model = outlines.models.transformers("microsoft/Phi-4-mini-instruct")
model = outlines.models.transformers("microsoft/Phi-3-medium-4k-instruct")
# Llama 3.1 (Meta)
model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
model = outlines.models.transformers("meta-llama/Llama-3.1-70B-Instruct")
model = outlines.models.transformers("meta-llama/Llama-3.1-405B-Instruct")
# Mistral (Mi