[ PROMPT_NODE_26780 ]
example_usage
[ SKILL_DOCUMENTATION ]
# MarkItDown 使用示例
本文档提供了在各种场景下使用 MarkItDown 的实际示例。
## 基础示例
### 1. 简单文件转换
python
from markitdown import MarkItDown

md = MarkItDown()

# One sample input per format, in order: PDF, Word document, PowerPoint deck.
for source in ("research_paper.pdf", "manuscript.docx", "presentation.pptx"):
    # Convert the file and print the resulting Markdown text.
    result = md.convert(source)
    print(result.text_content)
### 2. 保存到文件
python
from markitdown import MarkItDown

md = MarkItDown()

# Convert the PDF, then persist the Markdown text to disk.
result = md.convert("document.pdf")

# Explicit UTF-8 keeps the output independent of the platform's locale encoding.
with open("output.md", "w", encoding="utf-8") as f:
    f.write(result.text_content)
### 3. 从流转换
python
from markitdown import MarkItDown

md = MarkItDown()

# Open in binary mode: the converter is handed the raw byte stream.
with open("document.pdf", "rb") as f:
    # The stream has no file name, so the extension is passed explicitly.
    result = md.convert_stream(f, file_extension=".pdf")
    print(result.text_content)
## 科学工作流
### 转换研究论文
python
from markitdown import MarkItDown
from pathlib import Path

md = MarkItDown()

# Convert every paper in the directory.
papers_dir = Path("research_papers/")
output_dir = Path("markdown_papers/")
output_dir.mkdir(exist_ok=True)

for paper in papers_dir.glob("*.pdf"):
    result = md.convert(str(paper))

    # Save under the original file name, with a .md extension.
    output_file = output_dir / f"{paper.stem}.md"
    # Explicit UTF-8: the locale default encoding may not be able to
    # represent every character found in a converted paper.
    output_file.write_text(result.text_content, encoding="utf-8")

    print(f"Converted: {paper.name}")
### 从 Excel 提取表格
python
from markitdown import MarkItDown

md = MarkItDown()

# Convert the Excel workbook to Markdown tables.
result = md.convert("experimental_data.xlsx")

# The result contains the tables in Markdown format.
print(result.text_content)

# Save for further processing; explicit UTF-8 avoids depending on the
# platform's locale encoding.
with open("data_tables.md", "w", encoding="utf-8") as f:
    f.write(result.text_content)
### 处理演示幻灯片
python
from markitdown import MarkItDown
from openai import OpenAI

# Add AI-generated descriptions for images.
# NOTE(review): the model id below uses a "provider/model" form while the
# client is a default OpenAI() instance -- confirm the client is pointed at
# an endpoint that serves this model (e.g. via base_url / API key).
client = OpenAI()
md = MarkItDown(
    llm_client=client,
    llm_model="anthropic/claude-sonnet-4.5",
    llm_prompt="Describe this scientific slide, focusing on data and key findings",
)

result = md.convert("conference_talk.pptx")

# Save the converted text under a title heading.
output = f"""# Conference Talk
{result.text_content}
"""

# Explicit UTF-8 keeps the output independent of the platform's locale encoding.
with open("talk_notes.md", "w", encoding="utf-8") as f:
    f.write(output)
## AI 增强转换
### 详细图像描述
python
from markitdown import MarkItDown
from openai import OpenAI
# 初始化