Process a PDF

Submit a PDF (or EPUB, DOCX, PPTX, and other document formats) for OCR processing. Results are available as Mathpix Markdown, DOCX, LaTeX, HTML, and more.

info

PDF processing is asynchronous. You submit the document, then poll for status and download results when complete. For real-time partial results, use the streaming option.

Submit via URL

Send a POST request containing the document URL to the v3/pdf endpoint:

{
  "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
  "conversion_formats": { "docx": true, "tex.zip": true }
}

from mpxpy.mathpix_client import MathpixClient
client = MathpixClient(app_id="APP_ID", app_key="APP_KEY")
pdf = client.pdf_new(
    url="https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    convert_to_docx=True,
    convert_to_tex_zip=True,
)
print(pdf.pdf_id)

curl -X POST https://api.mathpix.com/v3/pdf \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' \
-H 'Content-Type: application/json' \
--data '{"url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "conversion_formats": {"docx": true, "tex.zip": true}}'

import requests
r = requests.post("https://api.mathpix.com/v3/pdf",
    json={
        "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
        "conversion_formats": {"docx": True, "tex.zip": True}
    },
    headers={
        "app_id": "APP_ID",
        "app_key": "APP_KEY",
        "Content-type": "application/json"
    }
)
print(r.json())  # {"pdf_id": "..."}

const response = await fetch("https://api.mathpix.com/v3/pdf", {
  method: "POST",
  headers: {
    app_id: "APP_ID",
    app_key: "APP_KEY",
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    url: "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    conversion_formats: { docx: true, "tex.zip": true },
  }),
});
const { pdf_id } = await response.json();
console.log(`PDF ID: ${pdf_id}`);

body := bytes.NewBufferString(`{
    "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    "conversion_formats": {"docx": true, "tex.zip": true}
}`)
req, _ := http.NewRequest("POST", "https://api.mathpix.com/v3/pdf", body)
req.Header.Set("app_id", "APP_ID")
req.Header.Set("app_key", "APP_KEY")
req.Header.Set("Content-Type", "application/json")
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
result, _ := io.ReadAll(resp.Body)
fmt.Println(string(result)) // {"pdf_id": "..."}

HttpClient client = HttpClient.newHttpClient();
String body = """
    {
      "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
      "conversion_formats": { "docx": true, "tex.zip": true }
    }
    """;
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf"))
    .header("app_id", "APP_ID")
    .header("app_key", "APP_KEY")
    .header("Content-Type", "application/json")
    .POST(HttpRequest.BodyPublishers.ofString(body))
    .build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());

Example response
{
  "pdf_id": "2024_01_15_abc123def456"
}

Use the pdf_id to poll processing status, download results, and delete results.

Submit via file upload

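Upload a document file to the v3/pdf endpoint with a multipart form-data POST request. Attach the document as the file field and pass any request options as a JSON string in the options_json field: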

from mpxpy.mathpix_client import MathpixClient
client = MathpixClient(app_id="APP_ID", app_key="APP_KEY")
pdf = client.pdf_new(
    file_path="document.pdf",
    convert_to_docx=True,
    convert_to_tex_zip=True,
)
print(pdf.pdf_id)

curl -X POST https://api.mathpix.com/v3/pdf \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' \
--form 'file=@"document.pdf"' \
--form 'options_json="{\"conversion_formats\": {\"docx\": true, \"tex.zip\": true}}"'

import requests, json
r = requests.post("https://api.mathpix.com/v3/pdf",
    files={"file": open("document.pdf", "rb")},
    data={
        "options_json": json.dumps({
            "conversion_formats": {"docx": True, "tex.zip": True}
        })
    },
    headers={
        "app_id": "APP_ID",
        "app_key": "APP_KEY"
    }
)
print(r.json())  # {"pdf_id": "..."}

import fs from "fs";
const form = new FormData();
form.append("file", new Blob([fs.readFileSync("document.pdf")]));
form.append("options_json", JSON.stringify({
  conversion_formats: { docx: true, "tex.zip": true },
}));
const response = await fetch("https://api.mathpix.com/v3/pdf", {
  method: "POST",
  headers: { app_id: "APP_ID", app_key: "APP_KEY" },
  body: form,
});
const { pdf_id } = await response.json();
console.log(`PDF ID: ${pdf_id}`);

var buf bytes.Buffer
w := multipart.NewWriter(&buf)
fw, _ := w.CreateFormFile("file", "document.pdf")
f, _ := os.Open("document.pdf")
io.Copy(fw, f)
f.Close()
w.WriteField("options_json", `{"conversion_formats":{"docx":true,"tex.zip":true}}`)
w.Close()
req, _ := http.NewRequest("POST", "https://api.mathpix.com/v3/pdf", &buf)
req.Header.Set("app_id", "APP_ID")
req.Header.Set("app_key", "APP_KEY")
req.Header.Set("Content-Type", w.FormDataContentType())
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
result, _ := io.ReadAll(resp.Body)
fmt.Println(string(result))

HttpClient client = HttpClient.newHttpClient();
Path file = Path.of("document.pdf");
String boundary = "----FormBoundary" + System.currentTimeMillis();
String optionsJson = "{\"conversion_formats\": {\"docx\": true, \"tex.zip\": true}}";
byte[] fileBytes = Files.readAllBytes(file);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
baos.write(("--" + boundary + "\r\nContent-Disposition: form-data; name=\"file\"; filename=\""
    + file.getFileName() + "\"\r\nContent-Type: application/octet-stream\r\n\r\n").getBytes());
baos.write(fileBytes);
baos.write(("\r\n--" + boundary + "\r\nContent-Disposition: form-data; name=\"options_json\"\r\n\r\n"
    + optionsJson + "\r\n--" + boundary + "--\r\n").getBytes());
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf"))
    .header("app_id", "APP_ID")
    .header("app_key", "APP_KEY")
    .header("Content-Type", "multipart/form-data; boundary=" + boundary)
    .POST(HttpRequest.BodyPublishers.ofByteArray(baos.toByteArray()))
    .build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());

Example response
{
  "pdf_id": "2024_01_15_abc123def456"
}

Use the pdf_id to poll processing status, download results, and delete results.

Poll processing status

After submitting, poll GET v3/pdf/{pdf_id} until status reaches a terminal value, either "completed" or "error":

# wait_until_complete handles polling automatically
pdf.wait_until_complete(timeout=60)
print(pdf.pdf_status())

curl https://api.mathpix.com/v3/pdf/PDF_ID \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY'

import requests, time
headers = {"app_id": "APP_ID", "app_key": "APP_KEY"}
pdf_id = "YOUR_PDF_ID"
while True:
    r = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}", headers=headers)
    status = r.json()
    print(f"Status: {status['status']}, {status.get('percent_done', 0)}% done")
    if status["status"] in ("completed", "error"):
        break
    time.sleep(5)

const headers = { app_id: "APP_ID", app_key: "APP_KEY" };
const pdfId = "YOUR_PDF_ID";
while (true) {
  const response = await fetch(`https://api.mathpix.com/v3/pdf/${pdfId}`, { headers });
  const status = await response.json();
  console.log(`Status: ${status.status}, ${status.percent_done ?? 0}% done`);
  if (status.status === "completed" || status.status === "error") break;
  await new Promise((r) => setTimeout(r, 5000));
}

pdfId := "YOUR_PDF_ID"
delay := 5 * time.Second
for i := 0; i < 120; i++ { // timeout after ~10 minutes
    req, _ := http.NewRequest("GET", "https://api.mathpix.com/v3/pdf/"+pdfId, nil)
    req.Header.Set("app_id", "APP_ID")
    req.Header.Set("app_key", "APP_KEY")
    resp, _ := http.DefaultClient.Do(req)
    result, _ := io.ReadAll(resp.Body)
    resp.Body.Close()
    fmt.Println(string(result))
    if bytes.Contains(result, []byte(`"completed"`)) || bytes.Contains(result, []byte(`"error"`)) {
        break
    }
    time.Sleep(delay)
    if delay < 30*time.Second {
        delay = delay * 3 / 2 // backoff up to 30s
    }
}

HttpClient client = HttpClient.newHttpClient();
String pdfId = "YOUR_PDF_ID";
while (true) {
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("https://api.mathpix.com/v3/pdf/" + pdfId))
        .header("app_id", "APP_ID")
        .header("app_key", "APP_KEY")
        .GET().build();
    HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
    System.out.println(response.body());
    if (response.body().contains("\"completed\"") || response.body().contains("\"error\"")) break;
    Thread.sleep(5000);
}

Example response while processing
{
  "status": "split",
  "num_pages": 12,
  "num_pages_completed": 4,
  "percent_done": 33.33
}

Example response when complete
{
  "status": "completed",
  "num_pages": 12,
  "num_pages_completed": 12,
  "percent_done": 100
}

Download results

Once processing is complete, download each result with GET v3/pdf/{pdf_id}.{ext}, where {ext} is the extension of the format you want (for example mmd, docx, tex.zip, or lines.json):

# Save to files
pdf.to_md_file(path="result.mmd")
pdf.to_docx_file(path="result.docx")
pdf.to_tex_zip_file(path="result.tex.zip")
pdf.to_lines_json_file(path="lines.json")
# Or get content in memory
md_text = pdf.to_md_text()       # str
docx_bytes = pdf.to_docx_bytes() # bytes
lines = pdf.to_lines_json()      # dict

# Mathpix Markdown
curl https://api.mathpix.com/v3/pdf/PDF_ID.mmd \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' > result.mmd

# DOCX (requires conversion_formats: {"docx": true})
curl https://api.mathpix.com/v3/pdf/PDF_ID.docx \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' > result.docx

# LaTeX zip
curl https://api.mathpix.com/v3/pdf/PDF_ID.tex.zip \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' > result.tex.zip

# Line-by-line JSON data
curl https://api.mathpix.com/v3/pdf/PDF_ID.lines.json \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' > lines.json

import requests
headers = {"app_id": "APP_ID", "app_key": "APP_KEY"}
pdf_id = "YOUR_PDF_ID"
# Mathpix Markdown
r = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}.mmd", headers=headers)
with open("result.mmd", "w") as f:
    f.write(r.text)
# DOCX (requires conversion_formats: {"docx": true})
r = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}.docx", headers=headers)
with open("result.docx", "wb") as f:
    f.write(r.content)
# LaTeX zip
r = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}.tex.zip", headers=headers)
with open("result.tex.zip", "wb") as f:
    f.write(r.content)

import fs from "fs";
const headers = { app_id: "APP_ID", app_key: "APP_KEY" };
const pdfId = "YOUR_PDF_ID";
// Mathpix Markdown
const mmd = await fetch(`https://api.mathpix.com/v3/pdf/${pdfId}.mmd`, { headers });
fs.writeFileSync("result.mmd", await mmd.text());
// DOCX
const docx = await fetch(`https://api.mathpix.com/v3/pdf/${pdfId}.docx`, { headers });
fs.writeFileSync("result.docx", Buffer.from(await docx.arrayBuffer()));

pdfId := "YOUR_PDF_ID"
for _, ext := range []string{"mmd", "docx", "tex.zip"} {
    req, _ := http.NewRequest("GET", "https://api.mathpix.com/v3/pdf/"+pdfId+"."+ext, nil)
    req.Header.Set("app_id", "APP_ID")
    req.Header.Set("app_key", "APP_KEY")
    resp, _ := http.DefaultClient.Do(req)
    data, _ := io.ReadAll(resp.Body)
    resp.Body.Close()
    os.WriteFile("result."+ext, data, 0644)
}

HttpClient client = HttpClient.newHttpClient();
String pdfId = "YOUR_PDF_ID";
// Mathpix Markdown
HttpRequest mmdReq = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf/" + pdfId + ".mmd"))
    .header("app_id", "APP_ID").header("app_key", "APP_KEY").GET().build();
HttpResponse<String> mmdResp = client.send(mmdReq, HttpResponse.BodyHandlers.ofString());
Files.writeString(Path.of("result.mmd"), mmdResp.body());
// DOCX
HttpRequest docxReq = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf/" + pdfId + ".docx"))
    .header("app_id", "APP_ID").header("app_key", "APP_KEY").GET().build();
HttpResponse<byte[]> docxResp = client.send(docxReq, HttpResponse.BodyHandlers.ofByteArray());
Files.write(Path.of("result.docx"), docxResp.body());

Check conversion status

If you requested conversion_formats, check their status separately via GET v3/converter/{pdf_id}:

print(pdf.pdf_conversion_status())

curl https://api.mathpix.com/v3/converter/PDF_ID \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY'

import requests
headers = {"app_id": "APP_ID", "app_key": "APP_KEY"}
r = requests.get(f"https://api.mathpix.com/v3/converter/{pdf_id}", headers=headers)
print(r.json())

const response = await fetch("https://api.mathpix.com/v3/converter/PDF_ID", {
  headers: { app_id: "APP_ID", app_key: "APP_KEY" },
});
console.log(await response.json());

req, _ := http.NewRequest("GET", "https://api.mathpix.com/v3/converter/PDF_ID", nil)
req.Header.Set("app_id", "APP_ID")
req.Header.Set("app_key", "APP_KEY")
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
result, _ := io.ReadAll(resp.Body)
fmt.Println(string(result))

HttpClient client = HttpClient.newHttpClient();
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/converter/PDF_ID"))
    .header("app_id", "APP_ID").header("app_key", "APP_KEY").GET().build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());

Example conversion status response
{
  "status": "completed",
  "conversion_status": {
    "docx": { "status": "completed" },
    "tex.zip": { "status": "completed" }
  }
}

Stream pages

For lower time-to-first-data, set the streaming request parameter to true when you submit the document, then connect to GET v3/pdf/{pdf_id}/stream to receive page results via server-sent events (SSE) as each page completes:

cURL
Python
JavaScript / TypeScript
Go
Java

# 1. Submit with streaming enabled
curl -X POST https://api.mathpix.com/v3/pdf \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' \
-H 'Content-Type: application/json' \
--data '{"url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "streaming": true}'

# 2. Connect to the SSE stream
curl https://api.mathpix.com/v3/pdf/PDF_ID/stream \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY'

import requests, json
headers = {"app_id": "APP_ID", "app_key": "APP_KEY", "Content-type": "application/json"}
# 1. Submit with streaming enabled
r = requests.post("https://api.mathpix.com/v3/pdf",
    json={"url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "streaming": True},
    headers=headers
)
pdf_id = r.json()["pdf_id"]
# 2. Stream results via SSE
r = requests.get(f"https://api.mathpix.com/v3/pdf/{pdf_id}/stream",
    headers=headers, stream=True)
for line in r.iter_lines():
    if line:
        page = json.loads(line)
        print(f"Page {page['page_idx']}/{page['pdf_selected_len']}: {page['text'][:100]}...")

const headers = {
  app_id: "APP_ID",
  app_key: "APP_KEY",
  "Content-Type": "application/json",
};
// 1. Submit with streaming enabled
const submit = await fetch("https://api.mathpix.com/v3/pdf", {
  method: "POST",
  headers,
  body: JSON.stringify({
    url: "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    streaming: true,
  }),
});
const { pdf_id } = await submit.json();
// 2. Stream results via SSE
const stream = await fetch(`https://api.mathpix.com/v3/pdf/${pdf_id}/stream`, { headers });
const reader = stream.body!.getReader();
const decoder = new TextDecoder();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  console.log(decoder.decode(value));
}

// 1. Submit with streaming enabled
body := bytes.NewBufferString(`{
    "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    "streaming": true
}`)
req, _ := http.NewRequest("POST", "https://api.mathpix.com/v3/pdf", body)
req.Header.Set("app_id", "APP_ID")
req.Header.Set("app_key", "APP_KEY")
req.Header.Set("Content-Type", "application/json")
resp, _ := http.DefaultClient.Do(req)
submitBody, _ := io.ReadAll(resp.Body)
resp.Body.Close()
// 2. Stream results via SSE (parse pdf_id from response)
pdfId := "YOUR_PDF_ID"
req, _ = http.NewRequest("GET", "https://api.mathpix.com/v3/pdf/"+pdfId+"/stream", nil)
req.Header.Set("app_id", "APP_ID")
req.Header.Set("app_key", "APP_KEY")
resp, _ = http.DefaultClient.Do(req)
defer resp.Body.Close()
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
    fmt.Println(scanner.Text())
}

HttpClient client = HttpClient.newHttpClient();
String body = """
    {"url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "streaming": true}
    """;
// 1. Submit with streaming enabled
HttpRequest submit = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf"))
    .header("app_id", "APP_ID").header("app_key", "APP_KEY")
    .header("Content-Type", "application/json")
    .POST(HttpRequest.BodyPublishers.ofString(body))
    .build();
HttpResponse<String> resp = client.send(submit, HttpResponse.BodyHandlers.ofString());
String pdfId = resp.body().replaceAll(".*\"pdf_id\":\"([^\"]+)\".*", "$1");
// 2. Stream results via SSE
HttpRequest stream = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf/" + pdfId + "/stream"))
    .header("app_id", "APP_ID").header("app_key", "APP_KEY").GET().build();
HttpResponse<java.util.stream.Stream<String>> sse = client.send(stream,
    HttpResponse.BodyHandlers.ofLines());
sse.body().forEach(System.out::println);

Pages are streamed one JSON object at a time, one object per page. Pages usually arrive in order, but this is not guaranteed; use the page_idx field of each object rather than arrival order to identify the page.

Process specific pages

Use the page_ranges request parameter to process only certain pages.

{
  "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
  "page_ranges": "2,4-6"
}

curl -X POST https://api.mathpix.com/v3/pdf \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY' \
-H 'Content-Type: application/json' \
--data '{"url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "page_ranges": "2,4-6"}'

import requests
r = requests.post("https://api.mathpix.com/v3/pdf",
    json={
        "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
        "page_ranges": "2,4-6"
    },
    headers={
        "app_id": "APP_ID",
        "app_key": "APP_KEY",
        "Content-type": "application/json"
    }
)
print(r.json())

const response = await fetch("https://api.mathpix.com/v3/pdf", {
  method: "POST",
  headers: {
    app_id: "APP_ID",
    app_key: "APP_KEY",
    "Content-Type": "application/json",
  },
  body: JSON.stringify({
    url: "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    page_ranges: "2,4-6",
  }),
});
const { pdf_id } = await response.json();
console.log(`PDF ID: ${pdf_id}`);

body := bytes.NewBufferString(`{
    "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    "page_ranges": "2,4-6"
}`)
req, _ := http.NewRequest("POST", "https://api.mathpix.com/v3/pdf", body)
req.Header.Set("app_id", "APP_ID")
req.Header.Set("app_key", "APP_KEY")
req.Header.Set("Content-Type", "application/json")
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
result, _ := io.ReadAll(resp.Body)
fmt.Println(string(result))

HttpClient client = HttpClient.newHttpClient();
String body = """
    {
      "url": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
      "page_ranges": "2,4-6"
    }
    """;
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf"))
    .header("app_id", "APP_ID")
    .header("app_key", "APP_KEY")
    .header("Content-Type", "application/json")
    .POST(HttpRequest.BodyPublishers.ofString(body))
    .build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body());

Example response
{
  "pdf_id": "2024_01_15_abc123def456"
}

The value "2,4-6" selects pages [2, 4, 5, 6]. You can also use negative indices, which count back from the end of the document: "2 - -2" selects every page from the second through the next-to-last.

Delete results

Permanently delete a PDF's output data via DELETE v3/pdf/{pdf_id} when you no longer need it:

cURL
Python
JavaScript / TypeScript
Go
Java

curl -X DELETE https://api.mathpix.com/v3/pdf/PDF_ID \
-H 'app_id: APP_ID' \
-H 'app_key: APP_KEY'

import requests
r = requests.delete(f"https://api.mathpix.com/v3/pdf/{pdf_id}",
    headers={"app_id": "APP_ID", "app_key": "APP_KEY"})
print(r.status_code)

const response = await fetch("https://api.mathpix.com/v3/pdf/PDF_ID", {
  method: "DELETE",
  headers: { app_id: "APP_ID", app_key: "APP_KEY" },
});
console.log(response.status);

req, _ := http.NewRequest("DELETE", "https://api.mathpix.com/v3/pdf/PDF_ID", nil)
req.Header.Set("app_id", "APP_ID")
req.Header.Set("app_key", "APP_KEY")
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
fmt.Println(resp.StatusCode)

HttpClient client = HttpClient.newHttpClient();
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/v3/pdf/PDF_ID"))
    .header("app_id", "APP_ID").header("app_key", "APP_KEY")
    .DELETE().build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.statusCode());

warning

Download and store files locally before deleting if you need to keep them. Deletion is permanent.

note

PDF page images and cropped images (figures, diagrams) served via CDN may remain accessible for up to 10 minutes after deletion while cached copies expire.

Retention

Uploaded source documents and the page image files that back cdn.mathpix.com/cropped/... URLs are retained for up to 30 days. Mathpix Markdown, JSON Lines, and other text outputs are retained for up to 90 days. See Data Retention for the full matrix by artifact type, how improve_mathpix affects retention, and how to request extended retention on your account.

If you need the images referenced by your outputs to remain available long-term, request a self-contained output format (.mmd.zip, .docx, .html.zip, .tex.zip, etc.) at processing time. These outputs embed all referenced images, so they do not depend on CDN image URLs that expire.

Supported formats

See Supported Formats for the full list of accepted input and output formats.

Next steps

v3/pdf reference — Full request parameters, response schema, streaming, status polling, and lines data
v3/converter reference — Convert MMD text to other formats directly
Authentication — How to get your API keys

Submit via URL

Submit via file upload

Poll processing status

Download results

Check conversion status

Stream pages

Process specific pages

Delete results

Retention

Supported formats

Next steps