Files API Quickstart

Submit a real batch in five minutes. By the end of this guide you will have submitted a single document, polled its status, downloaded its converted result, submitted a two-item batch, and tracked that batch to completion, using only your API key. Every step shows cURL, Python, JavaScript / TypeScript, Go, and Java.

Prerequisites

An API key. Get one from the Mathpix Console and export it as APP_KEY in your shell.
For s3://, gs://, or Azure Blob URLs, a registered data source for the bucket. Public https:// URLs work without any setup.

export APP_KEY="your-app-key"

1. Submit a single document

Use POST /files/v1/uri to submit one document by URL. The request below submits a public PDF and asks for DOCX and Markdown outputs in addition to the default Mathpix Markdown.

cURL
Python
JavaScript / TypeScript
Go
Java

# Submit one public PDF and request DOCX and Markdown alongside the default output.
curl --request POST \
  --header "app_key: $APP_KEY" \
  --header 'Content-Type: application/json' \
  --data '{
  "source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
  "conversion_formats": { "docx": true, "md": true }
}' \
  https://api.mathpix.com/files/v1/uri

import os
import requests

# Submit one document by URL and request DOCX and Markdown conversions.
r = requests.post(
    "https://api.mathpix.com/files/v1/uri",
    json={
        "source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
        "conversion_formats": {"docx": True, "md": True},
    },
    headers={"app_key": os.environ["APP_KEY"], "Content-Type": "application/json"},
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Fail with the HTTP error itself rather than a confusing KeyError on "file_id" below.
r.raise_for_status()
# Keep file_id: the status and download steps use it.
file_id = r.json()["file_id"]
print(file_id)

// Submit one document by URL and request DOCX and Markdown conversions.
const response = await fetch("https://api.mathpix.com/files/v1/uri", {
  method: "POST",
  headers: { app_key: process.env.APP_KEY, "Content-Type": "application/json" },
  body: JSON.stringify({
    source_uri: "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
    conversion_formats: { docx: true, md: true },
  }),
});
// fetch only rejects on network failure, so surface HTTP errors explicitly
// instead of continuing with an undefined file_id.
if (!response.ok) {
  throw new Error(`Submit failed: HTTP ${response.status} ${await response.text()}`);
}
// Keep file_id: the status and download steps use it.
const { file_id } = await response.json();
console.log(file_id);

body := bytes.NewBufferString(`{
  "source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
  "conversion_formats": {"docx": true, "md": true}
}`)
req, _ := http.NewRequest("POST", "https://api.mathpix.com/files/v1/uri", body)
req.Header.Set("app_key", os.Getenv("APP_KEY"))
req.Header.Set("Content-Type", "application/json")
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
result, _ := io.ReadAll(resp.Body)
fmt.Println(string(result)) // {"file_id": "<uuid>"}

// Imports used: java.net.URI, java.net.http.*, java.util.regex.Matcher, java.util.regex.Pattern.
HttpClient client = HttpClient.newHttpClient();
String body = """
    {
      "source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf",
      "conversion_formats": {"docx": true, "md": true}
    }
    """;
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("https://api.mathpix.com/files/v1/uri"))
    .header("app_key", System.getenv("APP_KEY"))
    .header("Content-Type", "application/json")
    .POST(HttpRequest.BodyPublishers.ofString(body))
    .build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
if (response.statusCode() < 200 || response.statusCode() > 299) {
    throw new IllegalStateException(
        "Submit failed: HTTP " + response.statusCode() + " " + response.body());
}
// The JDK ships no JSON parser; a regex is enough for this flat response.
// Prefer a JSON library in real code.
Matcher match = Pattern.compile("\"file_id\"\\s*:\\s*\"([^\"]+)\"").matcher(response.body());
if (!match.find()) {
    throw new IllegalStateException("No file_id in response: " + response.body());
}
// Keep fileId: the status and download steps use it.
String fileId = match.group(1);
System.out.println(fileId);

Example response
{
  "file_id": "b1c9c3a8-55e4-4a09-b7d0-218ba5de4c4d"
}

Keep the returned file_id; it is how you check status and download results.

2. Check status

Poll GET /files/v1/{file_id} until status is "completed" (or "error"). A typical document moves through pending (just submitted), then split (pages extracted, OCR in progress, with percent_done rising), then completed. The loops below poll every two seconds and stop at a terminal status.

cURL
Python
JavaScript / TypeScript
Go
Java

# FILE_ID must hold the file_id returned by the submit call in step 1.
export FILE_ID="your-file-id"

curl -H "app_key: $APP_KEY" \
  "https://api.mathpix.com/files/v1/$FILE_ID"

import os
import time
import requests

# file_id comes from the submit call in step 1.
status_url = f"https://api.mathpix.com/files/v1/{file_id}"
headers = {"app_key": os.environ["APP_KEY"]}
while True:
    r = requests.get(status_url, headers=headers, timeout=30)
    # Stop on HTTP errors (bad key, unknown file_id) instead of failing on a missing "status".
    r.raise_for_status()
    body = r.json()
    # NOTE(review): percent_done is read with .get in case the field is missing
    # before processing starts; confirm against the API reference.
    print(body["status"], body.get("percent_done"))
    if body["status"] in ("completed", "error"):
        break
    time.sleep(2)

// file_id comes from the submit call in step 1.
let body;
while (true) {
  const response = await fetch(`https://api.mathpix.com/files/v1/${file_id}`, {
    headers: { app_key: process.env.APP_KEY },
  });
  // An HTTP error response may never carry a terminal status, so without this
  // check the loop could poll forever.
  if (!response.ok) {
    throw new Error(`Status check failed: HTTP ${response.status} ${await response.text()}`);
  }
  body = await response.json();
  console.log(body.status, body.percent_done);
  if (body.status === "completed" || body.status === "error") break;
  await new Promise((resolve) => setTimeout(resolve, 2000));
}

// Imports used: encoding/json, fmt, io, net/http, os, time.
// fileID comes from the submit response in step 1.
for {
    req, err := http.NewRequest("GET", "https://api.mathpix.com/files/v1/"+fileID, nil)
    if err != nil {
        panic(err)
    }
    req.Header.Set("app_key", os.Getenv("APP_KEY"))
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        // resp is nil when the request fails, so it must not be touched.
        panic(err)
    }
    result, err := io.ReadAll(resp.Body)
    resp.Body.Close()
    if err != nil {
        panic(err)
    }
    // An HTTP error response may never carry a terminal status; stop instead of polling forever.
    if resp.StatusCode < 200 || resp.StatusCode > 299 {
        panic(fmt.Sprintf("status check failed: %s: %s", resp.Status, result))
    }
    var status struct {
        Status      string  `json:"status"`
        PercentDone float64 `json:"percent_done"`
    }
    if err := json.Unmarshal(result, &status); err != nil {
        panic(err)
    }
    fmt.Println(status.Status, status.PercentDone)
    if status.Status == "completed" || status.Status == "error" {
        break
    }
    time.Sleep(2 * time.Second)
}

HttpClient client = HttpClient.newHttpClient();
while (true) {
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("https://api.mathpix.com/files/v1/" + fileId))
        .header("app_key", System.getenv("APP_KEY"))
        .GET()
        .build();
    String body = client.send(request, HttpResponse.BodyHandlers.ofString()).body();
    System.out.println(body);
    if (body.contains("\"completed\"") || body.contains("\"error\"")) {
        break;
    }
    Thread.sleep(2000);
}

3. Download the result

Once status is completed, request results by extension via GET /files/v1/{file_id}.{ext}. The examples below download the Mathpix Markdown output and the DOCX output requested at submission.

Always produced (download without pre-requesting): mmd, lines.json, lines.mmd.json.
On request (set in conversion_formats on submission): docx, xlsx, html, tex.zip, md, and others; see the availability table in Supported Formats.

cURL
Python
JavaScript / TypeScript
Go
Java

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error response body as if it were the converted document.
curl --fail -H "app_key: $APP_KEY" \
  "https://api.mathpix.com/files/v1/$FILE_ID.mmd" \
  -o result.mmd

curl --fail -H "app_key: $APP_KEY" \
  "https://api.mathpix.com/files/v1/$FILE_ID.docx" \
  -o result.docx

import os
import requests

# file_id comes from the submit call in step 1.
headers = {"app_key": os.environ["APP_KEY"]}
for ext in ("mmd", "docx"):
    r = requests.get(f"https://api.mathpix.com/files/v1/{file_id}.{ext}",
                     headers=headers, timeout=60)
    # Without this check an HTTP error body would be saved as result.<ext>.
    r.raise_for_status()
    with open(f"result.{ext}", "wb") as f:
        f.write(r.content)

import { writeFile } from "node:fs/promises";

// file_id comes from the submit call in step 1.
for (const ext of ["mmd", "docx"]) {
  const response = await fetch(
    `https://api.mathpix.com/files/v1/${file_id}.${ext}`,
    { headers: { app_key: process.env.APP_KEY } },
  );
  // Without this check an HTTP error body would be written to disk as result.<ext>.
  if (!response.ok) {
    throw new Error(`Download of .${ext} failed: HTTP ${response.status} ${await response.text()}`);
  }
  await writeFile(`result.${ext}`, Buffer.from(await response.arrayBuffer()));
}

// Imports used: fmt, io, net/http, os.
// fileID comes from the submit response in step 1.
for _, ext := range []string{"mmd", "docx"} {
    req, err := http.NewRequest("GET", "https://api.mathpix.com/files/v1/"+fileID+"."+ext, nil)
    if err != nil {
        panic(err)
    }
    req.Header.Set("app_key", os.Getenv("APP_KEY"))
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        // resp is nil when the request fails, so it must not be touched.
        panic(err)
    }
    // Without this check an HTTP error body would be saved as result.<ext>.
    if resp.StatusCode < 200 || resp.StatusCode > 299 {
        resp.Body.Close()
        panic(fmt.Sprintf("download of .%s failed: %s", ext, resp.Status))
    }
    out, err := os.Create("result." + ext)
    if err != nil {
        resp.Body.Close()
        panic(err)
    }
    _, copyErr := io.Copy(out, resp.Body)
    resp.Body.Close()
    // Close can report a failed flush, so check it along with the copy.
    closeErr := out.Close()
    if copyErr != nil {
        panic(copyErr)
    }
    if closeErr != nil {
        panic(closeErr)
    }
}

// Imports used: java.net.URI, java.net.http.*, java.nio.file.Files, java.nio.file.Path.
// fileId comes from the submit response in step 1.
HttpClient client = HttpClient.newHttpClient();
for (String ext : new String[]{"mmd", "docx"}) {
    Path target = Path.of("result." + ext);
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("https://api.mathpix.com/files/v1/" + fileId + "." + ext))
        .header("app_key", System.getenv("APP_KEY"))
        .GET()
        .build();
    HttpResponse<Path> response = client.send(request, HttpResponse.BodyHandlers.ofFile(target));
    // ofFile stores whatever the server sends, including an error body,
    // so remove the file rather than leave it behind as a "result".
    if (response.statusCode() < 200 || response.statusCode() > 299) {
        Files.deleteIfExists(target);
        throw new IllegalStateException(
            "Download of " + target + " failed: HTTP " + response.statusCode());
    }
}

4. Submit many at once

For batches, use POST /files/v1/jobs: up to 200,000 files in a single request. Pass an array of source URIs plus job-wide conversion and OCR options applied to every file. Each file can carry an optional custom_id for your own correlation. job_id is optional (the server generates one if you omit it), but you must supply your own when you use custom_id, as the example below does.

The batch below submits two documents. The second one references a URL that does not exist, which lets step 5 show how failures surface.

cURL
Python
JavaScript / TypeScript
Go
Java

# Submit a two-file job; the second source_uri does not exist, so that file will fail.
curl --request POST \
  --header "app_key: $APP_KEY" \
  --header 'Content-Type: application/json' \
  --data '{
  "job_id": "quickstart-batch",
  "files": [
    { "source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "custom_id": "cs229" },
    { "source_uri": "https://example.com/manual.pdf", "custom_id": "manual" }
  ],
  "conversion_formats": { "docx": true, "md": true }
}' \
  https://api.mathpix.com/files/v1/jobs

import os
import requests

# Submit a two-file job; the second source_uri does not exist, so that file will fail.
r = requests.post(
    "https://api.mathpix.com/files/v1/jobs",
    json={
        "job_id": "quickstart-batch",
        "files": [
            {"source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "custom_id": "cs229"},
            {"source_uri": "https://example.com/manual.pdf", "custom_id": "manual"},
        ],
        "conversion_formats": {"docx": True, "md": True},
    },
    headers={"app_key": os.environ["APP_KEY"], "Content-Type": "application/json"},
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Report a rejected submission as an HTTP error before trying to parse the body.
r.raise_for_status()
print(r.json())  # {"file_count": 2, "job_id": "quickstart-batch"}

// Submit a two-file job; the second source_uri does not exist, so that file will fail.
const response = await fetch("https://api.mathpix.com/files/v1/jobs", {
  method: "POST",
  headers: { app_key: process.env.APP_KEY, "Content-Type": "application/json" },
  body: JSON.stringify({
    job_id: "quickstart-batch",
    files: [
      { source_uri: "https://cdn.mathpix.com/examples/cs229-notes1.pdf", custom_id: "cs229" },
      { source_uri: "https://example.com/manual.pdf", custom_id: "manual" },
    ],
    conversion_formats: { docx: true, md: true },
  }),
});
// fetch does not reject on HTTP errors; without this check a rejected
// submission would print "Job undefined accepted undefined files".
if (!response.ok) {
  throw new Error(`Job submission failed: HTTP ${response.status} ${await response.text()}`);
}
const { job_id, file_count } = await response.json();
console.log(`Job ${job_id} accepted ${file_count} files`);

// Imports used: bytes, fmt, io, net/http, os.
body := bytes.NewBufferString(`{
  "job_id": "quickstart-batch",
  "files": [
    {"source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "custom_id": "cs229"},
    {"source_uri": "https://example.com/manual.pdf", "custom_id": "manual"}
  ],
  "conversion_formats": {"docx": true, "md": true}
}`)
req, err := http.NewRequest("POST", "https://api.mathpix.com/files/v1/jobs", body)
if err != nil {
    panic(err)
}
req.Header.Set("app_key", os.Getenv("APP_KEY"))
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
    // resp is nil when the request fails, so it must not be touched.
    panic(err)
}
defer resp.Body.Close()
result, err := io.ReadAll(resp.Body)
if err != nil {
    panic(err)
}
if resp.StatusCode < 200 || resp.StatusCode > 299 {
    panic(fmt.Sprintf("job submission failed: %s: %s", resp.Status, result))
}
fmt.Println(string(result)) // {"file_count": 2, "job_id": "quickstart-batch"}

// Submit a two-file job; conversion_formats is job-wide and applies to every file.
String payload = """
    {
      "job_id": "quickstart-batch",
      "files": [
        {"source_uri": "https://cdn.mathpix.com/examples/cs229-notes1.pdf", "custom_id": "cs229"},
        {"source_uri": "https://example.com/manual.pdf", "custom_id": "manual"}
      ],
      "conversion_formats": {"docx": true, "md": true}
    }
    """;
HttpRequest submitJob = HttpRequest.newBuilder(URI.create("https://api.mathpix.com/files/v1/jobs"))
    .header("app_key", System.getenv("APP_KEY"))
    .header("Content-Type", "application/json")
    .POST(HttpRequest.BodyPublishers.ofString(payload))
    .build();
// Send the request and print the raw JSON reply.
String reply = HttpClient.newHttpClient()
    .send(submitJob, HttpResponse.BodyHandlers.ofString())
    .body();
System.out.println(reply);

Example response
{
  "file_count": 2,
  "job_id": "quickstart-batch"
}

5. Track the job

Poll GET /files/v1/jobs/{job_id} for status and counters, then list the failed files via GET /files/v1/jobs/{job_id}/files?status=error. In this batch the manual file fails (its URL does not exist), so the counters show one completed and one errored file, and the error listing identifies which one by your custom_id.

cURL
Python
JavaScript / TypeScript
Go
Java

# Job-level status and counters.
curl --header "app_key: $APP_KEY" \
  --url "https://api.mathpix.com/files/v1/jobs/quickstart-batch"

# Only the files in this job whose status is error.
curl --header "app_key: $APP_KEY" \
  --url "https://api.mathpix.com/files/v1/jobs/quickstart-batch/files?status=error"

import os
import time
import requests

job_url = "https://api.mathpix.com/files/v1/jobs/quickstart-batch"
headers = {"app_key": os.environ["APP_KEY"]}

# Poll the job until it reports "completed". The job reaches that status even
# when individual files failed; failures show up in the counters instead.
while True:
    r = requests.get(job_url, headers=headers, timeout=30)
    # Stop on HTTP errors (bad key, unknown job_id) instead of failing on a missing "status".
    r.raise_for_status()
    body = r.json()
    print(body["status"], body["files_completed"], "/", body["file_count"])
    if body["status"] == "completed":
        break
    time.sleep(5)

# List the failed files and identify them by the custom_id set at submission.
r = requests.get(f"{job_url}/files", params={"status": "error"},
                 headers=headers, timeout=30)
r.raise_for_status()
errored = r.json()
print([f["custom_id"] for f in errored["files"]])

const headers = { app_key: process.env.APP_KEY };
const jobUrl = "https://api.mathpix.com/files/v1/jobs/quickstart-batch";

/**
 * GET a JSON resource with the API key header.
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function getJson(url) {
  const response = await fetch(url, { headers });
  // fetch does not reject on HTTP errors; without this check a bad key or
  // unknown job_id would leave the polling loop below running forever.
  if (!response.ok) {
    throw new Error(`GET ${url} failed: HTTP ${response.status} ${await response.text()}`);
  }
  return response.json();
}

// Poll until the job reports "completed". It reaches that status even when
// individual files failed; failures show up in the counters instead.
let job;
while (true) {
  job = await getJson(jobUrl);
  console.log(job.status, job.files_completed, "/", job.file_count);
  if (job.status === "completed") break;
  await new Promise((resolve) => setTimeout(resolve, 5000));
}

// List the failed files and identify them by the custom_id set at submission.
const errored = await getJson(`${jobUrl}/files?status=error`);
console.log(errored.files.map((f) => f.custom_id));

for {
    req, _ := http.NewRequest("GET", "https://api.mathpix.com/files/v1/jobs/quickstart-batch", nil)
    req.Header.Set("app_key", os.Getenv("APP_KEY"))
    resp, _ := http.DefaultClient.Do(req)
    result, _ := io.ReadAll(resp.Body)
    resp.Body.Close()
    var job struct{ Status string `json:"status"` }
    json.Unmarshal(result, &job)
    fmt.Println(string(result))
    if job.Status == "completed" {
        break
    }
    time.Sleep(5 * time.Second)
}
req, _ := http.NewRequest("GET", "https://api.mathpix.com/files/v1/jobs/quickstart-batch/files?status=error", nil)
req.Header.Set("app_key", os.Getenv("APP_KEY"))
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
errored, _ := io.ReadAll(resp.Body)
fmt.Println(string(errored))

// Imports used: java.net.URI, java.net.http.*, java.util.regex.Matcher, java.util.regex.Pattern.
HttpClient client = HttpClient.newHttpClient();
String jobUrl = "https://api.mathpix.com/files/v1/jobs/quickstart-batch";
// Read the value of the first "status" field instead of searching the whole body for "completed".
// The JDK ships no JSON parser; prefer a JSON library in real code.
Pattern statusField = Pattern.compile("\"status\"\\s*:\\s*\"([^\"]+)\"");
while (true) {
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create(jobUrl))
        .header("app_key", System.getenv("APP_KEY"))
        .GET()
        .build();
    HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
    String job = response.body();
    System.out.println(job);
    // An HTTP error response may never report "completed"; stop instead of polling forever.
    if (response.statusCode() < 200 || response.statusCode() > 299) {
        throw new IllegalStateException("Job status check failed: HTTP " + response.statusCode());
    }
    Matcher match = statusField.matcher(job);
    if (match.find() && match.group(1).equals("completed")) {
        break;
    }
    Thread.sleep(5000);
}
// Print the listing of files whose status is error.
HttpRequest listRequest = HttpRequest.newBuilder()
    .uri(URI.create(jobUrl + "/files?status=error"))
    .header("app_key", System.getenv("APP_KEY"))
    .GET()
    .build();
System.out.println(client.send(listRequest, HttpResponse.BodyHandlers.ofString()).body());

The job status once every file reaches a terminal state:

Example response (job status)
{
  "job_id": "quickstart-batch",
  "status": "completed",
  "file_count": 2,
  "files_completed": 1,
  "files_errored": 1,
  "created_at": "2026-07-21T18:05:20.519Z",
  "modified_at": "2026-07-21T18:06:28.267Z"
}

The error listing identifies the failed file by its custom_id:

Example response (errored files)
{
  "files": [
    {
      "file_id": "f7d3a210-6c4e-49f3-bd5e-8e1c2f4d6b9a",
      "filename": "f7d3a210-6c4e-49f3-bd5e-8e1c2f4d6b9a.pdf",
      "status": "error",
      "custom_id": "manual"
    }
  ],
  "next_page_token": null
}

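Once the job is completed, download per-file outputs the same way as step 3: GET /files/v1/{file_id}.{ext} for each file_id in the job's file listing (GET /files/v1/jobs/{job_id}/files) whose status is completed. The files returned with ?status=error failed conversion, so skip them.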

Where to go next

Process a Document Async: full reference for the single-document endpoint.
Async Batch Document Processing: full reference for jobs, including pagination and idempotency.
Async Document Lifecycle: file status, download, and DELETE.
Connect your cloud storage: register data sources for your AWS S3, Google Cloud Storage, or Azure Blob Storage buckets so you can send private URIs and have results uploaded directly to your storage.
Migrate From SCS to Files API: for existing SCS customers.

Prerequisites

1. Submit a single document

2. Check status

3. Download the result

4. Submit many at once

5. Track the job

Where to go next