Tokenize

Count the number of tokens in a submitted string.

Use this endpoint only through the SDK: the SDK uses the latest Jamba tokenizer, whereas the REST endpoint uses an older tokenizer.

Calling syntax

import os
from ai21 import tokenizers

def tokenize():
    """Tokenize a sample sentence with the AI21 SDK tokenizer and print the result.

    Obtains a tokenizer via ``tokenizers.get_tokenizer()`` (called with no
    arguments, so whatever default the SDK selects is used), passes the
    sample text to its ``tokenize`` method, and prints what comes back.
    Returns ``None``; the output is only printed.
    """
    tokenizer = tokenizers.get_tokenizer()
    response = tokenizer.tokenize("I want to break free.")
    print(response)
    
# Response
['▁I▁want▁to', '▁break', '▁free', '.']
# WARNING: The REST endpoint currently uses an older tokenizer. To use
# the tokenizer used by Jamba, use the SDK.
import requests
ROOT_URL = "https://api.ai21.com/studio/v1/"

def tokenize():
    """POST a sample sentence to the REST ``tokenize`` endpoint and print the JSON reply.

    The request URL is ``ROOT_URL`` with ``"tokenize"`` appended, the text is
    sent as a JSON body under the ``"text"`` key, and the API key is sent as
    a Bearer token in the ``Authorization`` header.

    NOTE: ``AI21_API_KEY`` is not defined in this snippet; it must already be
    in scope (for example as a module-level constant) before calling this.

    Returns ``None``; the decoded JSON response is only printed.

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
            the timeout below.
    """
    url = ROOT_URL + "tokenize"
    response = requests.post(
        url,
        headers={"Authorization": f"Bearer {AI21_API_KEY}"},
        json={"text": "I want to break free."},
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection; bound the wait so the example cannot hang.
        timeout=30,
    )
    print(response.json())
    
    
# Response
{'text': 'I want to break free.',
 'tokens': [{'token': '▁I▁want▁to', 'textRange': {'start': 0, 'end': 9}},
            {'token': '▁break', 'textRange': {'start': 9, 'end': 15}},
            {'token': '▁free', 'textRange': {'start': 15, 'end': 20}},
            {'token': '.', 'textRange': {'start': 20, 'end': 21}}]
}
Language
Credentials
Header