POST https://api.ai21.com/studio/v1/tokenize
You should call this endpoint only through the SDK: the SDK uses the latest Jamba tokenizer, whereas the REST endpoint uses an older tokenizer.
Calling syntax
import os
from ai21 import tokenizers
def tokenize():
    """Tokenize a sample sentence with the AI21 SDK and print the result.

    Fetches a tokenizer by calling ``tokenizers.get_tokenizer()`` with no
    arguments, tokenizes the fixed string ``"I want to break free."`` and
    prints whatever ``tokenizer.tokenize`` returns.
    """
    tokenizer = tokenizers.get_tokenizer()
    response = tokenizer.tokenize("I want to break free.")
    print(response)
# Response
['▁I▁want▁to', '▁break', '▁free', '.']
# WARNING: The REST endpoint currently uses an older tokenizer. To use
# the tokenizer used by Jamba, use the SDK.
import requests
ROOT_URL = "https://api.ai21.com/studio/v1/"


def tokenize():
    """POST a sample sentence to the REST ``tokenize`` endpoint and print the reply.

    Sends the fixed text ``"I want to break free."`` as a JSON body to
    ``ROOT_URL + "tokenize"`` with a Bearer authorization header, then prints
    the decoded JSON response.

    NOTE(review): ``AI21_API_KEY`` is not defined in this snippet; it must
    already exist as a module-level name holding the API key before this
    function is called.
    """
    url = ROOT_URL + "tokenize"
    response = requests.post(
        url,
        headers={"Authorization": f"Bearer {AI21_API_KEY}"},
        json={"text": "I want to break free."},
        # requests applies no timeout by default; without one a stalled
        # connection would block this call indefinitely.
        timeout=30,
    )
    print(response.json())
# Response
{'text': 'I want to break free.',
'tokens': [{'token': '▁I▁want▁to', 'textRange': {'start': 0, 'end': 9}},
{'token': '▁break', 'textRange': {'start': 9, 'end': 15}},
{'token': '▁free', 'textRange': {'start': 15, 'end': 20}},
{'token': '.', 'textRange': {'start': 20, 'end': 21}}]
}