mirrored 19 minutes ago
1
openhandsFix mypy type-checking errors - Remove unused type ignore comments from multiple files - Fix TypedDict type mismatch in browser_env/actions.py by ensuring arguments are converted to strings - Install missing type stubs (types-requests, types-tqdm) All core packages (browser_env, agent, evaluation_harness, llms, tests) now pass mypy checks. Co-authored-by: openhands <openhands@all-hands.dev> 60a6ca1
from typing import Any

import tiktoken
from transformers import LlamaTokenizer


class Tokenizer(object):
    """Uniform encode/decode wrapper around a provider-specific tokenizer.

    The wrapped tokenizer object is stored on ``self.tokenizer``. Two
    providers are supported:

    * ``"openai"``: the encoding returned by
      ``tiktoken.encoding_for_model(model_name)``.
    * ``"huggingface"``: a ``LlamaTokenizer`` loaded with
      ``LlamaTokenizer.from_pretrained(model_name)``.
    """

    def __init__(self, provider: str, model_name: str) -> None:
        """Build the underlying tokenizer for ``provider`` and ``model_name``.

        Args:
            provider: Either ``"openai"`` or ``"huggingface"``.
            model_name: Model identifier forwarded unchanged to the
                provider's tokenizer loader.

        Raises:
            NotImplementedError: If ``provider`` is not one of the supported
                values.
        """
        if provider == "openai":
            self.tokenizer = tiktoken.encoding_for_model(model_name)
        elif provider == "huggingface":
            self.tokenizer = LlamaTokenizer.from_pretrained(model_name)
            # turn off adding special tokens automatically
            self.tokenizer.add_special_tokens = False  # type: ignore[attr-defined]
            self.tokenizer.add_bos_token = False  # type: ignore[attr-defined]
            self.tokenizer.add_eos_token = False  # type: ignore[attr-defined]
        else:
            # Name the offending value so a typo in configuration is easy to
            # spot; the exception type is unchanged for existing callers.
            raise NotImplementedError(
                f"Unsupported tokenizer provider: {provider!r} "
                "(expected 'openai' or 'huggingface')"
            )

    def encode(self, text: str) -> list[int]:
        """Return the token ids the wrapped tokenizer produces for ``text``."""
        return self.tokenizer.encode(text)

    def decode(self, ids: list[int]) -> str:
        """Return the text the wrapped tokenizer reconstructs from ``ids``."""
        return self.tokenizer.decode(ids)

    def __call__(self, text: str) -> list[int]:
        """Encode ``text``; calling the instance is shorthand for ``encode``."""
        return self.encode(text)