convert : fix (ignore for now) typings errors (#22002)

This commit is contained in:
Sigbjørn Skjæret 2026-04-18 09:36:41 +02:00 committed by GitHub
parent fd1c0ec3f0
commit 89a5474f0e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -10912,14 +10912,14 @@ class NemotronHModel(GraniteHybridModel):
vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
self.hparams["vocab_size"] = vocab_size
assert max(tokenizer.vocab.values()) < vocab_size
assert max(tokenizer.vocab.values()) < vocab_size # ty: ignore[unresolved-attribute]
tokpre = self.get_vocab_base_pre(tokenizer)
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
added_vocab = tokenizer.get_added_vocab()
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()} # ty: ignore[unresolved-attribute]
added_vocab = tokenizer.get_added_vocab() # ty: ignore[unresolved-attribute]
added_tokens_decoder = tokenizer.added_tokens_decoder
added_tokens_decoder = tokenizer.added_tokens_decoder # ty: ignore[unresolved-attribute]
for i in range(vocab_size):
if i not in reverse_vocab:
@ -10930,7 +10930,7 @@ class NemotronHModel(GraniteHybridModel):
if token in added_vocab:
if not added_tokens_decoder[i].normalized:
previous_token = token
token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False)) # ty: ignore[unresolved-attribute, invalid-assignment]
if previous_token != token:
logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")