You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
"""Reproduction script: 4-bit Llama-3.1-405B inference via airllm on MLX.

airllm streams the model layer-by-layer from disk, which is how a 405B
checkpoint can run on a single Apple Silicon machine. The script tokenizes
one prompt and generates up to 30 new tokens.
"""
import mlx.core as mx
from airllm import AutoModel

# Loads the layered/sharded checkpoint; downloads it on first run.
model = AutoModel.from_pretrained(
    "unsloth/Meta-Llama-3.1-405B-Instruct-bnb-4bit"
)

input_text = [
    'What is the capital of United States?',
]

# Truncate prompts longer than this many tokens.
MAX_LENGTH = 128

# Tokenize with numpy tensors so the ids can be wrapped in an mx.array below.
input_tokens = model.tokenizer(
    input_text,
    return_tensors="np",
    return_attention_mask=False,
    truncation=True,
    max_length=MAX_LENGTH,
    padding=False,
)
print(input_tokens)

generation_output = model.generate(
    mx.array(input_tokens['input_ids']),
    max_new_tokens=30,
    use_cache=True,
    return_dict_in_generate=True,
)
print(generation_output)
I get this error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[3], line 20
11 input_tokens = model.tokenizer(input_text,
12 return_tensors="np",
13 return_attention_mask=False,
14 truncation=True,
15 max_length=MAX_LENGTH,
16 padding=False)
18 print(input_tokens)
---> 20 generation_output = model.generate(
21 mx.array(input_tokens['input_ids']),
22 max_new_tokens=30,
23 use_cache=True,
24 return_dict_in_generate=True)
26 print(generation_output)
File ~/local_projects/airllm/.venv/lib/python3.12/site-packages/airllm/airllm_llama_mlx.py:254, in AirLLMLlamaMlx.generate(self, x, temperature, max_new_tokens, **kwargs)
252 def generate(self, x, temperature=0, max_new_tokens=None, **kwargs):
253 tokens = []
--> 254 for token in self.model_generate(x, temperature=temperature):
255 tokens.append(token)
258 if len(tokens) >= max_new_tokens:
File ~/local_projects/airllm/.venv/lib/python3.12/site-packages/airllm/airllm_llama_mlx.py:312, in AirLLMLlamaMlx.model_generate(self, x, temperature, max_new_tokens)
306 l = TransformerBlock(args=self.model_args)
307 l.update(
308 ModelPersister.get_model_persister().load_model(f'{self.layer_names_dict["layer_prefix"]}.{il}',
309 self.checkpoint_path)['layers'][il]
310 )
--> 312 x, c = l(x, mask=mask)
313 # force execution
314 mx.eval(x)
File ~/local_projects/airllm/.venv/lib/python3.12/site-packages/airllm/airllm_llama_mlx.py:173, in TransformerBlock.__call__(self, x, mask, cache)
167 def __call__(
168 self,
169 x: mx.array,
170 mask: Optional[mx.array] = None,
171 cache: Optional[Tuple[mx.array, mx.array]] = None,
172 ) -> mx.array:
--> 173 r, cache = self.attention(self.attention_norm(x), mask, cache)
174 h = x + r
175 r = self.feed_forward(self.ffn_norm(h))
File ~/local_projects/airllm/.venv/lib/python3.12/site-packages/airllm/airllm_llama_mlx.py:113, in Attention.__call__(self, x, mask, cache)
105 def __call__(
106 self,
107 x: mx.array,
108 mask: Optional[mx.array] = None,
109 cache: Optional[Tuple[mx.array, mx.array]] = None,
110 ) -> mx.array:
111 B, L, D = x.shape
--> 113 queries, keys, values = self.wq(x), self.wk(x), self.wv(x)
115 # Prepare the queries, keys and values for the attention computation
116 queries = queries.reshape(B, L, self.n_heads, -1).transpose(0, 2, 1, 3)
File ~/local_projects/airllm/.venv/lib/python3.12/site-packages/mlx/nn/layers/linear.py:70, in Linear.__call__(self, x)
68 x = mx.addmm(self[\"bias\"], x, self[\"weight\"].T)
69 else:
---> 70 x = x @ self[\"weight\"].T
71 return x
AttributeError: 'dict' object has no attribute 'T'
I'm pretty much following the provided examples, what am I missing?
The text was updated successfully, but these errors were encountered:
When running the following on Mac M3:
I get this error:
I'm pretty much following the provided examples, what am I missing?
The text was updated successfully, but these errors were encountered: