# Imports for this section (they may already be present earlier in the script).
from transformers import LlamaForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from huggingface_hub import HfApi, create_repo

### Create the llama model with custom config. Convert it to bitnet.
model = LlamaForCausalLM(config)
convert_to_bitnet(model, copy_weights=False)
model_size = sum(t.numel() for t in model.parameters())
print(f"Model size: {model_size/1000**2:.1f}M parameters")
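# Optional sanity check (a quick sketch, not part of the original script): confirm
# that convert_to_bitnet swapped the nn.Linear projections for BitLinear modules.
# The class-name match assumes the BitLinear layer defined earlier in the tutorial.
num_bitlinear = sum(1 for m in model.modules() if type(m).__name__ == "BitLinear")
print(f"BitLinear layers: {num_bitlinear}")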
# Causal-LM data collation: reuse the EOS token for padding and disable the MLM objective.
tokenizer.pad_token = tokenizer.eos_token
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

output_path = "./out"
args = TrainingArguments(
    output_dir=output_path,
    per_device_train_batch_size=BATCH_SIZE,
    logging_steps=100,
    gradient_accumulation_steps=2,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    warmup_ratio=0.1,  # warm up over the first 10% of steps (warmup_steps expects an integer step count)
    lr_scheduler_type="cosine",
    learning_rate=LEARNING_RATE,
    save_steps=0.25,  # a float < 1 is treated as a fraction of total training steps
    fp16=True,
    report_to="wandb",
)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    data_collator=data_collator,
    train_dataset=tokenized_data["train"],
)
trainer.train()
trainer.save_model(f"{output_path}/final_model")

# Create the target repo on the Hugging Face Hub (no-op if it already exists).
folder = f"{output_path}/final_model"
api = HfApi()
create_repo(
    repo_id=f"{HUGGINGFACE_ID}/{NEW_MODEL}",
    repo_type="model",
    exist_ok=True,
    token=HF_TOKEN,
)

# Upload the model files
api.upload_folder(
    folder_path=folder,
    repo_type="model",
    repo_id=f"{HUGGINGFACE_ID}/{NEW_MODEL}",
    token=HF_TOKEN,
)
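# Quick smoke test after the upload (a sketch, not part of the original script): pull
# the checkpoint back from the Hub and generate a few tokens. It assumes the tokenizer
# was uploaded alongside the weights and that convert_to_bitnet(..., copy_weights=True)
# re-applies the BitLinear layers to a freshly loaded Llama model, as in the earlier
# conversion step; the prompt is just an example.
from transformers import AutoTokenizer

trained_model = LlamaForCausalLM.from_pretrained(f"{HUGGINGFACE_ID}/{NEW_MODEL}", token=HF_TOKEN)
trained_tokenizer = AutoTokenizer.from_pretrained(f"{HUGGINGFACE_ID}/{NEW_MODEL}", token=HF_TOKEN)
convert_to_bitnet(trained_model, copy_weights=True)

inputs = trained_tokenizer("Once upon a time", return_tensors="pt")
outputs = trained_model.generate(**inputs, max_new_tokens=50)
print(trained_tokenizer.decode(outputs[0], skip_special_tokens=True))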