@@ -50,26 +50,111 @@ static string expand_path(const char *path) {
  return result;
}

+//
+// llama.set_max_tokens(50)
+//
+static int cmd_llama_set_max_tokens(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_max_tokens", 1, 1);
+  } else {
+    int id = get_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_map.at(id);
+      llama.set_max_tokens(get_param_int(argc, arg, 0, 0));
+      result = 1;
+    }
+  }
+  return result;
+}
+
+//
+// llama.set_min_p(0.5)
+//
+static int cmd_llama_set_min_p(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_min_p", 1, 1);
+  } else {
+    int id = get_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_map.at(id);
+      llama.set_min_p(get_param_num(argc, arg, 0, 0));
+      result = 1;
+    }
+  }
+  return result;
+}
+
+//
+// llama.set_temperature(0.8)
+//
+static int cmd_llama_set_temperature(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_temperature", 1, 1);
+  } else {
+    int id = get_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_map.at(id);
+      llama.set_temperature(get_param_num(argc, arg, 0, 0));
+      result = 1;
+    }
+  }
+  return result;
+}
+
+//
+// llama.set_top_k(10)
+//
+static int cmd_llama_set_top_k(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_top_k", 1, 1);
+  } else {
+    int id = get_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_map.at(id);
+      llama.set_top_k(get_param_int(argc, arg, 0, 0));
+      result = 1;
+    }
+  }
+  return result;
+}
+
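+//
+// llama.set_top_p(0.9)
+//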
+static int cmd_llama_set_top_p(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_top_p", 1, 1);
+  } else {
+    int id = get_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_map.at(id);
+      llama.set_top_p(get_param_num(argc, arg, 0, 0));
+      result = 1;
+    }
+  }
+  return result;
+}
+
//
// print llama.chat("Hello")
//
static int cmd_llama_chat(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
  int result = 0;
-  if (argc < 1) {
-    error(retval, "llama.chat", 1, 3);
+  if (argc != 1) {
+    error(retval, "llama.chat", 1, 1);
  } else {
    int id = get_class_id(self, retval);
    if (id != -1) {
      Llama &llama = g_map.at(id);
      auto prompt = get_param_str(argc, arg, 0, "");
-      int max_tokens = get_param_int(argc, arg, 1, 32);
-      var_num_t temperature = get_param_num(argc, arg, 2, 0.8f);

      // build accumulated prompt
      string updated_prompt = llama.build_chat_prompt(prompt);

      // run generation WITHOUT clearing cache
-      string response = llama.generate(updated_prompt, max_tokens, temperature);
+      string response = llama.generate(updated_prompt);

      // append assistant reply to history
      llama.append_response(response);
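After this hunk, chat() accepts only the prompt; the token limit and sampling settings come from the per-object setters added above. A minimal SmallBASIC sketch of the intended call pattern, assuming an object has already been created (the variable name ai is illustrative, not from this diff):

ai.set_max_tokens(50)    ' cap the reply length
ai.set_temperature(0.8)  ' sampling temperature
print ai.chat("Hello")   ' uses the settings above; chat history is kept across calls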
@@ -100,20 +185,18 @@ static int cmd_llama_reset(var_s *self, int argc, slib_par_t *arg, var_s *retval
}

//
-// print llama.generate("please generate as simple program in BASIC to draw a cat", 1024, 0.8)
+// print llama.generate("please generate a simple program in BASIC to draw a cat")
//
static int cmd_llama_generate(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
  int result = 0;
-  if (argc < 1) {
-    error(retval, "llama.generate", 1, 3);
+  if (argc != 1) {
+    error(retval, "llama.generate", 1, 1);
  } else {
    int id = get_class_id(self, retval);
    if (id != -1) {
      Llama &llama = g_map.at(id);
      auto prompt = get_param_str(argc, arg, 0, "");
-      int max_tokens = get_param_int(argc, arg, 1, 32);
-      var_num_t temperature = get_param_num(argc, arg, 2, 0.8f);
-      string response = llama.generate(prompt, max_tokens, temperature);
+      string response = llama.generate(prompt);
      v_setstr(retval, response.c_str());
      result = 1;
    }
@@ -124,12 +207,19 @@ static int cmd_llama_generate(var_s *self, int argc, slib_par_t *arg, var_s *ret
static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
  int result;
  auto model = expand_path(get_param_str(argc, params, 0, ""));
-  int n_ctx = get_param_int(argc, params, 0, 2048);
-  int disable_log = get_param_int(argc, params, 1, 1);
+  auto n_ctx = get_param_int(argc, params, 0, 2048);
+  auto n_batch = get_param_int(argc, params, 1, 1024);
+  auto temperature = get_param_num(argc, params, 2, 0.25);
  int id = ++g_nextId;
  Llama &llama = g_map[id];
-  if (llama.construct(model, n_ctx, disable_log)) {
+  if (llama.construct(model, n_ctx, n_batch)) {
+    llama.set_temperature(temperature);
    map_init_id(retval, id, CLASS_ID);
+    v_create_callback(retval, "set_max_tokens", cmd_llama_set_max_tokens);
+    v_create_callback(retval, "set_min_p", cmd_llama_set_min_p);
+    v_create_callback(retval, "set_temperature", cmd_llama_set_temperature);
+    v_create_callback(retval, "set_top_k", cmd_llama_set_top_k);
+    v_create_callback(retval, "set_top_p", cmd_llama_set_top_p);
    v_create_callback(retval, "chat", cmd_llama_chat);
    v_create_callback(retval, "generate", cmd_llama_generate);
    v_create_callback(retval, "reset", cmd_llama_reset);
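The constructor now takes n_ctx and n_batch instead of a disable_log flag, and the default temperature of 0.25 is applied through set_temperature() right after construction. A minimal SmallBASIC usage sketch, assuming the plugin is imported as llama and the factory is exposed as llama.init (that function name and the model path are assumptions, not shown in this diff):

import llama

' n_ctx, n_batch and temperature fall back to 2048, 1024 and 0.25
ai = llama.init("models/model.gguf")
ai.set_top_k(40)
ai.set_top_p(0.9)
print ai.generate("write a haiku about BASIC")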