3939#define MAX_TOKENS_DEFAULT 256
4040#define RESPONSE_BUF_SIZE (512 * 1024) /* 512 KB response buffer */
4141#define SSE_CHUNK_SIZE 4096
42+ #define SOCKET_TIMEOUT_SEC 30 /* read/write timeout per socket */
43+ #define MAX_ACTIVE_CONNS 32 /* hard limit on threads */
4244
4345/* ============================================================
4446 * Server state
@@ -48,7 +50,8 @@ struct tq_server {
4850 tq_server_config_t config ;
4951 int listen_fd ;
5052 atomic_int running ;
51- pthread_mutex_t inference_mutex ; /* serialize inference (single model state) */
53+ atomic_int active_connections ; /* track concurrent threads */
54+ pthread_mutex_t inference_mutex ; /* serialize inference (single model state) */
5255};
5356
5457/* Global server pointer for signal handler */
@@ -281,12 +284,14 @@ static char* build_prompt(const chat_request_t* req) {
281284 if (!prompt ) return NULL ;
282285
283286 char * w = prompt ;
287+ size_t remaining = total ;
284288 for (int i = 0 ; i < req -> n_messages ; i ++ ) {
285- w += sprintf (w , "<|im_start|>%s\n%s<|im_end|>\n" ,
286- req -> messages [i ].role ,
287- req -> messages [i ].content ? req -> messages [i ].content : "" );
289+ int n = snprintf (w , remaining , "<|im_start|>%s\n%s<|im_end|>\n" ,
290+ req -> messages [i ].role ,
291+ req -> messages [i ].content ? req -> messages [i ].content : "" );
292+ if (n > 0 && (size_t )n < remaining ) { w += n ; remaining -= (size_t )n ; }
288293 }
289- w += sprintf ( w , "<|im_start|>assistant\n" );
294+ snprintf ( w , remaining , "<|im_start|>assistant\n" );
290295
291296 return prompt ;
292297}
@@ -841,9 +846,10 @@ static int read_http_request(int fd, char* buf, int buf_size, http_request_t* re
841846 const char * next = strstr (hp , "\r\n" );
842847 if (!next ) break ;
843848
844- /* Content-Length */
849+ /* Content-Length — parse with strtol (atoi is UB on overflow); values <= 0 or > MAX_BODY_SIZE become 0 */
845850 if (strncasecmp (hp , "Content-Length:" , 15 ) == 0 ) {
846- req -> content_length = atoi (hp + 15 );
851+ long cl = strtol (hp + 15 , NULL , 10 );
852+ req -> content_length = (cl > 0 && cl <= MAX_BODY_SIZE ) ? (int )cl : 0 ;
847853 }
848854 /* Content-Type */
849855 if (strncasecmp (hp , "Content-Type:" , 13 ) == 0 ) {
@@ -902,6 +908,11 @@ static void* handle_connection(void* arg) {
902908 tq_server_t * server = ctx -> server ;
903909 free (ctx );
904910
911+ /* Set socket read/write timeout to prevent slow-loris attacks */
912+ struct timeval sock_tv = { .tv_sec = SOCKET_TIMEOUT_SEC , .tv_usec = 0 };
913+ setsockopt (fd , SOL_SOCKET , SO_RCVTIMEO , & sock_tv , sizeof (sock_tv ));
914+ setsockopt (fd , SOL_SOCKET , SO_SNDTIMEO , & sock_tv , sizeof (sock_tv ));
915+
905916 char * buf = (char * )malloc (HTTP_BUF_SIZE + MAX_BODY_SIZE );
906917 if (!buf ) {
907918 close (fd );
@@ -946,6 +957,7 @@ static void* handle_connection(void* arg) {
946957
947958 close (fd );
948959 free (buf );
960+ atomic_fetch_sub (& server -> active_connections , 1 );
949961 return NULL ;
950962}
951963
@@ -991,6 +1003,7 @@ int tq_server_start(tq_server_t** out, const tq_server_config_t* config) {
9911003
9921004 server -> config = * config ;
9931005 atomic_store (& server -> running , 1 );
1006+ atomic_store (& server -> active_connections , 0 );
9941007 pthread_mutex_init (& server -> inference_mutex , NULL );
9951008
9961009 /* Install signal handlers */
@@ -1070,10 +1083,19 @@ int tq_server_start(tq_server_t** out, const tq_server_config_t* config) {
10701083 continue ;
10711084 }
10721085
1086+ /* Enforce connection limit to prevent resource exhaustion */
1087+ if (atomic_load (& server -> active_connections ) >= MAX_ACTIVE_CONNS ) {
1088+ LOG_ERROR ("Connection limit reached (%d), rejecting" , MAX_ACTIVE_CONNS );
1089+ close (client_fd );
1090+ continue ;
1091+ }
1092+ atomic_fetch_add (& server -> active_connections , 1 );
1093+
10731094 /* Spawn a thread for this connection */
10741095 conn_ctx_t * conn = (conn_ctx_t * )malloc (sizeof (conn_ctx_t ));
10751096 if (!conn ) {
10761097 close (client_fd );
1098+ atomic_fetch_sub (& server -> active_connections , 1 );
10771099 continue ;
10781100 }
10791101 conn -> server = server ;
@@ -1087,6 +1109,7 @@ int tq_server_start(tq_server_t** out, const tq_server_config_t* config) {
10871109 LOG_ERROR ("Failed to create thread: %s" , strerror (errno ));
10881110 close (client_fd );
10891111 free (conn );
1112+ atomic_fetch_sub (& server -> active_connections , 1 );
10901113 }
10911114 pthread_attr_destroy (& attr );
10921115 }
0 commit comments