Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

archiver.c

Go to the documentation of this file.
00001 
00014 /* {{{ Initial comments */
00015 /*
00016  * $LastChangedDate: 2004-04-01 18:34:17 +0200 (Thu, 01 Apr 2004) $
00017  * $LastChangedRevision: 50 $
00018  * $LastChangedBy: ckruse $
00019  *
00020  */
00021 /* }}} */
00022 
00023 /* {{{ Includes */
00024 #include "config.h"
00025 #include "defines.h"
00026 
00027 #include <stdlib.h>
00028 #include <stdio.h>
00029 #include <pthread.h>
00030 #include <string.h>
00031 #include <ctype.h>
00032 #include <errno.h>
00033 #include <unistd.h>
00034 
00035 #include <sys/stat.h>
00036 #include <sys/wait.h>
00037 #include <time.h>
00038 
00039 #include <gdome.h>
00040 
00041 #include "cf_pthread.h"
00042 
00043 #include "hashlib.h"
00044 #include "utils.h"
00045 #include "configparser.h"
00046 #include "readline.h"
00047 #include "fo_server.h"
00048 #include "serverlib.h"
00049 #include "xml_handling.h"
00050 #include "charconvert.h"
00051 #include "archiver.h"
00052 /* }}} */
00053 
00054 /* {{{ cf_delete_threadfile */
00055 void cf_delete_threadfile(t_thread *t) {
00056   t_name_value *msgdir = cfg_get_value(&fo_default_conf,"MessagePath");
00057   u_char buff[256];
00058 
00059   snprintf(buff,256,"%s/t%lld.xml",msgdir->values[0],t->tid);
00060   unlink(buff);
00061 }
00062 /* }}} */
00063 
00064 /* {{{ cf_get_time */
00065 time_t cf_get_time(GdomeNode *n) {
00066   time_t ret = 0;
00067   GdomeException e;
00068   GdomeDOMString *dstr = gdome_str_mkref("Date");
00069   GdomeNodeList  *nl   = gdome_el_getElementsByTagName((GdomeElement *)n,dstr,&e);
00070 
00071   if(nl) {
00072     GdomeDOMString    *ls_str = gdome_str_mkref("longSec");
00073     GdomeNode         *n1     = gdome_nl_item(nl,0,&e);
00074     GdomeNamedNodeMap *nnm    = gdome_n_attributes(n1,&e);
00075     GdomeNode         *date   = gdome_nnm_getNamedItem(nnm,ls_str,&e);
00076     u_char             *dt     = get_node_value(date);
00077 
00078     if(dt) {
00079       ret = strtol(dt,NULL,10);
00080       free(dt);
00081     }
00082 
00083     gdome_n_unref(date,&e);
00084     gdome_str_unref(ls_str);
00085     gdome_nnm_unref(nnm,&e);
00086     gdome_n_unref(n1,&e);
00087 
00088     gdome_nl_unref(nl,&e);
00089   }
00090 
00091   gdome_str_unref(dstr);
00092 
00093   return ret;
00094 }
00095 /* }}} */
00096 
00097 /* {{{ cf_make_path */
00098 void cf_make_path(u_char *path) {
00099   register u_char *ptr = path+1;
00100   int ret;
00101 
00102   /*
00103    * search whole string for a directory separator
00104    */
00105   for (;*ptr;ptr++) {
00106     /*
00107      * when a directory separator is given, create path 'till there
00108      */
00109     if (*ptr == '/') {
00110       *ptr = '\0';
00111       ret = mkdir(path,S_IRWXU|S_IRWXG|S_IRWXO);
00112       *ptr = '/';
00113 
00114       if (ret && errno != EEXIST) {
00115         cf_log(LOG_ERR,__FILE__,__LINE__,"mkdir: %s",strerror(errno));
00116         return;
00117       }
00118     }
00119   }
00120 
00121   return;
00122 }
00123 /* }}} */
00124 
00125 /* {{{ cf_archive_threads */
00126 void cf_archive_threads(t_thread **to_archive,int len) {
00127   t_posting *p,*p1;
00128   GdomeException e;
00129   GdomeDOMImplementation *impl = gdome_di_mkref();
00130   GdomeDocument *doc1          = NULL,*doc2;
00131   t_name_value  *apath         = cfg_get_value(&fo_default_conf,"ArchivePath");
00132 
00133   GdomeDOMString *str_thread   = gdome_str_mkref("Thread");
00134 
00135   GdomeElement *thread1;
00136   GdomeElement *thread2;
00137   GdomeElement *msgcnt;
00138 
00139   GdomeNodeList *nl;
00140   GdomeNode *parent_thr;
00141 
00142   struct tm *tm;
00143   int i,j,nlen,mon = -1,year = -1;
00144   u_char buff[256];
00145   struct stat st;
00146 
00147   for(i=0;i<len;i++) {
00148     parent_thr = NULL;
00149     tm = localtime(&to_archive[i]->postings->date);
00150 
00151     if(tm->tm_mon != mon || tm->tm_year != year) {
00152       if(doc1) {
00153         snprintf(buff,256,"%s/%d/%d/index.xml",apath->values[0],year+1900,mon+1);
00154         if(!gdome_di_saveDocToFile(impl,doc1,buff,0,&e)) {
00155           cf_log(LOG_ERR,__FILE__,__LINE__,"ERROR! COULD NOT WRITE XML FILE!\n");
00156         }
00157         gdome_doc_unref(doc1,&e);
00158       }
00159 
00160       snprintf(buff,256,"%s/%d/%d/index.xml",apath->values[0],tm->tm_year+1900,tm->tm_mon+1);
00161       cf_make_path(buff);
00162 
00163       if(stat(buff,&st) == 0) {
00164         if((doc1 = gdome_di_createDocFromURI(impl,buff,GDOME_LOAD_PARSING,&e)) == NULL) {
00165           cf_log(LOG_ERR,__FILE__,__LINE__,"ERROR! COULD NOT READ XML FILE!\n");
00166           return;
00167         }
00168       }
00169       else {
00170         doc1 = xml_create_doc(impl,FORUM_DTD);
00171       }
00172 
00173       mon  = tm->tm_mon;
00174       year = tm->tm_year;
00175     }
00176 
00177     cf_log(LOG_STD,__FILE__,__LINE__,"Archiver: archiving thread t%lld\n",to_archive[i]->tid);
00178 
00179     doc2    = xml_create_doc(impl,FORUM_DTD);
00180 
00181     /* first lets create the necessary elements */
00182     thread1 = xml_create_element(doc1,"Thread");
00183     thread2 = xml_create_element(doc2,"Thread");
00184     msgcnt  = xml_create_element(doc2,"ContentList");
00185 
00186     /* lets set the tid attribute */
00187     sprintf(buff,"t%lld",to_archive[i]->tid);
00188     xml_set_attribute(thread1,"id",buff);
00189     xml_set_attribute(thread2,"id",buff);
00190 
00191     /* find the previous sibling of the thread: search all thread elements, ... */
00192     nl   = gdome_doc_getElementsByTagName(doc1,str_thread,&e);
00193     nlen = gdome_nl_length(nl,&e);
00194 
00195     /* and then search the right thread element */
00196     for(j=0;j<nlen;j++) {
00197       parent_thr = gdome_nl_item(nl,j,&e);
00198       if(cf_get_time(parent_thr) < to_archive[i]->postings->date) {
00199         break;
00200       }
00201 
00202       gdome_n_unref(parent_thr,&e);
00203     }
00204 
00205     stringify_posting(doc1,thread1,doc2,thread2,to_archive[i]->postings);
00206 
00207     if(j == nlen || j == 0) {
00208       GdomeElement *root = gdome_doc_documentElement(doc1,&e);
00209       gdome_el_appendChild(root,(GdomeNode *)thread1,&e);
00210       gdome_el_unref(root,&e);
00211     }
00212     else {
00213       GdomeElement *root = gdome_doc_documentElement(doc1,&e);
00214       gdome_n_insertBefore((GdomeNode *)root,(GdomeNode *)thread1,parent_thr,&e);
00215       gdome_el_unref(root,&e);
00216     }
00217 
00218     for(p=to_archive[i]->postings;p;p=p1) {
00219       GdomeDOMString *str;
00220       GdomeElement *el = xml_create_element(doc2,"MessageContent");
00221       GdomeCDATASection *cd;
00222 
00223       sprintf(buff,"m%lld",p->mid);
00224       xml_set_attribute(el,"mid",buff);
00225 
00226       str = gdome_str_mkref_dup(p->content);
00227 
00228       cd = gdome_doc_createCDATASection(doc2,str,&e);
00229       gdome_el_appendChild(el,(GdomeNode *)cd,&e);
00230       gdome_el_appendChild(msgcnt,(GdomeNode *)el,&e);
00231 
00232       gdome_str_unref(str);
00233       gdome_el_unref(el,&e);
00234       gdome_cds_unref(cd,&e);
00235 
00236       free(p->user.name);
00237       free(p->user.ip);
00238       free(p->subject);
00239       free(p->unid);
00240       free(p->content);
00241       if(p->user.email) free(p->user.email);
00242       if(p->user.img)   free(p->user.img);
00243       if(p->user.hp)    free(p->user.hp);
00244       if(p->category)   free(p->category);
00245 
00246       p1 = p->next;
00247       free(p);
00248     }
00249 
00250     if(parent_thr) gdome_n_unref(parent_thr,&e);
00251 
00252     parent_thr = (GdomeNode *)gdome_doc_documentElement(doc2,&e);
00253     gdome_n_appendChild(parent_thr,(GdomeNode *)thread2,&e);
00254     gdome_n_appendChild(parent_thr,(GdomeNode *)msgcnt,&e);
00255     gdome_n_unref(parent_thr,&e);
00256 
00257     gdome_nl_unref(nl,&e);
00258     gdome_el_unref(msgcnt,&e);
00259     gdome_el_unref(thread1,&e);
00260     gdome_el_unref(thread2,&e);
00261 
00262     cf_rwlock_destroy(&to_archive[i]->lock);
00263     cf_delete_threadfile(to_archive[i]);
00264 
00265     snprintf(buff,256,"%s/%d/%d/t%lld.xml",apath->values[0],year+1900,mon+1,to_archive[i]->tid);
00266     if(!gdome_di_saveDocToFile(impl,doc2,buff,0,&e)) {
00267       cf_log(LOG_ERR,__FILE__,__LINE__,"ERROR! COULD NOT WRITE XML FILE!\n");
00268     }
00269     gdome_doc_unref(doc2,&e);
00270 
00271     free(to_archive[i]);
00272   }
00273 
00274   snprintf(buff,256,"%s/%d/%d/index.xml",apath->values[0],year+1900,mon+1);
00275   if(!gdome_di_saveDocToFile(impl,doc1,buff,0,&e)) {
00276     cf_log(LOG_ERR,__FILE__,__LINE__,"ERROR! COULD NOT WRITE XML FILE!\n");
00277   }
00278   gdome_doc_unref(doc1,&e);
00279 
00280   gdome_str_unref(str_thread);
00281   gdome_di_unref(impl,&e);
00282 }
00283 /* }}} */
00284 
00285 /* {{{ cf_run_archiver_and_write_to_disk
00286  * Returns: nothing
00287  * Parameters:
00288  *
00289  * This function writes everything to disk and runs the archiver.
00290  *
00291  */
00292 void cf_run_archiver_and_write_to_disk(void) {
00293   t_thread *t,*oldest_t,*prev = NULL,**to_archive = NULL,*oldest_prev;
00294   t_posting *oldest,*newest_in_t;
00295   long size,threadnum,pnum,max_bytes,max_threads,max_posts;
00296   int shall_archive = 0,len = 0,ret = FLT_OK;
00297   t_name_value *max_bytes_v     = cfg_get_value(&fo_server_conf,"MainFileMaxBytes");
00298   t_name_value *max_posts_v     = cfg_get_value(&fo_server_conf,"MainFileMaxPostings");
00299   t_name_value *max_threads_v   = cfg_get_value(&fo_server_conf,"MainFileMaxThreads");
00300   GdomeDOMImplementation *impl = gdome_di_mkref();
00301   GdomeException e;
00302   GdomeDocument *doc           = xml_create_doc(impl,FORUM_DTD);
00303   GdomeElement *el             = gdome_doc_documentElement(doc,&e);
00304   t_name_value  *mpath         = cfg_get_value(&fo_default_conf,"MessagePath");
00305   u_char buff[256];
00306   pid_t pid;
00307   int status;
00308   size_t i;
00309   t_handler_config *handler;
00310   t_archive_filter fkt;
00311 
00312   max_bytes   = strtol(max_bytes_v->values[0],NULL,10);
00313   max_posts   = strtol(max_posts_v->values[0],NULL,10);
00314   max_threads = strtol(max_threads_v->values[0],NULL,10);
00315 
00316   do {
00317     CF_RW_RD(&head.lock);
00318 
00319     size          = head.cache_invisible.len;
00320     t             = head.thread;
00321     threadnum     = 0;
00322     pnum          = 0;
00323     shall_archive = 0;
00324     oldest        = NULL;
00325     oldest_t      = NULL;
00326     oldest_prev   = NULL;
00327     newest_in_t   = NULL;
00328 
00329     CF_RW_UN(&head.lock);
00330 
00331     if(!t) return;
00332 
00333     /* since we have exclusive access to the messages, we need no longer locking to the messages itself */
00334     do {
00335       threadnum++;
00336 
00337       CF_RW_RD(&t->lock);
00338 
00339       newest_in_t = t->newest;
00340       pnum       += t->posts;
00341 
00342       if(!oldest || newest_in_t->date < oldest->date) {
00343         oldest      = newest_in_t;
00344         oldest_t    = t;
00345         oldest_prev = prev;
00346       }
00347 
00348       prev = t;
00349       t    = t->next;
00350 
00351       CF_RW_UN(&prev->lock);
00352     } while(t);
00353 
00354     /* ok, we went through the hole threadlist. There we cannot slice very good, so yield */
00355     pthread_yield();
00356 
00357     if(size > max_bytes) {
00358       shall_archive = 1;
00359       cf_log(LOG_STD,__FILE__,__LINE__,"Archiver: Criterium: max bytes, Values: Config: %ld, Real: %ld\n",max_bytes,size);
00360     }
00361     if(pnum > max_posts) {
00362       shall_archive = 1;
00363       cf_log(LOG_STD,__FILE__,__LINE__,"Archiver: Criterium: max posts, Values: Config: %ld, Real: %ld\n",max_posts,pnum);
00364     }
00365     if(threadnum > max_threads) {
00366       shall_archive = 1;
00367       cf_log(LOG_STD,__FILE__,__LINE__,"Archiver: Criterium: max threads, Values: Config: %ld, Real: %ld\n",max_threads,threadnum);
00368     }
00369 
00370     if(shall_archive) {
00371       to_archive        = fo_alloc(to_archive,++len,sizeof(t_thread *),FO_ALLOC_REALLOC);
00372       to_archive[len-1] = oldest_t;
00373 
00374       /*
00375        * This action is synchronized due to a mutex. So if the thread is
00376        * unregistered and pointers are re-set, everything is safe...
00377        */
00378       cf_unregister_thread(oldest_t);
00379 
00380       /*
00381        * if we lock oldest_t before oldest_prev this
00382        * could cause a dead lock or some undefined behavior
00383        */
00384       if(oldest_prev) CF_RW_WR(&oldest_prev->lock);
00385 
00386       CF_RW_WR(&oldest_t->lock);
00387 
00388       if(oldest_prev) {
00389         oldest_prev->next       = oldest_t->next;
00390         if(oldest_prev->next) oldest_prev->next->prev = oldest_prev; /* is NULL if the last thread is being archived */
00391 
00392         CF_RW_UN(&oldest_prev->lock);
00393       }
00394       else {
00395         CF_RW_WR(&head.lock);
00396         head.thread = oldest_t->next;
00397         CF_RW_UN(&head.lock);
00398       }
00399 
00400       /* all references to this thread are released, so run the archiver plugins */
00401       if(Modules[ARCHIVE_HANDLER].elements) {
00402         ret = FLT_OK;
00403 
00404         for(i=0;i<Modules[ARCHIVE_HANDLER].elements && (ret == FLT_DECLINE || ret == FLT_OK);i++) {
00405           handler = array_element_at(&Modules[ARCHIVE_HANDLER],i);
00406           fkt     = (t_archive_filter)handler->func;
00407           ret     = fkt(oldest_t);
00408         }
00409       }
00410 
00411       if(ret == FLT_EXIT) {
00412         t_posting *p,*p1;
00413         for(p=to_archive[len-1]->postings;p;p=p1) {
00414           free(p->user.name);
00415           free(p->user.ip);
00416           free(p->subject);
00417           free(p->unid);
00418           free(p->content);
00419           if(p->user.email) free(p->user.email);
00420           if(p->user.img) free(p->user.img);
00421           if(p->user.hp) free(p->user.hp);
00422           if(p->category) free(p->category);
00423 
00424           p1 = p->next;
00425           free(p);
00426         }
00427 
00428         free(to_archive[len-1]);
00429         to_archive = fo_alloc(to_archive,--len,sizeof(t_thread *),FO_ALLOC_REALLOC);
00430       }
00431     }
00432   } while(shall_archive);
00433 
00434 
00435   /* after archiving, we re-generate the cache */
00436   cf_generate_cache(NULL);
00437 
00438   /* ok, this may have token some time, so yield... */
00439   pthread_yield();
00440 
00441   cf_log(LOG_STD,__FILE__,__LINE__,"archiver ran. Writing threadlists...\n");
00442 
00443   CF_RW_RD(&head.lock);
00444   t = head.thread;
00445   CF_RW_UN(&head.lock);
00446 
00447   sprintf(buff,"m%lld",head.mid);
00448   xml_set_attribute(el,"lastMessage",buff);
00449 
00450   sprintf(buff,"t%lld",head.tid);
00451   xml_set_attribute(el,"lastThread",buff);
00452 
00453   gdome_el_unref(el,&e);
00454 
00455   /*
00456    * *very* nasty workaround for a memory leek in the gdome lib
00457    *
00458    * Hm, this has a nice effect: normally when the archiver runs, the
00459    * server would lag a little bit. But since all expensive operations
00460    * are done in a child process, the server itself would not lag longer
00461    */
00462   pid = fork();
00463   switch(pid) {
00464   case -1:
00465     cf_log(LOG_ERR,__FILE__,__LINE__,"fork: %s\n",strerror(errno));
00466     break;
00467 
00468   case 0:
00469     /* we write the threadlist */
00470     while(t) {
00471       /* we need no locking in the child process */
00472       stringify_thread_and_write_to_disk(doc,t);
00473       t = t->next;
00474     };
00475 
00476     snprintf(buff,256,"%s/forum.xml",mpath->values[0]);
00477 
00478     if(!gdome_di_saveDocToFile(impl,doc,buff,0,&e)) {
00479       cf_log(LOG_ERR,__FILE__,__LINE__,"ERROR! COULD NOT WRITE XML FILE!\n");
00480     }
00481     gdome_doc_unref(doc,&e);
00482 
00483     exit(0);
00484     break;
00485 
00486   default:
00487     cf_log(LOG_STD,__FILE__,__LINE__,"writing threadlist...\n");
00488     waitpid(pid,&status,0);
00489     cf_log(LOG_STD,__FILE__,__LINE__,"finished writing threadlist!\n");
00490     break;
00491   }
00492 
00493   gdome_doc_unref(doc,&e);
00494 
00495   if(len) {
00496     long i;
00497     pid = fork();
00498 
00499     /* nasty workaround... */
00500     switch(pid) {
00501     case -1:
00502       cf_log(LOG_ERR,__FILE__,__LINE__,"fork: %s\n",strerror(errno));
00503       break;
00504     case 0:
00505       cf_archive_threads(to_archive,len);
00506       exit(0);
00507     default:
00508       cf_log(LOG_STD,__FILE__,__LINE__,"waiting for archive_threads\n");
00509       waitpid(pid,&status,0);
00510       cf_log(LOG_STD,__FILE__,__LINE__,"archive_threads finished!\n");
00511       break;
00512     }
00513 
00514     for(i=0;i<len;i++) {
00515       t_posting *p = NULL,*p1 = NULL;
00516 
00517       for(p=to_archive[i]->postings;p;p=p1) {
00518         free(p->user.name);
00519         free(p->user.ip);
00520         free(p->subject);
00521         free(p->unid);
00522         free(p->content);
00523         if(p->user.email) free(p->user.email);
00524         if(p->user.img) free(p->user.img);
00525         if(p->user.hp) free(p->user.hp);
00526         if(p->category) free(p->category);
00527 
00528         p1 = p->next;
00529         free(p);
00530       }
00531 
00532       CF_RW_UN(&to_archive[i]->lock);
00533       cf_rwlock_destroy(&to_archive[i]->lock);
00534       free(to_archive[i]);
00535     }
00536 
00537     free(to_archive);
00538   }
00539 
00540   gdome_di_unref(impl,&e);
00541 }
00542 /* }}} */
00543 
00544 /* {{{ cf_archive_thread */
00545 void cf_archive_thread(int sockfd,u_int64_t tid) {
00546   t_thread *t = NULL,*prev = NULL,**list = NULL;
00547   t_posting *p,*p1;
00548   pid_t pid;
00549   int status;
00550 
00551   CF_RW_RD(&head.lock);
00552   t = head.thread;
00553   CF_RW_UN(&head.lock);
00554 
00555   if(t->tid != tid) {
00556     do {
00557       CF_RW_RD(&t->lock);
00558 
00559       prev = t;
00560       t    = t->next;
00561 
00562       CF_RW_UN(&prev->lock);
00563     } while(t && t->tid != tid);
00564   }
00565 
00566   if(!t) {
00567     writen(sockfd,"404 Thread Not Found\n",21);
00568     cf_log(LOG_ERR,__FILE__,__LINE__,"Thread not found\n");
00569     return;
00570   }
00571   else {
00572     list  = fo_alloc(NULL,1,sizeof(t_thread **),FO_ALLOC_MALLOC);
00573     *list = t;
00574 
00575     cf_unregister_thread(t);
00576 
00577     if(!prev) {
00578       CF_RW_WR(&head.lock);
00579       head.thread = head.thread->next;
00580       CF_RW_UN(&head.lock);
00581     }
00582     else {
00583       prev->next = t->next;
00584     }
00585 
00586     pid = fork();
00587     switch(pid) {
00588     case -1:
00589       cf_log(LOG_ERR,__FILE__,__LINE__,"fork: %s\n",strerror(errno));
00590       break;
00591     case 0:
00592       cf_archive_threads(list,1);
00593       exit(0);
00594     default:
00595       cf_log(LOG_STD,__FILE__,__LINE__,"waiting for archiver...\n");
00596       waitpid(pid,&status,0);
00597       cf_log(LOG_STD,__FILE__,__LINE__,"archiver finished!\n");
00598     }
00599 
00600     for(p=t->postings;p;p=p1) {
00601       free(p->user.name);
00602       free(p->user.ip);
00603       free(p->subject);
00604       free(p->unid);
00605       free(p->content);
00606       if(p->user.email) free(p->user.email);
00607       if(p->user.img) free(p->user.img);
00608       if(p->user.hp) free(p->user.hp);
00609       if(p->category) free(p->category);
00610 
00611       p1 = p->next;
00612       free(p);
00613     }
00614 
00615     cf_rwlock_destroy(&t->lock);
00616     free(t);
00617     free(list);
00618 
00619     writen(sockfd,"200 Ok\n",7);
00620   }
00621 }
00622 /* }}} */
00623 
00624 /* eof */

Generated on Sun Apr 25 16:37:36 2004 for Classic Forum by doxygen 1.3.5