Main Page | Modules | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

validate.c

Go to the documentation of this file.
00001 
00011 /* {{{ Initial comments */
00012 /*
00013  * $LastChangedDate$
00014  * $LastChangedRevision$
00015  * $LastChangedBy$
00016  *
00017  */
00018 /* }}} */
00019 
00020 /* {{{ Includes */
00021 #include "config.h"
00022 #include "defines.h"
00023 
00024 #include <stdlib.h>
00025 #include <stdio.h>
00026 #include <string.h>
00027 #include <ctype.h>
00028 
00029 #include <idna.h>
00030 
00031 #include "utils.h"
00032 #include "validate.h"
00033 /* }}} */
00034 
00035 static int is_valid_http_link_check(const u_char *link);
00036 
00037 /* {{{ scheme_list */
00038 static const t_scheme_list scheme_list[] = {
00039   { "http",     is_valid_http_link_check },
00040   { "https",    is_valid_http_link_check },
00041   { "mailto",   is_valid_mailto_link },
00042   { "wais",     is_valid_wais_link },
00043   { "prospero", is_valid_prospero_link },
00044   { "telnet",   is_valid_telnet_link },
00045   { "nntp",     is_valid_nntp_link },
00046   { "news",     is_valid_news_link },
00047   { "gopher",   is_valid_gopher_link },
00048   { "\0", NULL }
00049 };
00050 /* }}} */
00051 
00052 /* {{{ isextra */
00053 static int isextra(u_char c) {
00054   switch(c) {
00055     case '!':
00056     case '*':
00057     case '\'':
00058     case '(':
00059     case ')':
00060     case ',':
00061       return 1;
00062   }
00063 
00064   return 0;
00065 }
00066 /* }}} */
00067 
00068 /* {{{ issafe */
00069 static int issafe(u_char c) {
00070   switch(c) {
00071     case '$':
00072     case '-':
00073     case '_':
00074     case '.':
00075     case '+':
00076       return 1;
00077   }
00078 
00079   return 0;
00080 }
00081 /* }}} */
00082 
00083 /* {{{ isuchar_wo_escape */
00084 static int isuchar_wo_escape(u_char c) {
00085   /* escape is not checked! */
00086   if((!isalnum(c) || !isascii(c)) && !issafe(c) && !isextra(c)) return 0;
00087   return 1;
00088 }
00089 /* }}} */
00090 
00091 /* {{{ is_valid_hostname */
00092 int is_valid_hostname(const u_char *hostname) {
00093   register const u_char *ptr;
00094   u_char *out = NULL;
00095   int count = 0;
00096 
00097   for(ptr=hostname;*ptr;ptr++) {
00098     if(!isalnum(*ptr) || !isascii(*ptr)) {
00099       if(*ptr == '.') {
00100         ++count;
00101         /* two dots after another are not allowed, neither a dot at the beginning */
00102         if(ptr == hostname || *(ptr-1) == '.') return -1;
00103       }
00104       else continue;
00105 
00106       /* hu, we got a not-alnum ascii character. IDN? */
00107       if(idna_to_ascii_8z(hostname,(char **)&out,IDNA_USE_STD3_ASCII_RULES) != IDNA_SUCCESS) return -1;
00108 
00109       /* idna did the check for us; it's valid */
00110       free(out);
00111       return 0;
00112     }
00113   }
00114 
00115   /* ok, all characters are alnum and ascii or dots. Hostname is valid */
00116   return count == 0 ? -1 : 0;
00117 }
00118 /* }}} */
00119 
00120 /* {{{ is_valid_http_link_check */
00121 static int is_valid_http_link_check(const u_char *link) {
00122   /* we allow anchors by default */
00123   return is_valid_http_link(link,0);
00124 }
00125 /* }}} */
00126 
00127 /* {{{ is_valid_http_link */
00128 int is_valid_http_link(const u_char *link,int strict) {
00129   register const u_char *ptr;
00130   u_char *begin = NULL,*end = NULL,*helper;
00131 
00132   /* first we check if the scheme is valid */
00133   if(cf_strncmp(link,"http://",7)) {
00134     if(cf_strncmp(link,"https://",8)) return -1;
00135 
00136     ptr = link+8;
00137   }
00138   else ptr = link + 7;
00139 
00140   begin = ptr;
00141 
00142   /* ok, it seems as if scheme is valid -- get hostname */
00143   for(;*ptr;ptr++) {
00144     if(*ptr == ':' || *ptr == '/' || *ptr == '#' || *ptr == '?') {
00145       end = ptr-1;
00146       break;
00147     }
00148   }
00149 
00150   /* URL consists only of a hostname if end is NULL */
00151   if(end == NULL) {
00152     end = ptr-1;
00153   }
00154 
00155   helper = strndup(begin,end-begin+1);
00156   if(is_valid_hostname(helper) == -1) {
00157     /* ups, no valid hostname -- die */
00158     free(helper);
00159     return -1;
00160   }
00161 
00162   free(helper);
00163 
00164   /* hostname is valid; follows a port? */
00165   if(*ptr == ':') {
00166     /* port has to be digit+ */
00167     for(begin=++ptr;*ptr;ptr++) {
00168       /* port is valid (at least one digit followed by a slash) */
00169       if(*ptr == '/' && ptr > begin) break;
00170 
00171       /* hu? port must be digits... bad boy! */
00172       if(!isdigit(*ptr)) return -1;
00173     }
00174   }
00175 
00176   /* follows a host path? */
00177   if(*ptr == '/') {
00178     for(begin=ptr;*ptr;ptr++) {
00179       /* escape sequenz */
00180       if(*ptr == '%') {
00181         if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00182         ptr += 2;
00183         continue;
00184       }
00185 
00186       /* anchor means end of host path */
00187       if(*ptr == '#') break;
00188 
00189       if(!isuchar_wo_escape(*ptr)) {
00190         switch(*ptr) {
00191           case '/':
00192           case ';':
00193           case ':':
00194           case '@':
00195           case '&':
00196           case '=':
00197             continue;
00198           default:
00199             if(*ptr != '?') return -1;
00200         }
00201 
00202         /* could only be a question mark (end of host path) */
00203         break;
00204       }
00205     }
00206   }
00207 
00208   /* follows a search path? */
00209   if(*ptr == '?') {
00210     for(++ptr;*ptr;ptr++) {
00211       /* escaped character */
00212       if(*ptr == '%') {
00213         if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00214         ptr += 2;
00215         continue;
00216       }
00217 
00218       /* anchor means end of search path */
00219       if(*ptr == '#') break;
00220 
00221       /* we checked for escaped before */
00222       if(!isuchar_wo_escape(*ptr)) {
00223         switch(*ptr) {
00224           case ';':
00225           case ':':
00226           case '@':
00227           case '&':
00228           case '=':
00229             break;
00230           default:
00231             /* no anchor in strict mode */
00232             return -1;
00233         }
00234       }
00235     }
00236   }
00237 
00238   if(*ptr == '#') {
00239     if(strict) return -1;
00240 
00241     for(ptr++;*ptr;ptr++) {
00242       if(*ptr == '%') {
00243         if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00244         ptr += 2;
00245         continue;
00246       }
00247 
00248       if(!isuchar_wo_escape(*ptr)) {
00249         switch(*ptr) {
00250           case ';':
00251           case '/':
00252           case '?':
00253           case ':':
00254           case '@':
00255           case '&':
00256           case '=':
00257             break;
00258           default:
00259             return -1;
00260         }
00261       }
00262     }
00263   }
00264 
00265   return 0;
00266 }
00267 /* }}} */
00268 
00269 /* {{{ is_valid_mailto_link */
00270 int is_valid_mailto_link(const u_char *addr) {
00271   /* first, check if mail address begins with mailto: */
00272   if(cf_strncmp(addr,"mailto:",7) == 0) {
00273     /* let is_valid_mailaddress() do the checks... */
00274     return is_valid_mailaddress(addr+7);
00275   }
00276 
00277   /* not a mailto:-scheme -- bad guy! invalid! */
00278   return -1;
00279 }
00280 /* }}} */
00281 
00282 /* {{{ is_valid_mailaddress */
00283 /*
00284  * Algorithm is in parts from 'Secure Cooking With C and C++'
00285  */
00286 int is_valid_mailaddress(const u_char *address) {
00287   register const u_char *c, *domain;
00288   static u_char *rfc822_specials = "()<>@,;:\\\"[]";
00289 
00290   /* first we validate the name portion (name@domain) */
00291   for(c=address;*c;c++) {
00292     if(*c == '\"' && (c == address || *(c - 1) == '.' || *(c - 1) == '\"')) {
00293       while (*++c) {
00294         if (*c == '\"') break;
00295         if (*c == '\\' && (*++c == ' ')) continue;
00296         if (*c <= ' ' || *c >= 127) return -1;
00297       }
00298 
00299       if(!*c++) return -1;
00300       if(*c == '@') break;
00301       if(*c != '.') return -1;
00302       continue;
00303     }
00304 
00305     if(*c == '@') break;
00306     if(*c <= ' ' || *c >= 127) return -1;
00307     if(strchr(rfc822_specials, *c)) return -1;
00308   }
00309 
00310   if(c == address || *(c - 1) == '.') return -1;
00311 
00312   /* next we validate the domain portion (name@domain) */
00313   if (!*(domain = ++c)) return -1;
00314 
00315   /*
00316    * we also have to accept Umlauts domains, which means we have
00317    * to check domain name more complicated than the secure cooking
00318    * does
00319    */
00320   return is_valid_hostname(domain);
00321 }
00322 /* }}} */
00323 
00324 /* {{{ is_valid_wais_link */
00325 int is_valid_wais_link(const u_char *link) {
00326   register const u_char *ptr;
00327   const u_char *begin = NULL,*end = NULL;
00328   u_char *helper;
00329   unsigned int slashes;
00330 
00331   /* first we check if the scheme is valid */
00332   if(cf_strncmp(link,"wais://",7)) {
00333     return -1;
00334   }
00335   else ptr = (u_char *)link + 7;
00336 
00337   begin = ptr;
00338 
00339   /* ok, it seems as if scheme is valid -- get hostname */
00340   for(;*ptr;ptr++) {
00341     if(*ptr == ':' || *ptr == '/' || *ptr == '?') {
00342       end = ptr-1;
00343       break;
00344     }
00345   }
00346 
00347   /* URL consists only of a hostname if end is NULL */
00348   if(end == NULL) {
00349     end = ptr-1;
00350   }
00351 
00352   helper = strndup(begin,end-begin+1);
00353   if(is_valid_hostname(helper) == -1) {
00354     /* ups, no valid hostname -- die */
00355     free(helper);
00356     return -1;
00357   }
00358 
00359   free(helper);
00360 
00361   /* hostname is valid; follows a port? */
00362   if(*ptr == ':') {
00363     /* port has to be digit+ */
00364     for(begin=++ptr;*ptr;ptr++) {
00365       /* port is valid (at least one digit followed by a slash) */
00366       if(*ptr == '/' && ptr > begin) break;
00367 
00368       /* hu? port must be digits... bad boy! */
00369       if(!isdigit(*ptr)) return -1;
00370     }
00371   }
00372 
00373   /* does not follow host path, no wais url*/
00374   if(*ptr++ != '/') return -1;
00375   
00376   slashes = 0;
00377   for(begin=ptr;*ptr;ptr++) {
00378     /* escape sequenz */
00379     if(*ptr == '%') {
00380       if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00381       ptr += 2;
00382       continue;
00383     }
00384     
00385   
00386     if(!isuchar_wo_escape(*ptr)) {
00387       switch(*ptr) {
00388         case '/':
00389           slashes++;
00390           if(slashes > 2) return -1;
00391           continue;
00392         default:
00393           if(*ptr != '?' || slashes) return -1;
00394       }
00395       /* could only be a question mark (end of host path) */
00396       break;
00397     }
00398   }
00399   
00400   /* three forms of wais url
00401      1. wais://hostport/database   (slashes == 0, !*ptr)
00402      2. wais://hostport/database/wtype/wpath (slashes == 2, !*ptr)
00403      3. wais://hostport/database?search (slashes == 0, *ptr == '?')
00404    */
00405   if(slashes && (slashes != 2 || *ptr)) return -1;
00406 
00407   /* follows a search path? */
00408   if(*ptr == '?') {
00409     for(++ptr;*ptr;ptr++) {
00410       /* escaped character */
00411       if(*ptr == '%') {
00412         if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00413         ptr += 2;
00414         continue;
00415       }
00416 
00417       /* we checked for escaped before */
00418       if(!isuchar_wo_escape(*ptr)) {
00419         switch(*ptr) {
00420           case ';':
00421           case ':':
00422           case '@':
00423           case '&':
00424           case '=':
00425             break;
00426           default:
00427             /* no anchor in strict mode */
00428             return -1;
00429         }
00430       }
00431     }
00432   }
00433   
00434   /* now url should definitely be at an end! */
00435   if (*ptr) return -1;
00436   
00437   return 0;
00438 
00439 }
00440 /* }}} */
00441 
00442 /* {{{ is_valid_prospero_link */
00443 int is_valid_prospero_link(const u_char *link) {
00444   register const u_char *ptr;
00445   const u_char *begin = NULL,*end = NULL;
00446   u_char *helper;
00447   int had_cur_equal = 0;
00448 
00449   /* first we check if the scheme is valid */
00450   if(cf_strncmp(link,"prospero://",11)) {
00451     return -1;
00452   }
00453   else ptr = link + 11;
00454 
00455   begin = ptr;
00456 
00457   /* ok, it seems as if scheme is valid -- get hostname */
00458   for(;*ptr;ptr++) {
00459     if(*ptr == ':' || *ptr == '/') {
00460       end = ptr-1;
00461       break;
00462     }
00463   }
00464 
00465   /* URL consists only of a hostname if end is NULL */
00466   if(end == NULL) {
00467     end = ptr-1;
00468   }
00469 
00470   helper = strndup(begin,end-begin+1);
00471   if(is_valid_hostname(helper) == -1) {
00472     /* ups, no valid hostname -- die */
00473     free(helper);
00474     return -1;
00475   }
00476 
00477   free(helper);
00478 
00479   /* hostname is valid; follows a port? */
00480   if(*ptr == ':') {
00481     /* port has to be digit+ */
00482     for(begin=++ptr;*ptr;ptr++) {
00483       /* port is valid (at least one digit followed by a slash) */
00484       if(*ptr == '/' && ptr > begin) break;
00485 
00486       /* hu? port must be digits... bad boy! */
00487       if(!isdigit(*ptr)) return -1;
00488     }
00489   }
00490 
00491   /* does not follow host path, no prospero url*/
00492   if(*ptr++ != '/') return -1;
00493   
00494   for(begin=ptr;*ptr;ptr++) {
00495     /* escape sequenz */
00496     if(*ptr == '%') {
00497       if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00498       ptr += 2;
00499       continue;
00500     }
00501     
00502   
00503     if(!isuchar_wo_escape(*ptr)) {
00504       switch(*ptr) {
00505         case '/':
00506         case '?':
00507         case ':':
00508         case '@':
00509         case '&':
00510         case '=':
00511           continue;
00512         default:
00513           if(*ptr != ';') return -1;
00514       }
00515       /* could only be a semicolon (end of host path) */
00516       break;
00517     }
00518   }
00519   
00520   /* follows a fieldspec? */
00521   if(*ptr == ';') {
00522     for(++ptr;*ptr;ptr++) {
00523       /* escaped character */
00524       if(*ptr == '%') {
00525         if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00526         ptr += 2;
00527         continue;
00528       }
00529 
00530       /* we checked for escaped before */
00531       if(!isuchar_wo_escape(*ptr)) {
00532         switch(*ptr) {
00533           case ';':
00534             if(!had_cur_equal) return -1;
00535             had_cur_equal = 0;
00536             break;
00537           case '=':
00538             if(had_cur_equal) return -1;
00539             had_cur_equal = 1;
00540             break;
00541           case '?':
00542           case ':':
00543           case '@':
00544           case '&':
00545             break;
00546           default:
00547             /* no anchor in strict mode */
00548             return -1;
00549         }
00550       }
00551     }
00552   }
00553   
00554   /* now url should definitely be at an end! */
00555   if (*ptr) return -1;
00556   
00557   return 0;
00558 }
00559 /* }}} */
00560 
00561 /* {{{ is_valid_telnet_link */
00562 int is_valid_telnet_link(const u_char *link) {
00563   register const u_char *ptr;
00564   const u_char *anchor,*doubledot;
00565   u_char *hostname;
00566 
00567   if(cf_strncmp(link,"telnet://",8)) return -1;
00568 
00569   if((anchor = strstr(link,"@")) == NULL) {
    if((doubledot = strstr(link+8,":")) == NULL) {
00570       hostname = strdup(link+8);
00571       if(is_valid_hostname(hostname)) {
00572         free(hostname);
00573         return -1;
00574       }
00575       free(hostname);
00576     }
00577     else {
00578       hostname = strndup(link+8,link+8-doubledot);
00579       if(is_valid_hostname(hostname)) {
00580         free(hostname);
00581         return -1;
00582       }
00583       free(hostname);
00584 
00585       for(ptr=doubledot+1;*ptr;ptr++) {
00586         if(!isdigit(*ptr)) {
00587           /* we do not allow anything else after the slash */
00588           if(*ptr == '/' && *(ptr+1)) return -1;
00589         }
00590       }
00591     }
00592   }
00593   else {
00594     if((doubledot = strstr(anchor+1,":")) == NULL) {
00595       if(is_valid_hostname(anchor+1)) return -1;
00596     }
00597     else {
00598       hostname = strndup(anchor+1,anchor+1-doubledot);
00599       if(is_valid_hostname(hostname)) {
00600         free(hostname);
00601         return -1;
00602       }
00603       free(hostname);
00604 
00605       for(ptr=doubledot+1;*ptr;ptr++) {
00606         if(!isdigit(*ptr)) {
00607           /* we do not allow anything else after the slash */
00608           if(*ptr == '/' && *(ptr+1)) return -1;
00609         }
00610       }
00611     }
00612   }
00613 
00614   /* ok then -- it seems as if hostname and port are all right, check username
00615    * and password, if exist
00616    */
00617   if(anchor) {
00618     for(ptr=link+8;*ptr;ptr++) {
00619       if(is_uchar_wo_escape(*ptr) == 0) {
00620         /* end of username */
00621         if(*ptr == ':' || *ptr == '@') break;
00622 
00623         /* escaped char */
00624         if(*ptr == '%') {
00625           if(isxdigit(*(ptr+1)) == 0 || isxdigit(*(ptr+2)) == 0) return -1;
00626         }
00627 
00628         /* there are some special characters allowed */
00629         switch(*ptr) {
00630           case ';':
00631           case '?':
00632           case '&':
00633           case '=':
00634             break;
00635           default:
00636             return -1;
00637         }
00638       }
00639 
00640     }
00641 
00642     /* password       = *[ uchar | ";" | "?" | "&" | "=" ] */
00643     if(*ptr == ':') {
00644       for(ptr++;*ptr;ptr++) {
00645         if(is_uchar_wo_escape(*ptr) == 0) {
00646           if(*ptr == '%') {
00647             if(isxdigit(*(ptr+1)) == 0 || isxdigit(*(ptr+2)) == 0) return -1;
00648           }
00649 
00650           switch(*ptr) {
00651             case ';':
00652             case '?':
00653             case '&':
00654             case '=':
00655               break;
00656             default:
00657               return -1;
00658           }
00659         }
00660       }
00661     }
00662 
00663     /* we have to be at the end of the string now */
00664     if(*ptr) return -1;
00665   }
00666 
00667 
00668   return 0;
00669 }
00670 /* }}} */
00671 
00672 /* {{{ is_valid_nntp_link */
00673 int is_valid_nntp_link(const u_char *link) {
00674   register const u_char *ptr;
00675   const u_char *end;
00676   u_char *hostname;
00677 
00678   if(cf_strncmp(link,"nntp://",7)) return -1;
00679   else ptr = link + 7;
00680 
00681   if((end = strstr(ptr,"/")) == NULL) return -1;
00682 
00683   hostname = strndup(ptr,end-ptr);
00684   if(is_valid_hostname(hostname)) {
00685     free(hostname);
00686     return -1;
00687   }
00688   free(hostname);
00689 
00690   ptr = end + 1;
00691   if(!isalpha(*ptr)) return -1;
00692 
00693   for(*ptr;ptr++) {
00694     if(*ptr == '/') break;
00695 
00696     if(!isalnum(*ptr)) {
00697       case '-':
00698       case '.':
00699       case '+':
00700       case '_':
00701         break;
00702       default:
00703         return -1;
00704     }
00705   }
00706 
00707   if(*ptr == '/') {
00708     for(;*ptr;ptr++) {
00709       if(!isdigit(*ptr)) return -1;
00710     }
00711   }
00712 
00713   return 0;
00714 }
00715 /* }}} */
00716 
00717 /* {{{ is_valid_news_link */
00718 int is_valid_news_link(const u_char *link) {
00719   const u_char *ptr;
00720   const u_char *anchor,*doublepoint;
00721   u_char *hostname;
00722 
00723   if(cf_strncmp(link,"news:",5)) return -1;
00724   ptr = link + 5;
00725 
00726   /* grouppart is "*" or a group or an article */
00727   if(*ptr == '*' && *(ptr+1)) return -1;
00728 
00729   /* article or group? article must containe a host... */
00730   if((anchor = strstr(ptr,"@")) == NULL) {
    if(!is_uchar_wo_escape(*ptr)) {
      if(*ptr == '%') {
        if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
        ptr += 2;
        continue;
      }

      switch(*ptr) {
        case ';':
        case '/':
        case '?':
        case ':':
        case '&':
        case '=':
          break;
        default:
          return -1;
      }
    }

    for(++ptr;*ptr;ptr++) {
      if(!is_uchar_wo_escape(*ptr)) {
        if(*ptr == '@') break;

        if(*ptr == '%') {
          if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
          ptr += 2;
          continue;
        }

        switch(*ptr) {
          case ';':
          case '/':
          case '?':
          case ':':
          case '&':
          case '=':
            break;
          default:
            return -1;
        }
      }
    }

    if((doublepoint = strstr(anchor,":")) == NULL) {
00731       if(is_valid_hostname(anchor+1)) return -1;
00732     }
00733     else {
00734       hostname = strndup(anchor+1,doublepoint-anchor);
00735       if(is_valid_hostname(hostname)) {
00736         free(hostname);
00737         return -1;
00738       }
00739       free(hostname);
00740 
00741       for(ptr = doublepoint+1;*ptr;ptr++) {
00742         if(!isdigit(*ptr)) return -1;
00743       }
00744     }
00745   }
00746   else {
00747     if(!isalpha(*ptr)) return -1;
00748 
00749     for(;*ptr;ptr++) {
00750       if(!isalnum(*ptr)) {
00751         switch(*ptr) {
00752           case '-':
00753           case '.':
00754           case '+':
00755           case '_':
00756             break;
00757           default:
00758             return -1;
00759         }
00760       }
00761     }
00762   }
00763 
00764   return 0;
00765 }
00766 /* }}} */
00767 
00768 /* {{{ is_valid_gopher_link */
00769 int is_valid_gopher_link(const u_char *link) {
00770   register const u_char *ptr;
00771   const u_char *begin = NULL,*end = NULL;
00772   u_char *helper;
00773 
00774   /* first we check if the scheme is valid */
00775   if(cf_strncmp(link,"gopher://",9)) return -1;
00776   
00777   ptr = (u_char *)link + 9;
00778 
00779   begin = ptr;
00780 
00781   /* ok, it seems as if scheme is valid -- get hostname */
00782   for(;*ptr;ptr++) {
00783     if(*ptr == ':' || *ptr == '/') {
00784       end = ptr-1;
00785       break;
00786     }
00787   }
00788 
00789   /* URL consists only of a hostname if end is NULL */
00790   if(end == NULL) {
00791     end = ptr-1;
00792   }
00793 
00794   helper = strndup(begin,end-begin+1);
00795   if(is_valid_hostname(helper) == -1) {
00796     /* ups, no valid hostname -- die */
00797     free(helper);
00798     return -1;
00799   }
00800 
00801   free(helper);
00802 
00803   /* hostname is valid; follows a port? */
00804   if(*ptr == ':') {
00805     /* port has to be digit+ */
00806     for(begin=++ptr;*ptr;ptr++) {
00807       /* port is valid (at least one digit followed by a slash) */
00808       if(*ptr == '/' && ptr > begin) break;
00809 
00810       /* hu? port must be digits... bad boy! */
00811       if(!isdigit(*ptr)) return -1;
00812     }
00813   }
00814   
00815   if(!*ptr) return 0;
00816   if(*ptr++ != '/') {
00817     return -1;
00818   }
00819   
00820   for(begin=ptr;*ptr;ptr++) {
00821     /* escape sequenz */
00822     if(*ptr == '%') {
00823       // follows search?
00824       if(*(ptr+1) == '0' && *(ptr+2) == '9') {
00825         ptr += 3;
00826         break;
00827       }
00828       if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00829       ptr += 2;
00830       continue;
00831     }
00832     
00833     if(!isuchar_wo_escape(*ptr)) {
00834       switch(*ptr) {
00835         case ';':
00836         case '/':
00837         case '?':
00838         case ':':
00839         case '@':
00840         case '&':
00841         case '=':
00842           continue;
00843         default:
00844           return -1;
00845       }
00846     }
00847   }
00848   
00849   // valid (search may be empty because it is defined as *[ ... ]!)
00850   if(!*ptr) return 0;
00851 
00852   /* follows a search path */
00853   for(;*ptr;ptr++) {
00854     /* escaped character */
00855     if(*ptr == '%') {
00856       // follows gopher+?
00857       if(*(ptr+1) == '0' && *(ptr+2) == '9') {
00858         ptr += 3;
00859         break;
00860       }
00861       if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00862       ptr += 2;
00863       continue;
00864     }
00865 
00866     /* we checked for escaped before */
00867     if(!isuchar_wo_escape(*ptr)) {
00868       switch(*ptr) {
00869         case ';':
00870         case ':':
00871         case '@':
00872         case '&':
00873         case '=':
00874           break;
00875         default:
00876           /* no anchor in strict mode */
00877           return -1;
00878       }
00879     }
00880   }
00881   
00882   // valid (gopher+ may be empty because it is defined as *xchar!)
00883   if(!*ptr) return 0;
00884 
00885   for(;*ptr;ptr++) {
00886     /* escaped character */
00887     if(*ptr == '%') {
00888       if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00889       ptr += 2;
00890       continue;
00891     }
00892     /* we checked for escaped before */
00893     if(!isuchar_wo_escape(*ptr)) {
00894       switch(*ptr) {
00895         case ';':
00896         case '/':
00897         case '?':
00898         case ':':
00899         case '@':
00900         case '&':
00901         case '=':
00902           break;
00903         default:
00904           /* no anchor in strict mode */
00905           return -1;
00906       }
00907     }
00908   }
00909 
00910   return 0;
00911 }
00912 /* }}} */
00913 
00914 /* {{{ is_valid_ftp_link */
00915 int is_valid_ftp_link(const u_char *link) {
00916   register const u_char *ptr;
00917   const u_char *at,*slash,*doublepoint,*start,*hostname;
00918   int dd = 0;
00919 
00920   if(cf_strncmp(link,"ftp://",6)) return -1;
00921 
00922   /*
00923    * it doesnt matter if we only have a username or a password
00924    * and a username. Both consist of *[uchar|";"|"?"|"&"|"="]
00925    */
00926   if((at = strstr(link+6,"@")) != NULL) {
    for(ptr=link+6;ptr<at;ptr++) {
      if(!is_uchar_wo_escape(*ptr)) {
        if(*ptr == '%') {
          if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
          ptr += 2;
          continue;
        }

        switch(*ptr) {
          case ';':
          case '?':
          case '&':
          case '=':
            break;
          case ':':
            if(dd == 0) {
              dd = 1;
              break;
            }
          default:
            return -1;
        }
      }
    }
  }

  start = at ? at + 1 : link + 6;
  
  slash       = strstr(start,"/");
00927   doublepoint = strstr(start,":");
00928 
00929   if(!slash) {
00930     if(!doublepoint) {
00931       if(!is_valid_hostname(start)) return -1;
00932     }
00933     else {
00934       hostname = strndup(start,doublepoint-start);
00935       if(!is_valid_hostname(hostname)) {
00936         free(hostname);
00937         return -1;
00938       }
00939       free(hostname);
00940 
00941       for(ptr=doublepoint+1;*ptr;ptr++) {
00942         if(!isdigit(*ptr)) return -1;
00943       }
00944     }
00945   }
00946   /* a slash exists */
00947   else {
00948     if(doublepoint) {
00949       hostname = strndup(start,doublepoint-start);
00950       if(!is_valid_hostname(hostname)) {
00951         free(hostname);
00952         return -1;
00953       }
00954       free(hostname);
00955 
00956       for(ptr=doublepoint+1;*ptr != '/';ptr++) {
00957         if(!isdigit(*ptr)) return -1;
00958       }
00959     }
00960     else {
00961       hostname = strndup(start,slash-start);
00962       if(!is_valid_hostname(hostname)) {
00963         free(hostname);
00964         return -1;
00965       }
00966       free(hostname);
00967     }
00968   }
00969 
00970   if(slash) {
00971     for(ptr=slash+1;*ptr;ptr++) {
00972       if(!is_uchar_wo_escape(*ptr)) {
00973         if(*ptr == '%') {
00974           if(!isxdigit(*(ptr+1)) || !isxdigit(*(ptr+2))) return -1;
00975           ptr += 2;
00976           continue;
00977         }
00978 
00979         if(*ptr == ';') {
00980           break;
00981         }
00982 
00983         switch(*ptr) {
00984           case '?':
00985           case ':':
00986           case '@':
00987           case '&':
00988           case '=':
00989             break;
00990           default:
00991             return -1;
00992         }
00993       }
00994     }
00995   }
00996 
00997   if(*ptr == ';') {
00998     switch(*(ptr+1)) {
00999       case 'A':
01000       case 'I':
01001       case 'D':
01002       case 'a':
01003       case 'i':
01004       case 'd':
01005         break;
01006       default;
01007         return -1;
01008     }
01009 
01010     if(*(ptr+2)) return -1;
01011   }
01012 
01013   return 0;
01014 }
01015 /* }}} */
01016 
01017 /* {{{ is_valid_link */
01022 int is_valid_link(const u_char *link) {
01023   u_char *ptr = strstr(link,"://");
01024   u_char scheme[20];
01025   int i;
01026 
01027   /*
01028    * no scheme found, but mailto-links are mailto:<address> and news-links are
01029    * news:<address>, not mailto://<address> and not news://<address> -- so we
01030    * have to do an extra check
01031    */
01032   if(ptr == NULL) {
01033     if(cf_strncmp(link,"mailto:",7) == 0) {
01034       strcpy(scheme,"mailto");
01035     }
01036     else if(cf_strncmp(link,"news:",5) == 0) {
01037       strcpy(scheme,"news");
01038     }
01039     else return -1;
01040   }
01041   else {
01042     if(ptr-link >= 20) return -1; /* hu, seems not to be a valid scheme */
01043 
01044     strncpy(scheme,link,ptr-link);
01045     scheme[ptr-link] = '\0';
01046   }
01047 
01048   for(i=0;scheme_list[i].validator;i++) {
01049     if(cf_strcmp(scheme_list[i].scheme,scheme) == 0) {
01050       return scheme_list[i].validator(link);
01051     }
01052   }
01053 
01054   return -1;
01055 }
01056 /* }}} */
01057 
01058 /* eof */
01059 

Generated on Sun Apr 25 16:37:39 2004 for Classic Forum by doxygen 1.3.5