00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "config.h"
00027
00028
00029
00030
00031
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037
00038 #include <cstring>
00039 #include <iostream>
00040 #include <sstream>
00041 #include <algorithm>
00042 #include <iterator>
00043 #include <set>
00044
00045 #include "Error.h"
00046 #include "InternalErr.h"
00047 #include "ResponseTooBigErr.h"
00048 #ifndef WIN32
00049 #include "SignalHandler.h"
00050 #endif
00051 #include "HTTPCacheInterruptHandler.h"
00052 #include "HTTPCacheTable.h"
00053
00054 #include "util_mit.h"
00055 #include "debug.h"
00056
00057 #ifdef WIN32
00058 #include <direct.h>
00059 #include <time.h>
00060 #include <fcntl.h>
00061 #define MKDIR(a,b) _mkdir((a))
00062 #define REMOVE(a) remove((a))
00063 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
00064 #define DIR_SEPARATOR_CHAR '\\'
00065 #define DIR_SEPARATOR_STR "\\"
00066 #else
00067 #define MKDIR(a,b) mkdir((a), (b))
00068 #define REMOVE(a) remove((a))
00069 #define MKSTEMP(a) mkstemp((a))
00070 #define DIR_SEPARATOR_CHAR '/'
00071 #define DIR_SEPARATOR_STR "/"
00072 #endif
00073
// Suffix appended to a cached body's file name to name its companion file
// (remove_cache_entry() deletes both cachename and cachename + CACHE_META).
#define CACHE_META ".meta"
// Suffix appended to the cache root to form the index file name.
#define CACHE_INDEX ".index"
// Placeholder stored in the index in place of an empty ETag so that every
// record keeps the same number of whitespace-separated fields.
#define CACHE_EMPTY_ETAG "@cache@"

// Both values are parenthesized so that the product is not split apart when
// the macro is expanded next to an operator of equal or higher precedence
// (e.g. `x / NO_LM_EXPIRATION`).
#define NO_LM_EXPIRATION (24*3600) // 24 hours
#define MAX_LM_EXPIRATION (48*3600) // Max expiration from LM

// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif

// Number of slots in the cache table; also the modulus used by get_hash().
const int CACHE_TABLE_SIZE = 1499;
00088
00089 using namespace std;
00090
00091 namespace libdap {
00092
00096 int
00097 get_hash(const string &url)
00098 {
00099 int hash = 0;
00100
00101 for (const char *ptr = url.c_str(); *ptr; ptr++)
00102 hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
00103
00104 return hash;
00105 }
00106
00107 HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
00108 d_cache_root(cache_root),
00109 d_block_size(block_size),
00110 d_current_size(0),
00111 d_new_entries(0)
00112 {
00113 d_cache_index = cache_root + CACHE_INDEX;
00114
00115 d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
00116
00117 // Initialize the cache table.
00118 for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
00119 d_cache_table[i] = 0;
00120
00121 cache_index_read();
00122 }
00123
00127 static inline void
00128 delete_cache_entry(HTTPCacheTable::CacheEntry *e)
00129 {
00130 DBG2(cerr << "Deleting CacheEntry: " << e << endl);
00131 delete e;
00132 }
00133
00134 HTTPCacheTable::~HTTPCacheTable() {
00135 for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
00136 HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
00137 if (cp) {
00138
00139 for_each(cp->begin(), cp->end(), delete_cache_entry);
00140
00141
00142 delete get_cache_table()[i];
00143 get_cache_table()[i] = 0;
00144 }
00145 }
00146
00147 delete[] d_cache_table;
00148 }
00149
00157 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00158 time_t d_time;
00159 HTTPCacheTable &d_table;
00160
00161 public:
00162 DeleteExpired(HTTPCacheTable &table, time_t t) :
00163 d_time(t), d_table(table) {
00164 if (!t)
00165 d_time = time(0);
00166 }
00167
00168 void operator()(HTTPCacheTable::CacheEntry *&e) {
00169 if (e && !e->readers && (e->freshness_lifetime
00170 < (e->corrected_initial_age + (d_time - e->response_time)))) {
00171 DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
00172 d_table.remove_cache_entry(e);
00173 delete e; e = 0;
00174 }
00175 }
00176 };
00177
00178
00179 void HTTPCacheTable::delete_expired_entries(time_t time) {
00180
00181 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00182 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00183 if (slot) {
00184 for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
00185 slot->erase(remove(slot->begin(), slot->end(),
00186 static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
00187 }
00188 }
00189 }
00190
00197 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00198 HTTPCacheTable &d_table;
00199 int d_hits;
00200
00201 public:
00202 DeleteByHits(HTTPCacheTable &table, int hits) :
00203 d_table(table), d_hits(hits) {
00204 }
00205
00206 void operator()(HTTPCacheTable::CacheEntry *&e) {
00207 if (e && !e->readers && e->hits <= d_hits) {
00208 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00209 d_table.remove_cache_entry(e);
00210 delete e; e = 0;
00211 }
00212 }
00213 };
00214
00215 void
00216 HTTPCacheTable::delete_by_hits(int hits) {
00217 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00218 if (get_cache_table()[cnt]) {
00219 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00220 for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
00221 slot->erase(remove(slot->begin(), slot->end(),
00222 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00223 slot->end());
00224
00225 }
00226 }
00227 }
00228
00233 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00234 HTTPCacheTable &d_table;
00235 unsigned int d_size;
00236
00237 public:
00238 DeleteBySize(HTTPCacheTable &table, unsigned int size) :
00239 d_table(table), d_size(size) {
00240 }
00241
00242 void operator()(HTTPCacheTable::CacheEntry *&e) {
00243 if (e && !e->readers && e->size > d_size) {
00244 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00245 d_table.remove_cache_entry(e);
00246 delete e; e = 0;
00247 }
00248 }
00249 };
00250
00251 void HTTPCacheTable::delete_by_size(unsigned int size) {
00252 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00253 if (get_cache_table()[cnt]) {
00254 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00255 for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
00256 slot->erase(remove(slot->begin(), slot->end(),
00257 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00258 slot->end());
00259
00260 }
00261 }
00262 }
00263
00270
00277 bool
00278 HTTPCacheTable::cache_index_delete()
00279 {
00280 d_new_entries = 0;
00281
00282 return (REMOVE(d_cache_index.c_str()) == 0);
00283 }
00284
00293 bool
00294 HTTPCacheTable::cache_index_read()
00295 {
00296 FILE *fp = fopen(d_cache_index.c_str(), "r");
00297
00298
00299 if (!fp) {
00300 return false;
00301 }
00302
00303 char line[1024];
00304 while (!feof(fp) && fgets(line, 1024, fp)) {
00305 add_entry_to_cache_table(cache_index_parse_line(line));
00306 DBG2(cerr << line << endl);
00307 }
00308
00309 int res = fclose(fp) ;
00310 if (res) {
00311 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
00312 }
00313
00314 d_new_entries = 0;
00315
00316 return true;
00317 }
00318
00326 HTTPCacheTable::CacheEntry *
00327 HTTPCacheTable::cache_index_parse_line(const char *line)
00328 {
00329
00330 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
00331 istringstream iss(line);
00332 iss >> entry->url;
00333 iss >> entry->cachename;
00334
00335 iss >> entry->etag;
00336 if (entry->etag == CACHE_EMPTY_ETAG)
00337 entry->etag = "";
00338
00339 iss >> entry->lm;
00340 iss >> entry->expires;
00341 iss >> entry->size;
00342 iss >> entry->range;
00343
00344 iss >> entry->hash;
00345 iss >> entry->hits;
00346 iss >> entry->freshness_lifetime;
00347 iss >> entry->response_time;
00348 iss >> entry->corrected_initial_age;
00349
00350 iss >> entry->must_revalidate;
00351
00352 return entry;
00353 }
00354
00357 class WriteOneCacheEntry :
00358 public unary_function<HTTPCacheTable::CacheEntry *, void>
00359 {
00360
00361 FILE *d_fp;
00362
00363 public:
00364 WriteOneCacheEntry(FILE *fp) : d_fp(fp)
00365 {}
00366
00367 void operator()(HTTPCacheTable::CacheEntry *e)
00368 {
00369 if (e && fprintf(d_fp,
00370 "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
00371 e->url.c_str(),
00372 e->cachename.c_str(),
00373 e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
00374 (long)(e->lm),
00375 (long)(e->expires),
00376 e->size,
00377 e->range ? '1' : '0',
00378 e->hash,
00379 e->hits,
00380 (long)(e->freshness_lifetime),
00381 (long)(e->response_time),
00382 (long)(e->corrected_initial_age),
00383 e->must_revalidate ? '1' : '0') < 0)
00384 throw Error("Cache Index. Error writing cache index\n");
00385 }
00386 };
00387
00397 void
00398 HTTPCacheTable::cache_index_write()
00399 {
00400 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
00401
00402
00403 FILE * fp = NULL;
00404 if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
00405 throw Error(string("Cache Index. Can't open `") + d_cache_index
00406 + string("' for writing"));
00407 }
00408
00409
00410
00411
00412 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00413 HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
00414 if (cp)
00415 for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
00416 }
00417
00418
00419 int res = fclose(fp);
00420 if (res) {
00421 DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
00422 << (void *)fp << endl);
00423 }
00424
00425 d_new_entries = 0;
00426 }
00427
00429
00442 string
00443 HTTPCacheTable::create_hash_directory(int hash)
00444 {
00445 struct stat stat_info;
00446 ostringstream path;
00447
00448 path << d_cache_root << hash;
00449 string p = path.str();
00450
00451 if (stat(p.c_str(), &stat_info) == -1) {
00452 DBG2(cerr << "Cache....... Create dir " << p << endl);
00453 if (MKDIR(p.c_str(), 0777) < 0) {
00454 DBG2(cerr << "Cache....... Can't create..." << endl);
00455 throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
00456 }
00457 }
00458 else {
00459 DBG2(cerr << "Cache....... Directory " << p << " already exists"
00460 << endl);
00461 }
00462
00463 return p;
00464 }
00465
00480 void
00481 HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
00482 {
00483 string hash_dir = create_hash_directory(entry->hash);
00484 #ifdef WIN32
00485 hash_dir += "\\dodsXXXXXX";
00486 #else
00487 hash_dir += "/dodsXXXXXX";
00488 #endif
00489
00490
00491 char *templat = new char[hash_dir.size() + 1];
00492 strcpy(templat, hash_dir.c_str());
00493
00494
00495
00496
00497
00498 int fd = MKSTEMP(templat);
00499 if (fd < 0) {
00500 delete[] templat; templat = 0;
00501 close(fd);
00502 throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
00503 }
00504
00505 entry->cachename = templat;
00506 delete[] templat; templat = 0;
00507 close(fd);
00508 }
00509
00510
00512 static inline int
00513 entry_disk_space(int size, unsigned int block_size)
00514 {
00515 unsigned int num_of_blocks = (size + block_size) / block_size;
00516
00517 DBG(cerr << "size: " << size << ", block_size: " << block_size
00518 << ", num_of_blocks: " << num_of_blocks << endl);
00519
00520 return num_of_blocks * block_size;
00521 }
00522
00526
00532 void
00533 HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
00534 {
00535 int hash = entry->hash;
00536
00537 if (!d_cache_table[hash])
00538 d_cache_table[hash] = new CacheEntries;
00539
00540 d_cache_table[hash]->push_back(entry);
00541
00542 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
00543 << ", entry->size: " << entry->size << ", block size: " << d_block_size
00544 << endl);
00545
00546 d_current_size += entry_disk_space(entry->size, d_block_size);
00547
00548 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
00549
00550 increment_new_entries();
00551 }
00552
00556 HTTPCacheTable::CacheEntry *
00557 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url)
00558 {
00559 return get_locked_entry_from_cache_table(get_hash(url), url);
00560 }
00561
00569 HTTPCacheTable::CacheEntry *
00570 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url)
00571 {
00572 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
00573 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
00574 if (d_cache_table[hash]) {
00575 CacheEntries *cp = d_cache_table[hash];
00576 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00577
00578
00579 if ((*i) && (*i)->url == url) {
00580 (*i)->lock_read_response();
00581 return *i;
00582 }
00583 }
00584 }
00585
00586 return 0;
00587 }
00588
00595 HTTPCacheTable::CacheEntry *
00596 HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
00597 {
00598 int hash = get_hash(url);
00599 if (d_cache_table[hash]) {
00600 CacheEntries *cp = d_cache_table[hash];
00601 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00602
00603
00604 if ((*i) && (*i)->url == url) {
00605 (*i)->lock_write_response();
00606 return *i;
00607 }
00608 }
00609 }
00610
00611 return 0;
00612 }
00613
00621 void
00622 HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
00623 {
00624
00625
00626 if (entry->readers)
00627 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
00628
00629 REMOVE(entry->cachename.c_str());
00630 REMOVE(string(entry->cachename + CACHE_META).c_str());
00631
00632 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00633
00634 unsigned int eds = entry_disk_space(entry->size, get_block_size());
00635 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
00636
00637 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00638 }
00639
00642 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
00643 {
00644 string d_url;
00645 HTTPCacheTable *d_cache_table;
00646
00647 public:
00648 DeleteCacheEntry(HTTPCacheTable *c, const string &url)
00649 : d_url(url), d_cache_table(c)
00650 {}
00651
00652 void operator()(HTTPCacheTable::CacheEntry *&e)
00653 {
00654 if (e && e->url == d_url) {
00655 e->lock_write_response();
00656 d_cache_table->remove_cache_entry(e);
00657 e->unlock_write_response();
00658 delete e; e = 0;
00659 }
00660 }
00661 };
00662
00669 void
00670 HTTPCacheTable::remove_entry_from_cache_table(const string &url)
00671 {
00672 int hash = get_hash(url);
00673 if (d_cache_table[hash]) {
00674 CacheEntries *cp = d_cache_table[hash];
00675 for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
00676 cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
00677 cp->end());
00678 }
00679 }
00680
00683 class DeleteUnlockedCacheEntry :
00684 public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00685 HTTPCacheTable &d_table;
00686
00687 public:
00688 DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
00689 d_table(t) {
00690 }
00691 void operator()(HTTPCacheTable::CacheEntry *&e) {
00692 if (e) {
00693 d_table.remove_cache_entry(e);
00694 delete e; e = 0;
00695 }
00696 }
00697 };
00698
00699 void HTTPCacheTable::delete_all_entries() {
00700
00701
00702 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00703 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00704 if (slot) {
00705 for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
00706 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)),
00707 slot->end());
00708 }
00709 }
00710
00711 cache_index_delete();
00712 }
00713
00727 void
00728 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
00729 {
00730 entry->response_time = time(NULL);
00731 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
00732 time_t corrected_received_age = max(apparent_age, entry->age);
00733 time_t response_delay = entry->response_time - request_time;
00734 entry->corrected_initial_age = corrected_received_age + response_delay;
00735
00736
00737
00738
00739 time_t freshness_lifetime = entry->max_age;
00740 if (freshness_lifetime < 0) {
00741 if (entry->expires < 0) {
00742 if (entry->lm < 0) {
00743 freshness_lifetime = default_expiration;
00744 }
00745 else {
00746 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
00747 }
00748 }
00749 else
00750 freshness_lifetime = entry->expires - entry->date;
00751 }
00752
00753 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
00754
00755 DBG2(cerr << "Cache....... Received Age " << entry->age
00756 << ", corrected " << entry->corrected_initial_age
00757 << ", freshness lifetime " << entry->freshness_lifetime << endl);
00758 }
00759
00771 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry,
00772 unsigned long max_entry_size, const vector<string> &headers) {
00773 vector<string>::const_iterator i;
00774 for (i = headers.begin(); i != headers.end(); ++i) {
00775
00776 if ((*i).empty())
00777 continue;
00778
00779 string::size_type colon = (*i).find(':');
00780
00781
00782 if (colon == string::npos)
00783 continue;
00784
00785 string header = (*i).substr(0, (*i).find(':'));
00786 string value = (*i).substr((*i).find(": ") + 2);
00787 DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
00788
00789 if (header == "ETag") {
00790 entry->etag = value;
00791 } else if (header == "Last-Modified") {
00792 entry->lm = parse_time(value.c_str());
00793 } else if (header == "Expires") {
00794 entry->expires = parse_time(value.c_str());
00795 } else if (header == "Date") {
00796 entry->date = parse_time(value.c_str());
00797 } else if (header == "Age") {
00798 entry->age = parse_time(value.c_str());
00799 } else if (header == "Content-Length") {
00800 unsigned long clength = strtoul(value.c_str(), 0, 0);
00801 if (clength > max_entry_size)
00802 entry->set_no_cache(true);
00803 } else if (header == "Cache-Control") {
00804
00805
00806
00807 if (value == "no-cache" || value == "no-store")
00808
00809
00810
00811 entry->set_no_cache(true);
00812 else if (value == "must-revalidate")
00813 entry->must_revalidate = true;
00814 else if (value.find("max-age") != string::npos) {
00815 string max_age = value.substr(value.find("=" + 1));
00816 entry->max_age = parse_time(max_age.c_str());
00817 }
00818 }
00819 }
00820 }
00821
00823
00824
00825 void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
00826 entry->hits++;
00827 d_locked_entries[body] = entry;
00828 }
00829
00830 void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
00831 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
00832 if (!entry)
00833 throw InternalErr("There is no cache entry for the response given.");
00834
00835 d_locked_entries.erase(body);
00836 entry->unlock_read_response();
00837
00838 if (entry->readers < 0)
00839 throw InternalErr("An unlocked entry was released");
00840 }
00841
00842 bool HTTPCacheTable::is_locked_read_responses() {
00843 return !d_locked_entries.empty();
00844 }
00845
00846 }