summaryrefslogtreecommitdiffstats
path: root/src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch
diff options
context:
space:
mode:
Diffstat (limited to 'src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch')
-rw-r--r--src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch512
1 files changed, 512 insertions, 0 deletions
diff --git a/src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch b/src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch
new file mode 100644
index 0000000..010407b
--- /dev/null
+++ b/src/ceph/0007-librbd-cleanup-policy-based-promotion-eviction.patch
@@ -0,0 +1,512 @@
+From dd4804fb05ad8aca51516b0112975cc91ef85a6b Mon Sep 17 00:00:00 2001
+From: Yuan Zhou <yuan.zhou@intel.com>
+Date: Wed, 8 Aug 2018 15:31:47 +0800
+Subject: [PATCH 07/10] librbd: cleanup policy based promotion/eviction
+
+Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
+---
+ src/common/options.cc | 4 +
+ .../rbd_cache/CacheControllerSocketClient.hpp | 3 +-
+ src/tools/rbd_cache/ObjectCacheStore.cc | 63 +++----
+ src/tools/rbd_cache/ObjectCacheStore.h | 10 +-
+ src/tools/rbd_cache/Policy.hpp | 18 +-
+ src/tools/rbd_cache/SimplePolicy.hpp | 188 +++++++++------------
+ 6 files changed, 141 insertions(+), 145 deletions(-)
+
+diff --git a/src/common/options.cc b/src/common/options.cc
+index 7839a31..b334c1e 100644
+--- a/src/common/options.cc
++++ b/src/common/options.cc
+@@ -6365,6 +6365,10 @@ static std::vector<Option> get_rbd_options() {
+ .set_default("/tmp")
+ .set_description("shared ssd caching data dir"),
+
++ Option("rbd_shared_cache_entries", Option::TYPE_INT, Option::LEVEL_ADVANCED)
++ .set_default(4096)
++ .set_description("shared ssd caching data entries"),
++
+ Option("rbd_non_blocking_aio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+ .set_default(true)
+ .set_description("process AIO ops from a dispatch thread to prevent blocking"),
+diff --git a/src/tools/rbd_cache/CacheControllerSocketClient.hpp b/src/tools/rbd_cache/CacheControllerSocketClient.hpp
+index 4e1f36c..56b79ce 100644
+--- a/src/tools/rbd_cache/CacheControllerSocketClient.hpp
++++ b/src/tools/rbd_cache/CacheControllerSocketClient.hpp
+@@ -90,7 +90,8 @@ public:
+ }
+ });
+ std::unique_lock<std::mutex> lk(m);
+- cv.wait(lk);
++ //cv.wait(lk);
++ cv.wait_for(lk, std::chrono::milliseconds(100));
+ return 0;
+ }
+
+diff --git a/src/tools/rbd_cache/ObjectCacheStore.cc b/src/tools/rbd_cache/ObjectCacheStore.cc
+index b39fe66..99f90d6 100644
+--- a/src/tools/rbd_cache/ObjectCacheStore.cc
++++ b/src/tools/rbd_cache/ObjectCacheStore.cc
+@@ -15,7 +15,12 @@ namespace cache {
+ ObjectCacheStore::ObjectCacheStore(CephContext *cct, ContextWQ* work_queue)
+ : m_cct(cct), m_work_queue(work_queue),
+ m_rados(new librados::Rados()) {
+- m_policy = new SimplePolicy(4096, 0.9); // TODO
++
++ uint64_t object_cache_entries =
++ cct->_conf.get_val<int64_t>("rbd_shared_cache_entries");
++
++ //TODO(): allow to set level
++ m_policy = new SimplePolicy(object_cache_entries, 0.5);
+ }
+
+ ObjectCacheStore::~ObjectCacheStore() {
+@@ -35,7 +40,16 @@ int ObjectCacheStore::init(bool reset) {
+ lderr(m_cct) << "fail to conect to cluster" << dendl;
+ return ret;
+ }
+- //TODO(): check existing cache objects
++
++ std::string cache_path = m_cct->_conf.get_val<std::string>("rbd_shared_cache_path");
++ //TODO(): check and reuse existing cache objects
++ if(reset) {
++ std::string cmd = "exec rm -rf " + cache_path + "/rbd_cache*; exec mkdir -p " + cache_path;
++ //TODO(): to use std::filesystem
++ int r = system(cmd.c_str());
++ }
++
++ evict_thd = new std::thread([this]{this->evict_thread_body();});
+ return ret;
+ }
+
+@@ -58,10 +72,6 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
+
+ librados::IoCtx* ioctx = m_ioctxs[pool_name];
+
+- //promoting: update metadata
+- m_policy->update_status(cache_file_name, PROMOTING);
+- assert(PROMOTING == m_policy->get_status(cache_file_name));
+-
+ librados::bufferlist* read_buf = new librados::bufferlist();
+ int object_size = 4096*1024; //TODO(): read config from image metadata
+
+@@ -83,9 +93,9 @@ int ObjectCacheStore::do_promote(std::string pool_name, std::string object_name)
+ ret = cache_file.write_object_to_file(*read_buf, object_size);
+
+ // update metadata
+- assert(PROMOTING == m_policy->get_status(cache_file_name));
+- m_policy->update_status(cache_file_name, PROMOTED);
+- assert(PROMOTED == m_policy->get_status(cache_file_name));
++ assert(OBJ_CACHE_PROMOTING == m_policy->get_status(cache_file_name));
++ m_policy->update_status(cache_file_name, OBJ_CACHE_PROMOTED);
++ assert(OBJ_CACHE_PROMOTED == m_policy->get_status(cache_file_name));
+
+ return ret;
+
+@@ -97,18 +107,15 @@ int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_na
+
+ std::string cache_file_name = pool_name + object_name;
+
+- // TODO lookup and return status;
+-
+ CACHESTATUS ret;
+ ret = m_policy->lookup_object(cache_file_name);
+
+ switch(ret) {
+- case NONE:
++ case OBJ_CACHE_NONE:
+ return do_promote(pool_name, object_name);
+- case PROMOTING:
+- return -1;
+- case PROMOTED:
++ case OBJ_CACHE_PROMOTED:
+ return 0;
++ case OBJ_CACHE_PROMOTING:
+ default:
+ return -1;
+ }
+@@ -117,26 +124,14 @@ int ObjectCacheStore::lookup_object(std::string pool_name, std::string object_na
+ void ObjectCacheStore::evict_thread_body() {
+ int ret;
+ while(m_evict_go) {
+- std::string temp_cache_file;
+-
+- ret = m_policy->evict_object(temp_cache_file);
+- if(ret == 0) {
+- continue;
+- }
+-
+- // TODO
+- // delete temp_cache_file file.
+-
+- assert(EVICTING == m_policy->get_status(temp_cache_file));
+-
+- m_policy->update_status(temp_cache_file, EVICTED);
+-
+- assert(NONE == m_policy->get_status(temp_cache_file));
++ ret = evict_objects();
+ }
+ }
+
+
+ int ObjectCacheStore::shutdown() {
++ m_evict_go = false;
++ evict_thd->join();
+ m_rados->shutdown();
+ return 0;
+ }
+@@ -165,5 +160,13 @@ int ObjectCacheStore::promote_object(librados::IoCtx* ioctx, std::string object_
+
+ }
+
++int ObjectCacheStore::evict_objects() {
++ std::list<std::string> obj_list;
++ m_policy->get_evict_list(&obj_list);
++ for (auto& obj: obj_list) {
++ //do_evict(obj);
++ }
++}
++
+ } // namespace cache
+ } // namespace rbd
+diff --git a/src/tools/rbd_cache/ObjectCacheStore.h b/src/tools/rbd_cache/ObjectCacheStore.h
+index 5118a73..ba0e1f1 100644
+--- a/src/tools/rbd_cache/ObjectCacheStore.h
++++ b/src/tools/rbd_cache/ObjectCacheStore.h
+@@ -15,6 +15,7 @@
+ #include "librbd/cache/SharedPersistentObjectCacherFile.h"
+ #include "SimplePolicy.hpp"
+
++
+ using librados::Rados;
+ using librados::IoCtx;
+
+@@ -40,10 +41,9 @@ class ObjectCacheStore
+
+ int lock_cache(std::string vol_name);
+
+- void evict_thread_body();
+-
+ private:
+- int _evict_object();
++ void evict_thread_body();
++ int evict_objects();
+
+ int do_promote(std::string pool_name, std::string object_name);
+
+@@ -61,8 +61,8 @@ class ObjectCacheStore
+ librbd::cache::SyncFile *m_cache_file;
+
+ Policy* m_policy;
+-
+- bool m_evict_go;
++ std::thread* evict_thd;
++ bool m_evict_go = false;
+ };
+
+ } // namespace rbd
+diff --git a/src/tools/rbd_cache/Policy.hpp b/src/tools/rbd_cache/Policy.hpp
+index 575c294..711e3bd 100644
+--- a/src/tools/rbd_cache/Policy.hpp
++++ b/src/tools/rbd_cache/Policy.hpp
+@@ -1,12 +1,16 @@
+ #ifndef RBD_CACHE_POLICY_HPP
+ #define RBD_CACHE_POLICY_HPP
+
++#include <list>
++#include <string>
++
++namespace rbd {
++namespace cache {
++
+ enum CACHESTATUS {
+- NONE = 0,
+- PROMOTING,
+- PROMOTED,
+- EVICTING,
+- EVICTED,
++ OBJ_CACHE_NONE = 0,
++ OBJ_CACHE_PROMOTING,
++ OBJ_CACHE_PROMOTED,
+ };
+
+
+@@ -18,5 +22,9 @@ public:
+ virtual int evict_object(std::string&) = 0;
+ virtual void update_status(std::string, CACHESTATUS) = 0;
+ virtual CACHESTATUS get_status(std::string) = 0;
++ virtual void get_evict_list(std::list<std::string>* obj_list) = 0;
+ };
++
++} // namespace cache
++} // namespace rbd
+ #endif
+diff --git a/src/tools/rbd_cache/SimplePolicy.hpp b/src/tools/rbd_cache/SimplePolicy.hpp
+index a0d8de7..e785de1 100644
+--- a/src/tools/rbd_cache/SimplePolicy.hpp
++++ b/src/tools/rbd_cache/SimplePolicy.hpp
+@@ -3,138 +3,93 @@
+
+ #include "Policy.hpp"
+ #include "include/lru.h"
++#include "common/RWLock.h"
+ #include "common/Mutex.h"
+
+ #include <vector>
+ #include <unordered_map>
+ #include <string>
+
++namespace rbd {
++namespace cache {
++
++
+ class SimplePolicy : public Policy {
+ public:
+- SimplePolicy(uint64_t block_num, float level)
+- : m_level(level),
+- m_lock("SimplePolicy"),
+- m_entry_count(block_num)
++ SimplePolicy(uint64_t block_num, float watermark)
++ : m_watermark(watermark), m_entry_count(block_num),
++ m_cache_map_lock("rbd::cache::SimplePolicy::m_cache_map_lock"),
++ m_free_list_lock("rbd::cache::SimplePolicy::m_free_list_lock")
+ {
+
+- Entry m_entries[m_entry_count];
+-
+- for(auto &entry : m_entries) {
+- m_free_lru.lru_insert_bot(&entry);
++ for(uint64_t i = 0; i < m_entry_count; i++) {
++ m_free_list.push_back(new Entry());
+ }
++
+ }
+
+- ~SimplePolicy() {}
++ ~SimplePolicy() {
++ for(uint64_t i = 0; i < m_entry_count; i++) {
++ Entry* entry = reinterpret_cast<Entry*>(m_free_list.front());
++ delete entry;
++ m_free_list.pop_front();
++ }
++ }
+
+ CACHESTATUS lookup_object(std::string cache_file_name) {
+- Mutex::Locker locker(m_lock);
+
+- auto entry_it = m_oid_to_entry.find(cache_file_name);
+- if(entry_it == m_oid_to_entry.end()) {
+- return NONE;
++ //TODO(): check race condition
++ RWLock::WLocker wlocker(m_cache_map_lock);
++
++ auto entry_it = m_cache_map.find(cache_file_name);
++ if(entry_it == m_cache_map.end()) {
++ Mutex::Locker locker(m_free_list_lock);
++ Entry* entry = reinterpret_cast<Entry*>(m_free_list.front());
++ assert(entry != nullptr);
++ m_free_list.pop_front();
++ entry->status = OBJ_CACHE_PROMOTING;
++
++ m_cache_map[cache_file_name] = entry;
++
++ return OBJ_CACHE_NONE;
+ }
+
+ Entry* entry = entry_it->second;
+
+- LRU* lru;
+- if(entry->status == PROMOTED) {
+- lru = &m_promoted_lru;
+- } else {
+- lru = &m_handing_lru;
++ if(entry->status == OBJ_CACHE_PROMOTED) {
++ // touch it
++ m_promoted_lru.lru_touch(entry);
+ }
+
+- // touch it
+- lru->lru_remove(entry);
+- lru->lru_insert_top(entry);
+-
+ return entry->status;
+ }
+
+ int evict_object(std::string& out_cache_file_name) {
+- Mutex::Locker locker(m_lock);
+-
+- // still have enough free space, don't need to evict lru.
+- uint64_t temp_current_size = m_oid_to_entry.size();
+- float temp_current_evict_level = temp_current_size / m_entry_count;
+- if(temp_current_evict_level < m_level) {
+- return 0;
+- }
+-
+- // when all entries are USING, PROMOTING or EVICTING, just busy waiting.
+- if(m_promoted_lru.lru_get_size() == 0) {
+- return 0;
+- }
+-
+- assert(m_promoted_lru.lru_get_size() != 0);
+-
+- // evict one item from promoted lru
+- Entry *entry = reinterpret_cast<Entry*>(m_promoted_lru.lru_get_next_expire());
+- assert(entry != nullptr);
+-
+- assert(entry->status == PROMOTED);
+-
+- out_cache_file_name = entry->cache_file_name;
+- entry->status = EVICTING;
+-
+- m_promoted_lru.lru_remove(entry);
+- m_handing_lru.lru_insert_top(entry);
++ RWLock::WLocker locker(m_cache_map_lock);
+
+ return 1;
+ }
+
+ // TODO(): simplify the logic
+- void update_status(std::string _file_name, CACHESTATUS _status) {
+- Mutex::Locker locker(m_lock);
++ void update_status(std::string file_name, CACHESTATUS new_status) {
++ RWLock::WLocker locker(m_cache_map_lock);
+
+ Entry* entry;
+- auto entry_it = m_oid_to_entry.find(_file_name);
++ auto entry_it = m_cache_map.find(file_name);
+
+ // just check.
+- if(_status == PROMOTING) {
+- assert(m_oid_to_entry.find(_file_name) == m_oid_to_entry.end());
++ if(new_status == OBJ_CACHE_PROMOTING) {
++ assert(entry_it == m_cache_map.end());
+ }
+
+- // miss this object.
+- if(entry_it == m_oid_to_entry.end() && _status == PROMOTING) {
+- entry = reinterpret_cast<Entry*>(m_free_lru.lru_get_next_expire());
+- if(entry == nullptr) {
+- assert(0); // namely evict thread have some problems.
+- }
+-
+- entry->status = PROMOTING;
+-
+- m_oid_to_entry[_file_name] = entry;
+- m_free_lru.lru_remove(entry);
+- m_handing_lru.lru_insert_top(entry);
+-
+- return;
+- }
+-
+- assert(entry_it != m_oid_to_entry.end());
++ assert(entry_it != m_cache_map.end());
+
+ entry = entry_it->second;
+
+- // promoting action have been finished, so update it.
+- if(entry->status == PROMOTING && _status== PROMOTED) {
+- m_handing_lru.lru_remove(entry);
++ // promoting is done, so update it.
++ if(entry->status == OBJ_CACHE_PROMOTING && new_status== OBJ_CACHE_PROMOTED) {
+ m_promoted_lru.lru_insert_top(entry);
+- entry->status = PROMOTED;
+- return;
+- }
+-
+- // will delete this cache file
+- if(entry->status == PROMOTED && _status == EVICTING) {
+- m_promoted_lru.lru_remove(entry);
+- m_handing_lru.lru_insert_top(entry);
+- entry->status = EVICTING;
+- return;
+- }
+-
+-
+- if(_status == EVICTED) {
+- m_oid_to_entry.erase(entry_it);
+- m_handing_lru.lru_remove(entry);
+- m_free_lru.lru_insert_bot(entry);
++ entry->status = new_status;
+ return;
+ }
+
+@@ -142,39 +97,64 @@ public:
+ }
+
+ // get entry status
+- CACHESTATUS get_status(std::string _file_name) {
+- Mutex::Locker locker(m_lock);
+- auto entry_it = m_oid_to_entry.find(_file_name);
+- if(entry_it == m_oid_to_entry.end()) {
+- return NONE;
++ CACHESTATUS get_status(std::string file_name) {
++ RWLock::RLocker locker(m_cache_map_lock);
++ auto entry_it = m_cache_map.find(file_name);
++ if(entry_it == m_cache_map.end()) {
++ return OBJ_CACHE_NONE;
+ }
+
+ return entry_it->second->status;
+ }
+
++ void get_evict_list(std::list<std::string>* obj_list) {
++ RWLock::WLocker locker(m_cache_map_lock);
++ // check free ratio, pop entries from LRU
++ if (m_free_list.size() / m_entry_count < m_watermark) {
++ int evict_num = 10; //TODO(): make this configurable
++ for(int i = 0; i < evict_num; i++) {
++ Entry* entry = reinterpret_cast<Entry*>(m_promoted_lru.lru_expire());
++ if (entry == nullptr) {
++ continue;
++ }
++ std::string file_name = entry->cache_file_name;
++ obj_list->push_back(file_name);
++
++ auto entry_it = m_cache_map.find(file_name);
++ m_cache_map.erase(entry_it);
++
++ //mark this entry as free
++ entry->status = OBJ_CACHE_NONE;
++ Mutex::Locker locker(m_free_list_lock);
++ m_free_list.push_back(entry);
++ }
++ }
++ }
+
+ private:
+
+ class Entry : public LRUObject {
+ public:
+ CACHESTATUS status;
+- Entry() : status(NONE){}
++ Entry() : status(OBJ_CACHE_NONE){}
+ std::string cache_file_name;
+ void encode(bufferlist &bl){}
+ void decode(bufferlist::iterator &it){}
+ };
+
+- std::unordered_map<std::string, Entry*> m_oid_to_entry;
++ float m_watermark;
++ uint64_t m_entry_count;
+
+- LRU m_free_lru;
+- LRU m_handing_lru; // include promoting status or evicting status
+- LRU m_promoted_lru; // include promoted, using status.
++ std::unordered_map<std::string, Entry*> m_cache_map;
++ RWLock m_cache_map_lock;
+
+- mutable Mutex m_lock;
++ std::deque<Entry*> m_free_list;
++ Mutex m_free_list_lock;
+
+- float m_level;
+- uint64_t m_entry_count;
++ LRU m_promoted_lru; // include promoted, using status.
+
+ };
+
++} // namespace cache
++} // namespace rbd
+ #endif
+--
+2.7.4
+