object-storage: yield during directory tree walks

Have the co-routine (greenlet) yield during long-running directory
tree walks to avoid starving out other greenlets that might be
available to run.

A directory walk involves statting every file in the tree to get its
size. For large numbers of files, this can get expensive. So we yield
after every stat call, and after processing every directory.

Change-Id: I07f1dfeef6a09b5817e0c237ecc748c491d52a31
BUG: 894674
Signed-off-by: Peter Portante <peter.portante@redhat.com>
Reviewed-on: http://review.gluster.org/4380
Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Mohammed Junaid <junaid@redhat.com>
Tested-by: Mohammed Junaid <junaid@redhat.com>
This commit is contained in:
Peter Portante 2013-01-12 22:39:24 -05:00
parent f59e2c6f28
commit 1e9a2579a4

View File

@ -19,6 +19,7 @@ import errno
import xattr
import random
from hashlib import md5
from eventlet import sleep
import cPickle as pickle
from ConfigParser import ConfigParser, NoSectionError, NoOptionError
from swift.common.utils import normalize_timestamp, TRUE_VALUES
@ -233,27 +234,30 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0,
# strip the prefix off, also stripping the leading and trailing slashes
obj_path = path.replace(cont_path, '').strip(os.path.sep)
for i in src_list:
for obj_name in src_list:
if obj_path:
obj_list.append(os.path.join(obj_path, i))
obj_list.append(os.path.join(obj_path, obj_name))
else:
obj_list.append(i)
obj_list.append(obj_name)
object_count += 1
if reg_file:
bytes_used += os.path.getsize(path + '/' + i)
bytes_used += os.path.getsize(os.path.join(path, obj_name))
sleep()
return object_count, bytes_used
def update_list(path, cont_path, dirs=[], files=[], object_count=0,
bytes_used=0, obj_list=[]):
object_count, bytes_used = _update_list(path, cont_path, files, True,
object_count, bytes_used,
obj_list)
object_count, bytes_used = _update_list(path, cont_path, dirs, False,
object_count, bytes_used,
obj_list)
if files:
object_count, bytes_used = _update_list(path, cont_path, files, True,
object_count, bytes_used,
obj_list)
if dirs:
object_count, bytes_used = _update_list(path, cont_path, dirs, False,
object_count, bytes_used,
obj_list)
return object_count, bytes_used
@ -281,6 +285,7 @@ def _get_container_details_from_fs(cont_path):
obj_list)
dir_list.append((path, do_stat(path).st_mtime))
sleep()
return ContainerDetails(bytes_used, object_count, obj_list, dir_list)