Skip to content

Commit dc91003

Browse files
Updated PowerFlex/ScaleIO volume migration checks and rollback migration on failure
1 parent ea6f7f1 commit dc91003

2 files changed

Lines changed: 193 additions & 42 deletions

File tree

engine/storage/volume/src/main/java/org/apache/cloudstack/storage/volume/VolumeServiceImpl.java

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,7 +1638,6 @@ public CopyVolumeContext(AsyncCompletionCallback<T> callback, AsyncCallFuture<Vo
16381638
this.destVolume = destVolume;
16391639
this.future = future;
16401640
}
1641-
16421641
}
16431642

16441643
protected AsyncCallFuture<VolumeApiResult> copyVolumeFromImageToPrimary(VolumeInfo srcVolume, DataStore destStore) {
@@ -1821,10 +1820,10 @@ protected Void copyVolumeCallBack(AsyncCallbackDispatcher<VolumeServiceImpl, Cop
18211820
srcVolume.processEvent(Event.OperationFailed);
18221821
destroyVolume(destVolume.getId());
18231822
if (destVolume.getStoragePoolType() == StoragePoolType.PowerFlex) {
1824-
if (canVolumeBeRemoved(destVolume.getId())) {
1825-
s_logger.info("Volume " + destVolume.getId() + " is not referred anywhere, can be removed");
1826-
volDao.remove(destVolume.getId());
1827-
}
1823+
s_logger.info("Dest volume " + destVolume.getId() + " can be removed");
1824+
destVolume.processEvent(Event.ExpungeRequested);
1825+
destVolume.processEvent(Event.OperationSuccessed);
1826+
volDao.remove(destVolume.getId());
18281827
future.complete(res);
18291828
return null;
18301829
}
@@ -1839,10 +1838,10 @@ protected Void copyVolumeCallBack(AsyncCallbackDispatcher<VolumeServiceImpl, Cop
18391838
try {
18401839
destroyVolume(srcVolume.getId());
18411840
if (srcVolume.getStoragePoolType() == StoragePoolType.PowerFlex) {
1842-
if (canVolumeBeRemoved(srcVolume.getId())) {
1843-
s_logger.info("Volume " + srcVolume.getId() + " is not referred anywhere, can be removed");
1844-
volDao.remove(srcVolume.getId());
1845-
}
1841+
s_logger.info("Src volume " + srcVolume.getId() + " can be removed");
1842+
srcVolume.processEvent(Event.ExpungeRequested);
1843+
srcVolume.processEvent(Event.OperationSuccessed);
1844+
volDao.remove(srcVolume.getId());
18461845
future.complete(res);
18471846
return null;
18481847
}
@@ -1868,6 +1867,21 @@ protected Void copyVolumeCallBack(AsyncCallbackDispatcher<VolumeServiceImpl, Cop
18681867
return null;
18691868
}
18701869

1870+
private class CopyManagedVolumeContext<T> extends AsyncRpcContext<T> {
1871+
final VolumeInfo srcVolume;
1872+
final VolumeInfo destVolume;
1873+
final Host host;
1874+
final AsyncCallFuture<VolumeApiResult> future;
1875+
1876+
public CopyManagedVolumeContext(AsyncCompletionCallback<T> callback, AsyncCallFuture<VolumeApiResult> future, VolumeInfo srcVolume, VolumeInfo destVolume, Host host) {
1877+
super(callback);
1878+
this.srcVolume = srcVolume;
1879+
this.destVolume = destVolume;
1880+
this.host = host;
1881+
this.future = future;
1882+
}
1883+
}
1884+
18711885
private AsyncCallFuture<VolumeApiResult> copyManagedVolume(VolumeInfo srcVolume, DataStore destStore) {
18721886
AsyncCallFuture<VolumeApiResult> future = new AsyncCallFuture<VolumeApiResult>();
18731887
VolumeApiResult res = new VolumeApiResult(srcVolume);
@@ -1911,14 +1925,7 @@ private AsyncCallFuture<VolumeApiResult> copyManagedVolume(VolumeInfo srcVolume,
19111925
// Refresh the volume info from the DB.
19121926
destVolume = volFactory.getVolume(destVolume.getId(), destStore);
19131927

1914-
destVolume.processEvent(Event.CreateRequested);
1915-
srcVolume.processEvent(Event.MigrationRequested);
1916-
1917-
CopyVolumeContext<VolumeApiResult> context = new CopyVolumeContext<VolumeApiResult>(null, future, srcVolume, destVolume, destStore);
1918-
AsyncCallbackDispatcher<VolumeServiceImpl, CopyCommandResult> caller = AsyncCallbackDispatcher.create(this);
1919-
caller.setCallback(caller.getTarget().copyManagedVolumeCallBack(null, null)).setContext(context);
1920-
1921-
PrimaryDataStore srcPrimaryDataStore = (PrimaryDataStore) srcVolume.getDataStore();
1928+
PrimaryDataStore srcPrimaryDataStore = (PrimaryDataStore) srcVolume.getDataStore();
19221929
if (srcPrimaryDataStore.isManaged()) {
19231930
Map<String, String> srcPrimaryDataStoreDetails = new HashMap<String, String>();
19241931
srcPrimaryDataStoreDetails.put(PrimaryDataStore.MANAGED, Boolean.TRUE.toString());
@@ -1945,14 +1952,14 @@ private AsyncCallFuture<VolumeApiResult> copyManagedVolume(VolumeInfo srcVolume,
19451952

19461953
grantAccess(destVolume, hostWithPoolsAccess, destStore);
19471954

1948-
try {
1949-
motionSrv.copyAsync(srcVolume, destVolume, hostWithPoolsAccess, caller);
1950-
} finally {
1951-
if (srcPrimaryDataStore.isManaged()) {
1952-
revokeAccess(srcVolume, hostWithPoolsAccess, srcVolume.getDataStore());
1953-
}
1954-
revokeAccess(destVolume, hostWithPoolsAccess, destStore);
1955-
}
1955+
destVolume.processEvent(Event.CreateRequested);
1956+
srcVolume.processEvent(Event.MigrationRequested);
1957+
1958+
CopyManagedVolumeContext<VolumeApiResult> context = new CopyManagedVolumeContext<VolumeApiResult>(null, future, srcVolume, destVolume, hostWithPoolsAccess);
1959+
AsyncCallbackDispatcher<VolumeServiceImpl, CopyCommandResult> caller = AsyncCallbackDispatcher.create(this);
1960+
caller.setCallback(caller.getTarget().copyManagedVolumeCallBack(null, null)).setContext(context);
1961+
1962+
motionSrv.copyAsync(srcVolume, destVolume, hostWithPoolsAccess, caller);
19561963
} catch (Exception e) {
19571964
s_logger.error("Copy to managed volume failed due to: " + e);
19581965
if(s_logger.isDebugEnabled()) {
@@ -1965,13 +1972,20 @@ private AsyncCallFuture<VolumeApiResult> copyManagedVolume(VolumeInfo srcVolume,
19651972
return future;
19661973
}
19671974

1968-
protected Void copyManagedVolumeCallBack(AsyncCallbackDispatcher<VolumeServiceImpl, CopyCommandResult> callback, CopyVolumeContext<VolumeApiResult> context) {
1975+
protected Void copyManagedVolumeCallBack(AsyncCallbackDispatcher<VolumeServiceImpl, CopyCommandResult> callback, CopyManagedVolumeContext<VolumeApiResult> context) {
19691976
VolumeInfo srcVolume = context.srcVolume;
19701977
VolumeInfo destVolume = context.destVolume;
1978+
Host host = context.host;
19711979
CopyCommandResult result = callback.getResult();
19721980
AsyncCallFuture<VolumeApiResult> future = context.future;
19731981
VolumeApiResult res = new VolumeApiResult(destVolume);
1982+
19741983
try {
1984+
if (srcVolume.getDataStore() != null && ((PrimaryDataStore) srcVolume.getDataStore()).isManaged()) {
1985+
revokeAccess(srcVolume, host, srcVolume.getDataStore());
1986+
}
1987+
revokeAccess(destVolume, host, destVolume.getDataStore());
1988+
19751989
if (result.isFailed()) {
19761990
res.setResult(result.getResult());
19771991
destVolume.processEvent(Event.MigrationCopyFailed);

plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/client/ScaleIOGatewayClientImpl.java

Lines changed: 153 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -788,11 +788,62 @@ public boolean migrateVolume(final String srcVolumeId, final String destPoolId,
788788

789789
HttpResponse response = null;
790790
try {
791+
Volume volume = getVolume(srcVolumeId);
792+
if (volume == null || Strings.isNullOrEmpty(volume.getVtreeId())) {
793+
LOG.warn("Couldn't find the volume(-tree), can not migrate the volume " + srcVolumeId);
794+
return false;
795+
}
796+
797+
String srcPoolId = volume.getStoragePoolId();
798+
LOG.debug("Migrating the volume: " + srcVolumeId + " on the src pool: " + srcPoolId + " to the dest pool: " + destPoolId +
799+
" in the same PowerFlex cluster");
800+
791801
response = post(
792802
"/instances/Volume::" + srcVolumeId + "/action/migrateVTree",
793803
String.format("{\"destSPId\":\"%s\"}", destPoolId));
794804
checkResponseOK(response);
795-
return waitForVolumeMigrationToComplete(srcVolumeId, timeoutInSecs);
805+
806+
LOG.debug("Wait until the migration is complete for the volume: " + srcVolumeId);
807+
long migrationStartTime = System.currentTimeMillis();
808+
boolean status = waitForVolumeMigrationToComplete(volume.getVtreeId(), timeoutInSecs);
809+
810+
// Check volume storage pool and migration status
811+
// volume, v-tree, and snapshot ids remain the same after the migration
812+
volume = getVolume(srcVolumeId);
813+
if (volume == null || volume.getStoragePoolId() == null) {
814+
LOG.warn("Couldn't get the volume: " + srcVolumeId + " details after migration");
815+
return status;
816+
} else {
817+
String volumeOnPoolId = volume.getStoragePoolId();
818+
// confirm whether the volume is on the dest storage pool or not
819+
if (status && destPoolId.equalsIgnoreCase(volumeOnPoolId)) {
820+
LOG.debug("Migration success for the volume: " + srcVolumeId);
821+
return true;
822+
} else {
823+
try {
824+
// Check and pause any migration activity on the volume
825+
status = false;
826+
VTreeMigrationInfo.MigrationStatus migrationStatus = getVolumeTreeMigrationStatus(volume.getVtreeId());
827+
if (migrationStatus != null && migrationStatus != VTreeMigrationInfo.MigrationStatus.NotInMigration) {
828+
long timeElapsedInSecs = (System.currentTimeMillis() - migrationStartTime) / 1000;
829+
int timeRemainingInSecs = (int) (timeoutInSecs - timeElapsedInSecs);
830+
if (timeRemainingInSecs > (timeoutInSecs / 2)) {
831+
// Try to pause gracefully (continue the migration) if at least half of the time is remaining
832+
pauseVolumeMigration(srcVolumeId, false);
833+
status = waitForVolumeMigrationToComplete(volume.getVtreeId(), timeRemainingInSecs);
834+
}
835+
}
836+
837+
if (!status) {
838+
rollbackVolumeMigration(srcVolumeId);
839+
}
840+
841+
return status;
842+
} catch (Exception ex) {
843+
LOG.warn("Exception on pause/rollback migration of the volume: " + srcVolumeId + " - " + ex.getLocalizedMessage());
844+
}
845+
}
846+
}
796847
} catch (final IOException e) {
797848
LOG.error("Failed to migrate PowerFlex volume due to:", e);
798849
checkResponseTimeOut(e);
@@ -801,39 +852,44 @@ public boolean migrateVolume(final String srcVolumeId, final String destPoolId,
801852
EntityUtils.consumeQuietly(response.getEntity());
802853
}
803854
}
855+
LOG.debug("Migration failed for the volume: " + srcVolumeId);
804856
return false;
805857
}
806858

807-
private boolean waitForVolumeMigrationToComplete(final String volumeId, int waitTimeInSec) {
808-
LOG.debug("Waiting for the migration to complete for the volume " + volumeId);
809-
Volume volume = getVolume(volumeId);
810-
if (volume == null || Strings.isNullOrEmpty(volume.getVtreeId())) {
811-
LOG.warn("Failed to get volume details, unable to check the migration status for the volume " + volumeId);
859+
private boolean waitForVolumeMigrationToComplete(final String volumeTreeId, int waitTimeInSec) {
860+
LOG.debug("Waiting for the migration to complete for the volume-tree " + volumeTreeId);
861+
if (Strings.isNullOrEmpty(volumeTreeId)) {
862+
LOG.warn("Invalid volume-tree id, unable to check the migration status of the volume-tree " + volumeTreeId);
812863
return false;
813864
}
814865

815-
String volumeTreeId = volume.getVtreeId();
816866
while (waitTimeInSec > 0) {
817-
VTreeMigrationInfo.MigrationStatus migrationStatus = getVolumeTreeMigrationStatus(volumeTreeId);
818-
if (migrationStatus != null && migrationStatus == VTreeMigrationInfo.MigrationStatus.NotInMigration) {
819-
LOG.debug("Migration completed for the volume " + volumeId);
820-
return true;
821-
}
822-
823-
waitTimeInSec--;
824-
825867
try {
826868
Thread.sleep(1000); // Try every sec and return after migration is complete
869+
870+
VTreeMigrationInfo.MigrationStatus migrationStatus = getVolumeTreeMigrationStatus(volumeTreeId);
871+
if (migrationStatus != null && migrationStatus == VTreeMigrationInfo.MigrationStatus.NotInMigration) {
872+
LOG.debug("Migration completed for the volume-tree " + volumeTreeId);
873+
return true;
874+
}
827875
} catch (Exception ex) {
876+
LOG.warn("Exception while checking for migration status of the volume-tree: " + volumeTreeId + " - " + ex.getLocalizedMessage());
828877
// don't do anything
878+
} finally {
879+
waitTimeInSec--;
829880
}
830881
}
831882

832-
LOG.debug("Unable to complete the migration for the volume " + volumeId);
883+
LOG.debug("Unable to complete the migration for the volume-tree " + volumeTreeId);
833884
return false;
834885
}
835886

836887
private VTreeMigrationInfo.MigrationStatus getVolumeTreeMigrationStatus(final String volumeTreeId) {
888+
if (Strings.isNullOrEmpty(volumeTreeId)) {
889+
LOG.warn("Invalid volume-tree id, unable to get the migration status of the volume-tree " + volumeTreeId);
890+
return null;
891+
}
892+
837893
HttpResponse response = null;
838894
try {
839895
response = get("/instances/VTree::" + volumeTreeId);
@@ -854,6 +910,87 @@ private VTreeMigrationInfo.MigrationStatus getVolumeTreeMigrationStatus(final St
854910
return null;
855911
}
856912

913+
private boolean rollbackVolumeMigration(final String srcVolumeId) {
914+
Preconditions.checkArgument(!Strings.isNullOrEmpty(srcVolumeId), "src volume id cannot be null");
915+
916+
HttpResponse response = null;
917+
try {
918+
Volume volume = getVolume(srcVolumeId);
919+
VTreeMigrationInfo.MigrationStatus migrationStatus = getVolumeTreeMigrationStatus(volume.getVtreeId());
920+
if (migrationStatus != null && migrationStatus == VTreeMigrationInfo.MigrationStatus.NotInMigration) {
921+
LOG.debug("Volume: " + srcVolumeId + " is not migrating, no need to rollback");
922+
return true;
923+
}
924+
925+
pauseVolumeMigration(srcVolumeId, true); // Pause forcefully
926+
// Wait a few secs for the volume migration to change to the Paused state
927+
boolean paused = false;
928+
int retryCount = 5;
929+
while (retryCount > 0) {
930+
try {
931+
Thread.sleep(1000); // Try every sec
932+
migrationStatus = getVolumeTreeMigrationStatus(volume.getVtreeId()); // Get updated migration status
933+
if (migrationStatus != null && migrationStatus == VTreeMigrationInfo.MigrationStatus.Paused) {
934+
LOG.debug("Migration for the volume: " + srcVolumeId + " paused");
935+
paused = true;
936+
break;
937+
}
938+
} catch (Exception ex) {
939+
LOG.warn("Exception while checking for migration pause status of the volume: " + srcVolumeId + " - " + ex.getLocalizedMessage());
940+
// don't do anything
941+
} finally {
942+
retryCount--;
943+
}
944+
}
945+
946+
if (paused) {
947+
// Rollback migration to the src pool (should be quick)
948+
response = post(
949+
"/instances/Volume::" + srcVolumeId + "/action/migrateVTree",
950+
String.format("{\"destSPId\":\"%s\"}", volume.getStoragePoolId()));
951+
checkResponseOK(response);
952+
return true;
953+
} else {
954+
LOG.warn("Migration for the volume: " + srcVolumeId + " didn't pause, couldn't rollback");
955+
}
956+
} catch (final IOException e) {
957+
LOG.error("Failed to rollback volume migration due to: ", e);
958+
checkResponseTimeOut(e);
959+
} finally {
960+
if (response != null) {
961+
EntityUtils.consumeQuietly(response.getEntity());
962+
}
963+
}
964+
return false;
965+
}
966+
967+
private boolean pauseVolumeMigration(final String volumeId, final boolean forced) {
968+
if (Strings.isNullOrEmpty(volumeId)) {
969+
LOG.warn("Invalid Volume Id, Unable to pause migration of the volume " + volumeId);
970+
return false;
971+
}
972+
973+
HttpResponse response = null;
974+
try {
975+
// When paused gracefully, all data currently being moved is allowed to complete the migration.
976+
// When paused forcefully, migration of unfinished data is aborted and data is left at the source, if possible.
977+
// Pausing forcefully carries a potential risk to data.
978+
response = post(
979+
"/instances/Volume::" + volumeId + "/action/pauseVTreeMigration",
980+
String.format("{\"pauseType\":\"%s\"}", forced ? "Forcefully" : "Gracefully"));
981+
checkResponseOK(response);
982+
return true;
983+
} catch (final IOException e) {
984+
LOG.error("Failed to pause migration of the volume due to: ", e);
985+
checkResponseTimeOut(e);
986+
} finally {
987+
if (response != null) {
988+
EntityUtils.consumeQuietly(response.getEntity());
989+
}
990+
}
991+
return false;
992+
}
993+
857994
///////////////////////////////////////////////////////
858995
//////////////// StoragePool APIs /////////////////////
859996
///////////////////////////////////////////////////////

0 commit comments

Comments
 (0)