Expands health check to kill instances that fail to restart quickly (#549)
diff --git a/lib/services_gae.dart b/lib/services_gae.dart
index cfe4aaa..90692bd 100644
--- a/lib/services_gae.dart
+++ b/lib/services_gae.dart
@@ -117,7 +117,7 @@
}
Future _processReadynessRequest(io.HttpRequest request) async {
- if (commonServerImpl.running) {
+ if (!commonServerImpl.isRestarting) {
request.response.statusCode = io.HttpStatus.ok;
} else {
request.response.statusCode = io.HttpStatus.internalServerError;
@@ -128,9 +128,9 @@
}
Future _processHealthRequest(io.HttpRequest request) async {
- if (commonServerImpl.running && !commonServerImpl.analysisServersRunning) {
- _logger.severe('CommonServer running without analysis servers. '
- 'Intentionally failing healthcheck.');
+ if (!commonServerImpl.isHealthy) {
+ _logger.severe('CommonServer is no longer healthy.'
+ ' Intentionally failing health check.');
request.response.statusCode = io.HttpStatus.internalServerError;
} else {
try {
@@ -140,15 +140,20 @@
await file.writeAsString('testing123\n' * 1000, flush: true);
final stat = await file.stat();
if (stat.size > 10000) {
+ _logger.info('CommonServer healthy and file system working.'
+ ' Passing health check.');
request.response.statusCode = io.HttpStatus.ok;
} else {
+ _logger.severe('CommonServer healthy, but filesystem is not.'
+ ' Intentionally failing health check.');
request.response.statusCode = io.HttpStatus.internalServerError;
}
} finally {
await tempDir.delete(recursive: true);
}
} catch (e) {
- _logger.severe('Failed to create temporary file: $e');
+ _logger.severe('CommonServer healthy, but failed to create temporary'
+ ' file: $e');
request.response.statusCode = io.HttpStatus.internalServerError;
}
}
diff --git a/lib/src/common_server_impl.dart b/lib/src/common_server_impl.dart
index 4030ffa..5f13521 100644
--- a/lib/src/common_server_impl.dart
+++ b/lib/src/common_server_impl.dart
@@ -27,6 +27,7 @@
class BadRequest implements Exception {
String cause;
+
BadRequest(this.cause);
}
@@ -48,8 +49,17 @@
analysisServer.analysisServer != null &&
flutterAnalysisServer.analysisServer != null;
- bool _running = false;
- bool get running => _running;
+ // If non-null, this value indicates that the server is starting/restarting
+ // and holds the time at which that process began. If null, the server is
+ // ready to handle requests.
+ DateTime _restartingSince = DateTime.now();
+
+ bool get isRestarting => (_restartingSince != null);
+
+ // If the server has been trying and failing to restart for half an hour or
+ // more, something is seriously wrong.
+ bool get isHealthy => (_restartingSince == null ||
+ DateTime.now().difference(_restartingSince).inMinutes < 30);
CommonServerImpl(
this.sdkPath,
@@ -90,7 +100,7 @@
}
}));
- _running = true;
+ _restartingSince = null;
}
Future<void> warmup({bool useHtml = false}) async {
@@ -112,7 +122,8 @@
}
Future<dynamic> shutdown() {
- _running = false;
+ _restartingSince = DateTime.now();
+
return Future.wait(<Future<dynamic>>[
analysisServer.shutdown(),
flutterAnalysisServer.shutdown(),