记一次Android项目网络优化
1,背景
去年年末换了新公司,发现公司App的网络响应速度十分之慢,一个页面十秒的等待时间甚至超时都是常态,一方面的原因是服务器部署在国外,另一方面App内部可能存在问题。
2,诊断
利用okhttp中的EventListener接口,可以方便地监听每次网络请求的生命周期。
代码如下:
/**
 * OkHttp [EventListener] that records a per-call timeline for diagnosing slow requests.
 *
 * Every callback appends `<seconds since callStartNanos>-<event>` to an in-memory log. When the
 * call finishes (`callEnd` or `callFailed`) the whole timeline is written to Logcat and the
 * call's elapsed time is added to a running total shared by all listener instances.
 *
 * @param callId sequence number used to tell calls apart in the log output.
 * @param url URL of the request being observed.
 * @param callStartNanos [System.nanoTime] reading that all event timestamps are measured from.
 */
open class HttpEventListener(
    callId: Long,
    url: HttpUrl,
    private val callStartNanos: Long
) : EventListener() {
    // Never assigned anywhere in this class, so it always renders as an empty string in the
    // callStart entry.
    private val client = ""
    private val df = SimpleDateFormat("yyyy-MM-dd HH:mm:ss::SSS", Locale.US)

    // Timeline of this call; the header line carries the URL and the call id.
    private val sbLog: StringBuilder = StringBuilder(url.toString())
        .append(" callId is:$callId").append(":\n")

    // Structured record of the call. This class only fills it in (start time, api, call id,
    // method, resolved IPs); nothing here persists it.
    private val netLog: NetLogRealmObj = NetLogRealmObj().apply {
        startTime = df.format(Date())
        api = url.toString()
        this.callId = callId.toString()
    }

    /**
     * Appends [httpEvent] to the timeline, prefixed with the seconds elapsed since
     * [callStartNanos]. On a terminal event the full timeline and the updated running total
     * are logged.
     */
    private fun recordEventLog(httpEvent: String) {
        val elapsedSeconds = (System.nanoTime() - callStartNanos) / NANOS_PER_SECOND
        val stamp = String.format(Locale.US, "%.3f", elapsedSeconds)
        sbLog.append("$stamp-$httpEvent").append(";\n")
        if (httpCallIsEndOrFailed(httpEvent)) {
            Log.i(TAG, sbLog.toString())
            Log.i(TAG, "TotalTimeUsed : ${addToTotal(elapsedSeconds)}")
        }
    }

    /**
     * Returns true for the two terminal events. `callFailed` entries always begin with that
     * word, so a prefix match is used: a substring match would also fire on unrelated events
     * whose payload (for example a request URL) happens to contain "callFailed".
     */
    private fun httpCallIsEndOrFailed(httpEvent: String) =
        httpEvent == "callEnd" || httpEvent.startsWith("callFailed")

    override fun callStart(call: Call) {
        super.callStart(call)
        recordEventLog("callStart:$client:::${call.request()}")
        netLog.method = call.request().method()
    }

    override fun dnsStart(call: Call, domainName: String) {
        super.dnsStart(call, domainName)
        recordEventLog("dnsStart:$domainName")
    }

    override fun dnsEnd(call: Call, domainName: String, inetAddressList: List<InetAddress>) {
        super.dnsEnd(call, domainName, inetAddressList)
        if (inetAddressList.isEmpty()) {
            recordEventLog("dnsEnd:No IP get!!!")
            return
        }
        recordEventLog("dnsEnd:$inetAddressList")
        // NOTE(review): addresses are appended back to back with no separator; confirm that is
        // what consumers of netLog.ip expect before changing it.
        inetAddressList.forEach { netLog.ip += it.toString() }
    }

    override fun connectStart(call: Call, inetSocketAddress: InetSocketAddress, proxy: Proxy) {
        super.connectStart(call, inetSocketAddress, proxy)
        recordEventLog("connectStart: InetSocketAddress is: $inetSocketAddress Proxy is $proxy")
    }

    override fun secureConnectStart(call: Call) {
        super.secureConnectStart(call)
        recordEventLog("secureConnectStart: ${call.request()}")
    }

    override fun secureConnectEnd(call: Call, handshake: Handshake?) {
        super.secureConnectEnd(call, handshake)
        val tlsVersion = handshake?.tlsVersion() ?: "unknown"
        recordEventLog("secureConnectEnd: tlsVersion is :$tlsVersion")
    }

    override fun connectEnd(
        call: Call,
        inetSocketAddress: InetSocketAddress,
        proxy: Proxy,
        protocol: Protocol?
    ) {
        super.connectEnd(call, inetSocketAddress, proxy, protocol)
        recordEventLog("connectEnd: Ip is:$inetSocketAddress")
    }

    override fun connectFailed(
        call: Call,
        inetSocketAddress: InetSocketAddress,
        proxy: Proxy,
        protocol: Protocol?,
        ioe: IOException
    ) {
        super.connectFailed(call, inetSocketAddress, proxy, protocol, ioe)
        recordEventLog("connectFailed: $inetSocketAddress")
    }

    override fun connectionAcquired(call: Call, connection: Connection) {
        super.connectionAcquired(call, connection)
        recordEventLog("connectionAcquired: $connection")
    }

    override fun connectionReleased(call: Call, connection: Connection) {
        super.connectionReleased(call, connection)
        recordEventLog("connectionReleased:$connection")
    }

    override fun requestHeadersStart(call: Call) {
        super.requestHeadersStart(call)
        recordEventLog("requestHeadersStart")
    }

    override fun requestHeadersEnd(call: Call, request: Request) {
        super.requestHeadersEnd(call, request)
        recordEventLog("requestHeadersEnd")
    }

    override fun requestBodyStart(call: Call) {
        super.requestBodyStart(call)
        recordEventLog("requestBodyStart")
    }

    override fun requestBodyEnd(call: Call, byteCount: Long) {
        super.requestBodyEnd(call, byteCount)
        recordEventLog("requestBodyEnd: and the byteCount is $byteCount")
    }

    override fun responseHeadersStart(call: Call) {
        super.responseHeadersStart(call)
        recordEventLog("responseHeadersStart:${call.request()}")
    }

    override fun responseHeadersEnd(call: Call, response: Response) {
        super.responseHeadersEnd(call, response)
        recordEventLog("responseHeadersEnd:${call.request()}")
    }

    override fun requestFailed(call: Call, ioe: IOException) {
        super.requestFailed(call, ioe)
        // The original message ended with a stray '}' left over from a string template.
        recordEventLog("requestFailed:$ioe")
    }

    override fun responseBodyStart(call: Call) {
        super.responseBodyStart(call)
        recordEventLog("responseBodyStart")
    }

    override fun responseBodyEnd(call: Call, byteCount: Long) {
        super.responseBodyEnd(call, byteCount)
        // Reported in units of 1000 bytes.
        recordEventLog(
            "responseBodyEnd: and the responseBody byte count is ${byteCount / 1000.0} KB"
        )
    }

    override fun responseFailed(call: Call, ioe: IOException) {
        super.responseFailed(call, ioe)
        recordEventLog("responseFailed:$ioe")
    }

    override fun callEnd(call: Call) {
        super.callEnd(call)
        recordEventLog("callEnd")
    }

    override fun callFailed(call: Call, ioe: IOException) {
        super.callFailed(call, ioe)
        recordEventLog("callFailed: $ioe")
    }

    companion object {
        private const val TAG = "HttpEventListener"
        private const val NANOS_PER_SECOND = 1_000_000_000.0

        private val nextCallId = AtomicLong(1L)

        // Sum, in seconds, of the elapsed time of every finished call. Shared by all listener
        // instances, so updates go through addToTotal.
        private val totalLock = Any()
        private var totalTimeUsed = 0.0

        /** Adds [seconds] to the shared total under a lock and returns the new total. */
        private fun addToTotal(seconds: Double): Double = synchronized(totalLock) {
            totalTimeUsed += seconds
            totalTimeUsed
        }

        /** Factory that gives each call its own listener with a fresh call id and start time. */
        @JvmField
        val FACTORY: Factory = Factory { call ->
            val callId = nextCallId.getAndIncrement()
            HttpEventListener(callId, call.request().url(), System.nanoTime())
        }

        /** Factory that always returns the `NONE` listener, for switching the logging off. */
        @JvmField
        val NoneFactory: Factory = Factory { NONE }
    }
}
得到每个网络请求的生命周期:

发现红框中的流程,DNS解析—TCP握手—SSL握手,每个网络请求都在不断的重复。
理论上同一个域名,第一次请求完毕之后,之后所有的请求都应该直接复用Socket连接。如下图所示:

这也正是HTTP1.1协议相对于HTTP1.0协议的关键改进所在,而不负责任的垃圾代码,直接将项目中的网络请求速度拉回到1995年。
可以参考《Http权威指南》中的对比图,有没有复用连接,时间差异巨大:

因此, 问题1:没有复用Socket连接。
问题2:网络流量没有压缩处理,在没有本地加密机制的情况下,GZIP对json的压缩效果在50%以上,这样8年的老项目一直没有人发现这一点也让我倍感意外。
问题2相对容易解决,只需要在Nginx端开启对json的压缩配置就可以修复。
3,找到Root Cause
3.1: 第一个问题:多个okhttpInstance,
首先是代码中很傻的声明了3个okhttpInstance,完全独立的对象之间自然不能共享线程池、连接池。
因此,
解决方法1,强制收敛为一个单例提供唯一instance,这样的话代码改动较大,会影响原来代码的test case;
解决方法2,声明一个baseOkhttpClient,然后把这三个okhttpInstance从这个baseOkhttpClient中newBuilder出来,这样的话这3个okhttpInstance就能共享连接池,线程池,内部对象。
方法签名如下:
/** Returns a new Builder that is initialised from this client by passing {@code this} to it. */
public Builder newBuilder() {
return new Builder(this);
}
可以看到共享的原理是把this传递给了这个新的Builder,所有如下的对象都可以在多个okhttpInstance之间复用:
/**
 * Seeds this builder from an existing client.
 *
 * Most fields are assigned directly from {@code okHttpClient}, so object-typed settings such as
 * the dispatcher and the connection pool refer to the same instances as the source client. The
 * two interceptor lists are the exception: their elements are added to this builder's own lists.
 */
Builder(OkHttpClient okHttpClient) {
this.dispatcher = okHttpClient.dispatcher;
this.proxy = okHttpClient.proxy;
this.protocols = okHttpClient.protocols;
this.connectionSpecs = okHttpClient.connectionSpecs;
// Elements are copied into this builder's lists rather than aliasing the client's lists.
this.interceptors.addAll(okHttpClient.interceptors);
this.networkInterceptors.addAll(okHttpClient.networkInterceptors);
this.eventListenerFactory = okHttpClient.eventListenerFactory;
this.proxySelector = okHttpClient.proxySelector;
this.cookieJar = okHttpClient.cookieJar;
this.internalCache = okHttpClient.internalCache;
this.cache = okHttpClient.cache;
this.socketFactory = okHttpClient.socketFactory;
this.sslSocketFactory = okHttpClient.sslSocketFactory;
this.certificateChainCleaner = okHttpClient.certificateChainCleaner;
this.hostnameVerifier = okHttpClient.hostnameVerifier;
this.certificatePinner = okHttpClient.certificatePinner;
this.proxyAuthenticator = okHttpClient.proxyAuthenticator;
this.authenticator = okHttpClient.authenticator;
// Same pool instance as the source client.
this.connectionPool = okHttpClient.connectionPool;
this.dns = okHttpClient.dns;
this.followSslRedirects = okHttpClient.followSslRedirects;
this.followRedirects = okHttpClient.followRedirects;
this.retryOnConnectionFailure = okHttpClient.retryOnConnectionFailure;
this.callTimeout = okHttpClient.callTimeout;
this.connectTimeout = okHttpClient.connectTimeout;
this.readTimeout = okHttpClient.readTimeout;
this.writeTimeout = okHttpClient.writeTimeout;
this.pingInterval = okHttpClient.pingInterval;
}
至此第一个问题得到解决。
3.2: 第二个问题:sslSocketFactory对象每次都不同,
第一个问题解决后,发现Socket连接不能复用问题仍然存在。
为了找到根本原因,直接找到okhttp中负责判断Socket连接能否复用的源码,在RealConnection类中:
/**
* Returns true if this connection can carry a stream allocation to `address`. If non-null
* `route` is the resolved route for a connection.
*
* The checks run in order and the first one that fails returns false: capacity, then the
* non-host address fields, then an exact host match, and finally the connection coalescing
* requirements listed below.
*/
internal fun isEligible(address: Address, routes: List<Route>?): Boolean {
assertThreadHoldsLock()
// If this connection is not accepting new exchanges, we're done.
if (calls.size >= allocationLimit || noNewExchanges) return false
// If the non-host fields of the address don't overlap, we're done.
if (!this.route.address.equalsNonHost(address)) return false
// If the host exactly matches, we're done: this connection can carry the address.
if (address.url.host == this.route().address.url.host) {
return true // This connection is a perfect match.
}
// At this point we don't have a hostname match. But we may still be able to carry the request
// if our connection coalescing requirements are met. See also:
// https://hpbn.co/optimizing-application-delivery/#eliminate-domain-sharding
// https://daniel.haxx.se/blog/2016/08/18/http2-connection-coalescing/
// 1. This connection must be HTTP/2.
if (http2Connection == null) return false
// 2. The routes must share an IP address.
if (routes == null || !routeMatchesAny(routes)) return false
// 3. This connection's server certificates must cover the new host.
if (address.hostnameVerifier !== OkHostnameVerifier) return false
if (!supportsUrl(address.url)) return false
// 4. Certificate pinning must match the host.
try {
address.certificatePinner!!.check(address.url.host, handshake()!!.peerCertificates)
} catch (_: SSLPeerUnverifiedException) {
return false
}
return true // The caller's address can be carried by this connection.
}
直接打断点,然后开始网络请求,哪里return false,哪里就是Root Cause。
在if (!this.route.address.equalsNonHost(address)) return false这一行return,深入看看Address类 equalsNonHost的实现细节:
/**
* Returns true when this address and [that] agree on every field compared below: DNS, proxy
* authenticator, protocols, connection specs, proxy selector, proxy, SSL socket factory,
* hostname verifier, certificate pinner and URL port. The URL host is not compared.
*/
internal fun equalsNonHost(that: Address): Boolean {
return this.dns == that.dns &&
this.proxyAuthenticator == that.proxyAuthenticator &&
this.protocols == that.protocols &&
this.connectionSpecs == that.connectionSpecs &&
this.proxySelector == that.proxySelector &&
this.proxy == that.proxy &&
this.sslSocketFactory == that.sslSocketFactory &&
this.hostnameVerifier == that.hostnameVerifier &&
this.certificatePinner == that.certificatePinner &&
this.url.port == that.url.port
}
基本是比较了Address的所有属性,继续单步断点,发现是在this.sslSocketFactory == that.sslSocketFactory这个条件不满足,因此就诊断出了根本原因:sslSocketFactory的配置问题,每次网络请求的时候都是不同的sslSocketFactory对象。
原来的代码在实现证书锁定的时候,采用的是传统的sslSocketFactory, 这种方式比较繁琐,且项目中存在N多的域名需要pinning,因此果断采用okhttp的CertificatePinner, 虽然这个类的名字是证书锁定(Certificate Pinning),实际上的实现却是公钥固定,okhttp的作者仍然这么命名,大概是因为至少在安全性的角度,这两者没有任何区别。如果公司有证书Rotate的机制,那么通常公钥固定更加有优势,因为一般而言,变化的是证书的有效期,而公钥是不会变的。然而,本公司的证书Rotate的机制是公钥跟着一起变,因此,这一点没区别。
简单声明:
CertificatePinner certPinner = new CertificatePinner.Builder()
.add(“xx1.domain1.com",
"sha256/4hw5tz+scE+TW+dasda+nU7tq1V8=")
.add(“xx2.domain1.com",
"sha256/4hw5tz+scE+TW+3432423+nU7tq1V8=")
.add(“xx.domain2.com",
"sha256/4hw5tz+scE+TW+iojjo+nU7tq1V8=")
.build();
OkHttpClient okHttpClient = new OkHttpClient.Builder()
.certificatePinner(certPinner)
.build();
删除掉原来的sslSocketFactory代码,第二个问题得以解决。
4,优化效果:
在同样的手机、网络和操作路径下,把所有网络请求的耗时加起来,与优化之前的耗时对比,可以得到如下数据,总耗时大约减少了54.38%:
这是还没有开启Gzip的情况下,否则数据会更加夸张。

| Title | | Test1 | Test2 | Test3 | Test4 | Test5 | Test6 | avg net-time cost(seconds) | total net-request nums | avg cost time per request(seconds) |
|---|---|---|---|---|---|---|---|---|---|---|
| New | Net time cost:(seconds) | 519.7 | 423.6 | 505.7 | 416.7 | 419.1 | 399.6 | 447.4 | 110 | 4.07 |
| Old | Net time cost:(seconds) | 807.4 | 813.3 | 1510.6 | 1316.2 | 698.1 | 738.4 | 980.7 | 110 | 8.92 |
| Optimization Ratio: | (Old-New)/Old: | 54.38% |
5, 一个感叹
感觉项目已经搞不好了。