有人说用 Socket 请求 http 服务效率要比 HttpWebRequest 高很多, 但是又没有提供源码或者对比测试结果. 我对此很好奇, 到底能差多少? 所以决定自己写个类实现 Socket 请求 http 的功能.
下面的代码实现了基本的 http, https 请求, 支持 gzip 解压和分块传输.
经本人多次试验, 得出如下结论:
如果仅用 Socket 获取文本类型的内容, 且不考虑分块传输的情况, 那么 Socket 方式可比 HttpWebRequest 方式效率高 30%-40%.
如果考虑更多因素 (例如分块传输, gzip 解压), 就需要做更多处理, 所以多数时候效率反而不如 HttpWebRequest, 当然这与我写的代码效率也有很大关系.
本文首发地址
欢迎共同探讨技术问题,提供建议, 请留言,我会尽快回复.
/* Name: Full-featured HTTP client implemented on top of raw sockets
 * Version: v0.6
 * Description: Performs GET or POST requests over http and https, follows 301/302 redirects,
 *              and supports gzip decompression and chunked transfer encoding.
 * Supported operations: fetching text, image and file content.
 * Usage: new HttpWebSocket(); then call one of the Get... methods on the instance.
 * Notice: this code is for technical discussion only; it may be reposted freely,
 *         please do not use it commercially.
 * Author's blog: http://blog.itnmg.net http://www.cnblogs.com/lhg-net
 * Created: 2013-01-15
 * Revised: 2013-06-03
 */
using System;
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using System.Net.Security;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.IO.Compression;
using System.Web;
using System.Drawing;
using System.Security.Cryptography.X509Certificates;

namespace ExtLibrary.Net
{
    /// <summary>
    /// Minimal HTTP/HTTPS client that talks to the server through a raw <see cref="Socket"/>
    /// (http) or an <see cref="SslStream"/> (https).
    /// </summary>
    class HttpWebSocket
    {
        /// <summary>Maximum number of consecutive redirects followed for one request.</summary>
        private const int MaxRedirects = 10;

        /// <summary>Size in bytes of the receive buffer and the initial response stream capacity.</summary>
        private const int BufferSize = 10240;

        /// <summary>
        /// Gets or sets the send/receive timeout in seconds. The constructor sets it to 3.
        /// </summary>
        public int TimeOut
        {
            get;
            set;
        }

        /// <summary>
        /// Gets or sets the request cookies, each stored as a "name=value" string.
        /// Cookies received through Set-Cookie response headers are merged into this list.
        /// </summary>
        public List<string> Cookies
        {
            get;
            set;
        }

        /// <summary>
        /// Gets the parsed headers of the most recent response.
        /// </summary>
        public HttpHeader HttpHeaders
        {
            get;
            internal set;
        }

        /// <summary>
        /// Separator placed between the parts of the error text returned by GetHtmlUseSocket.
        /// </summary>
        private string ErrorMessageSeparate;

        /// <summary>
        /// Creates a client with a 3 second timeout, an empty cookie list and empty headers.
        /// </summary>
        public HttpWebSocket()
        {
            this.TimeOut = 3;
            this.Cookies = new List<string>();
            this.ErrorMessageSeparate = ";;";
            this.HttpHeaders = new HttpHeader();
        }

        /// <summary>
        /// Requests an http or https address with GET or POST and decodes the response as an image.
        /// </summary>
        /// <param name="url">Absolute request URL.</param>
        /// <param name="referer">Referer URL; may be null or empty.</param>
        /// <param name="postData">POST body; null or blank sends a GET request.</param>
        /// <returns>The decoded image, or null when the response body cannot be decoded as an image.</returns>
        public Image GetImageUseSocket( string url, string referer, string postData = null )
        {
            // Request errors are intentionally not caught here; only decoding is best-effort.
            MemoryStream ms = this.GetSocketResult( url, referer, postData );

            if ( ms == null )
            {
                return null;
            }

            try
            {
                // The stream is deliberately left open: Image.FromStream requires the stream
                // to stay open for the lifetime of the returned Image.
                return Image.FromStream( ms );
            }
            catch ( Exception )
            {
                // Best effort by design: a body that is not a valid image yields null.
                return null;
            }
        }

        /// <summary>
        /// Requests an http or https address with GET or POST and returns the body as text.
        /// </summary>
        /// <param name="url">Absolute request URL.</param>
        /// <param name="postData">POST body; null or blank sends a GET request.</param>
        /// <returns>
        /// The response text. If an exception occurs, the last known HTTP status code and the
        /// exception details, joined by the error separator, are returned instead.
        /// </returns>
        public string GetHtmlUseSocket( string url, string postData = null )
        {
            return this.GetHtmlUseSocket( url, null, postData );
        }

        /// <summary>
        /// Requests an http or https address with GET or POST and returns the body as text.
        /// The body is decoded with the charset announced by the response, or UTF-8 when none is given.
        /// </summary>
        /// <param name="url">Absolute request URL.</param>
        /// <param name="referer">Referer URL; may be null or empty.</param>
        /// <param name="postData">POST body; null or blank sends a GET request.</param>
        /// <returns>
        /// The response text. If an exception occurs, the last known HTTP status code and the
        /// exception details, joined by the error separator, are returned instead.
        /// </returns>
        public string GetHtmlUseSocket( string url, string referer, string postData = null )
        {
            string result = string.Empty;

            try
            {
                MemoryStream ms = this.GetSocketResult( url, referer, postData );

                if ( ms != null )
                {
                    string charset = string.IsNullOrWhiteSpace( this.HttpHeaders.Charset ) ? "UTF-8" : this.HttpHeaders.Charset;
                    result = Encoding.GetEncoding( charset ).GetString( ms.ToArray() );
                }
            }
            catch ( SocketException se )
            {
                result = this.HttpHeaders.ResponseStatusCode + this.ErrorMessageSeparate + se.ErrorCode.ToString() + this.ErrorMessageSeparate + se.SocketErrorCode.ToString( "G" ) + this.ErrorMessageSeparate + se.Message;
            }
            catch ( Exception e )
            {
                result = this.HttpHeaders.ResponseStatusCode + this.ErrorMessageSeparate + e.Message;
            }

            return result;
        }

        /// <summary>
        /// Requests an http or https address with GET or POST, following redirects.
        /// </summary>
        /// <param name="url">Absolute request URL.</param>
        /// <param name="referer">Referer URL; may be null or empty.</param>
        /// <param name="postData">POST body; null or blank sends a GET request.</param>
        /// <returns>The response body, already gzip-decompressed when necessary.</returns>
        private MemoryStream GetSocketResult( string url, string referer, string postData )
        {
            return this.GetSocketResult( url, referer, postData, 0 );
        }

        /// <summary>
        /// Implementation of <see cref="GetSocketResult(string, string, string)"/> that tracks
        /// how many redirects have been followed so a redirect loop cannot recurse forever.
        /// </summary>
        /// <param name="url">Absolute request URL.</param>
        /// <param name="referer">Referer URL; may be null or empty.</param>
        /// <param name="postData">POST body; null or blank sends a GET request.</param>
        /// <param name="redirectCount">Number of redirects already followed for this request.</param>
        /// <returns>The response body, already gzip-decompressed when necessary.</returns>
        /// <exception cref="UriFormatException">The URL is blank or malformed.</exception>
        /// <exception cref="ArgumentException">The URL scheme is neither http nor https.</exception>
        /// <exception cref="WebException">More than <see cref="MaxRedirects"/> redirects were encountered.</exception>
        private MemoryStream GetSocketResult( string url, string referer, string postData, int redirectCount )
        {
            if ( string.IsNullOrWhiteSpace( url ) )
            {
                throw new UriFormatException( "'Url' cannot be empty." );
            }

            Uri uri = new Uri( url );
            MemoryStream result;

            if ( uri.Scheme == Uri.UriSchemeHttp )
            {
                result = this.GetHttpResult( uri, referer, postData );
            }
            else if ( uri.Scheme == Uri.UriSchemeHttps )
            {
                result = this.GetSslResult( uri, referer, postData );
            }
            else
            {
                throw new ArgumentException( "url must start with HTTP or HTTPS.", "url" );
            }

            string location = this.HttpHeaders.Location;
            string status = this.HttpHeaders.ResponseStatusCode;

            // Location also appears on non-redirect responses (e.g. 201 Created); only 3xx is a redirect.
            bool isRedirect = !string.IsNullOrWhiteSpace( location )
                && status != null
                && status.StartsWith( "3", StringComparison.Ordinal );

            if ( !isRedirect )
            {
                return unGzip( result );
            }

            if ( redirectCount >= MaxRedirects )
            {
                throw new WebException( "Too many redirects." );
            }

            // The body of the redirect response itself is not needed.
            result.Dispose();

            // Location may be relative; resolve it against the URL that produced it.
            Uri target = new Uri( uri, location );

            // The redirect target is fetched with GET and the current URL as referer.
            return this.GetSocketResult( target.AbsoluteUri, uri.AbsoluteUri, null, redirectCount + 1 );
        }

        /// <summary>
        /// Sends a GET or POST request to an http address over a plain TCP socket.
        /// </summary>
        /// <param name="uri">Absolute request URI.</param>
        /// <param name="referer">Referer URL; may be null or empty.</param>
        /// <param name="postData">POST body; null or blank sends a GET request.</param>
        /// <returns>The raw (possibly still compressed) response body, positioned at its start.</returns>
        private MemoryStream GetHttpResult( Uri uri, string referer, string postData )
        {
            MemoryStream result = new MemoryStream( BufferSize );
            byte[] send = GetSendHeaders( uri, referer, postData );
            Socket httpSocket = new Socket( AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp );

            try
            {
                httpSocket.SendTimeout = this.TimeOut * 1000;
                httpSocket.ReceiveTimeout = this.TimeOut * 1000;
                httpSocket.Connect( uri.Host, uri.Port );

                if ( httpSocket.Connected )
                {
                    httpSocket.Send( send, SocketFlags.None );
                    this.ProcessData( httpSocket, ref result );
                }
            }
            finally
            {
                // Shutdown throws on a socket that never connected, which would hide the
                // original connect failure, so it is attempted only on a live connection.
                if ( httpSocket.Connected )
                {
                    try
                    {
                        httpSocket.Shutdown( SocketShutdown.Both );
                    }
                    catch ( SocketException )
                    {
                        // The peer may already have closed the connection; nothing left to do.
                    }
                }

                httpSocket.Close();
            }

            result.Seek( 0, SeekOrigin.Begin );

            return result;
        }

        /// <summary>
        /// Sends a GET or POST request to an https address over an <see cref="SslStream"/>.
        /// The server certificate is accepted only when validation reports no policy errors.
        /// </summary>
        /// <param name="uri">Absolute request URI.</param>
        /// <param name="referer">Referer URL; may be null or empty.</param>
        /// <param name="postData">POST body; null or blank sends a GET request.</param>
        /// <returns>The raw (possibly still compressed) response body, positioned at its start.</returns>
        private MemoryStream GetSslResult( Uri uri, string referer, string postData )
        {
            MemoryStream result = new MemoryStream( BufferSize );
            byte[] send = GetSendHeaders( uri, referer, postData );
            TcpClient client = new TcpClient( uri.Host, uri.Port );

            try
            {
                using ( SslStream sslStream = new SslStream( client.GetStream(), true
                    , ( sender, certificate, chain, sslPolicyErrors ) => sslPolicyErrors == SslPolicyErrors.None ) )
                {
                    sslStream.ReadTimeout = this.TimeOut * 1000;
                    sslStream.WriteTimeout = this.TimeOut * 1000;

                    // No client certificate is sent. This overload lets the platform pick the
                    // protocol version instead of forcing the obsolete SSL3/TLS1.0 set.
                    sslStream.AuthenticateAsClient( uri.Host );

                    if ( sslStream.IsAuthenticated )
                    {
                        sslStream.Write( send, 0, send.Length );
                        sslStream.Flush();

                        this.ProcessData( sslStream, ref result );
                    }
                }
            }
            finally
            {
                client.Close();
            }

            result.Seek( 0, SeekOrigin.Begin );

            return result;
        }

        /// <summary>
        /// Builds the complete request (request line, headers and, for POST, the body).
        /// </summary>
        /// <param name="uri">Absolute request URI.</param>
        /// <param name="referer">Referer URL; when blank, the request URL itself is sent as referer.</param>
        /// <param name="postData">POST body; null or blank produces a GET request.</param>
        /// <returns>The UTF-8 encoded request bytes.</returns>
        private byte[] GetSendHeaders( Uri uri, string referer, string postData )
        {
            bool isPost = !string.IsNullOrWhiteSpace( postData );
            StringBuilder head = new StringBuilder( 512 );

            // Line endings are written explicitly so they never depend on how this file is saved.
            head.Append( isPost ? "POST" : "GET" ).Append( ' ' ).Append( uri.PathAndQuery ).Append( " HTTP/1.1\r\n" );
            head.Append( "Accept: text/html, application/xhtml+xml, */*\r\n" );
            head.Append( "Referer: " ).Append( string.IsNullOrWhiteSpace( referer ) ? uri.AbsoluteUri : referer ).Append( "\r\n" );
            head.Append( "Accept-Language: zh-CN\r\n" );
            head.Append( "User-Agent: Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)\r\n" );
            // Only gzip is advertised because only gzip is decoded by unGzip.
            head.Append( "Accept-Encoding: gzip\r\n" );
            // Authority includes the port when it is not the scheme default.
            head.Append( "Host: " ).Append( uri.Authority ).Append( "\r\n" );
            // Each request uses its own connection, so ask the server to close it after the
            // response; this also makes "read until close" terminate promptly.
            head.Append( "Connection: close\r\n" );
            head.Append( "Cache-Control: no-cache\r\n" );

            if ( this.Cookies != null && this.Cookies.Count > 0 )
            {
                head.Append( "Cookie: " ).Append( string.Join( "; ", this.Cookies.ToArray() ) ).Append( "\r\n" );
            }

            if ( !isPost )
            {
                head.Append( "\r\n" );
                return Encoding.UTF8.GetBytes( head.ToString() );
            }

            // Content-Length counts bytes, not characters, and the body is sent without
            // any trailing line break.
            byte[] bodyBytes = Encoding.UTF8.GetBytes( postData );

            head.Append( "Content-Type: application/x-www-form-urlencoded\r\n" );
            head.Append( "Content-Length: " )
                .Append( bodyBytes.Length.ToString( System.Globalization.CultureInfo.InvariantCulture ) )
                .Append( "\r\n\r\n" );

            byte[] headBytes = Encoding.UTF8.GetBytes( head.ToString() );
            byte[] request = new byte[headBytes.Length + bodyBytes.Length];

            Buffer.BlockCopy( headBytes, 0, request, 0, headBytes.Length );
            Buffer.BlockCopy( bodyBytes, 0, request, headBytes.Length, bodyBytes.Length );

            return request;
        }

        /// <summary>
        /// Parses the response header text and stores the result in <see cref="HttpHeaders"/>
        /// (replacing the previous value) and <see cref="Cookies"/>.
        /// </summary>
        /// <param name="headText">Status line and header lines separated by CRLF.</param>
        /// <exception cref="ArgumentNullException"><paramref name="headText"/> is null or blank.</exception>
        /// <exception cref="ArgumentException"><paramref name="headText"/> contains no header lines.</exception>
        private void SetThisHeaders( string headText )
        {
            if ( string.IsNullOrWhiteSpace( headText ) )
            {
                throw new ArgumentNullException( "headText", "'WithHeadersText' cannot be empty." );
            }

            string[] headers = headText.Split( new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries );

            if ( headers.Length == 0 )
            {
                throw new ArgumentException( "'WithHeadersText' param format error.", "headText" );
            }

            this.HttpHeaders = new HttpHeader();

            foreach ( string head in headers )
            {
                if ( head.StartsWith( "HTTP", StringComparison.OrdinalIgnoreCase ) )
                {
                    // Status line: "HTTP/1.1 200 OK" -> "200".
                    string[] ts = head.Split( ' ' );

                    if ( ts.Length > 1 )
                    {
                        this.HttpHeaders.ResponseStatusCode = ts[1];
                    }
                }
                else if ( head.StartsWith( "Set-Cookie:", StringComparison.OrdinalIgnoreCase ) )
                {
                    this.StoreCookie( head.Substring( 11 ) );
                }
                else if ( head.StartsWith( "Location:", StringComparison.OrdinalIgnoreCase ) )
                {
                    this.HttpHeaders.Location = head.Substring( 9 ).Trim();
                }
                else if ( head.StartsWith( "Content-Encoding:", StringComparison.OrdinalIgnoreCase ) )
                {
                    if ( head.IndexOf( "gzip", StringComparison.OrdinalIgnoreCase ) >= 0 )
                    {
                        this.HttpHeaders.IsGzip = true;
                    }
                }
                else if ( head.StartsWith( "Content-Type:", StringComparison.OrdinalIgnoreCase ) )
                {
                    string[] types = head.Substring( 13 ).Split( new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries );

                    foreach ( string t in types )
                    {
                        string part = t.Trim();
                        int charsetAt = part.IndexOf( "charset=", StringComparison.OrdinalIgnoreCase );

                        if ( charsetAt >= 0 )
                        {
                            // The value may be quoted, e.g. charset="utf-8".
                            this.HttpHeaders.Charset = part.Substring( charsetAt + 8 ).Trim().Trim( '"', '\'' );
                        }
                        else if ( part.IndexOf( '/' ) >= 0 )
                        {
                            this.HttpHeaders.ContentType = part;
                        }
                    }
                }
                else if ( head.StartsWith( "Content-Length:", StringComparison.OrdinalIgnoreCase ) )
                {
                    long length;

                    // A malformed value is ignored instead of aborting the whole response.
                    if ( long.TryParse( head.Substring( 15 ).Trim(), System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture, out length ) )
                    {
                        this.HttpHeaders.ContentLength = length;
                        this.HttpHeaders.HasContentLength = true;
                    }
                }
                else if ( head.StartsWith( "Transfer-Encoding:", StringComparison.OrdinalIgnoreCase )
                    && head.IndexOf( "chunked", StringComparison.OrdinalIgnoreCase ) >= 0 )
                {
                    this.HttpHeaders.IsChunk = true;
                }
            }
        }

        /// <summary>
        /// Extracts the "name=value" pair from a Set-Cookie header value and adds it to
        /// <see cref="Cookies"/>, replacing an existing cookie with the same name.
        /// </summary>
        /// <param name="setCookieValue">Text following "Set-Cookie:" in the header line.</param>
        private void StoreCookie( string setCookieValue )
        {
            // Everything after the first ';' is cookie attributes (path, expires, ...).
            int attributesAt = setCookieValue.IndexOf( ';' );
            string cookie = ( attributesAt < 0 ? setCookieValue : setCookieValue.Substring( 0, attributesAt ) ).Trim();

            if ( cookie.Length == 0 )
            {
                return;
            }

            this.Cookies = this.Cookies ?? new List<string>();

            string name = cookie.Split( '=' )[0];
            int existing = this.Cookies.FindIndex( c => c.Split( '=' )[0] == name );

            if ( existing >= 0 )
            {
                this.Cookies[existing] = cookie;
            }
            else
            {
                this.Cookies.Add( cookie );
            }
        }

        /// <summary>
        /// Decompresses the response body when the response announced gzip encoding.
        /// </summary>
        /// <param name="data">Response body, compressed or not.</param>
        /// <returns>
        /// <paramref name="data"/> itself when it is not gzip-encoded, otherwise a new stream with
        /// the decompressed bytes. The returned stream is positioned at its start.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        private MemoryStream unGzip( MemoryStream data )
        {
            if ( data == null )
            {
                throw new ArgumentNullException( "data", "data cannot be null." );
            }

            data.Seek( 0, SeekOrigin.Begin );

            if ( !this.HttpHeaders.IsGzip )
            {
                return data;
            }

            MemoryStream result = new MemoryStream( 1024 );

            // Disposing the GZipStream also closes the compressed source stream.
            using ( GZipStream gs = new GZipStream( data, CompressionMode.Decompress ) )
            {
                gs.CopyTo( result );
            }

            // Rewind so position-sensitive consumers read from the beginning.
            result.Seek( 0, SeekOrigin.Begin );

            return result;
        }

        /// <summary>
        /// Reads a complete response from the data source: parses the headers, then copies the
        /// body (chunked or plain) into <paramref name="body"/>.
        /// </summary>
        /// <typeparam name="T">Data source type; <see cref="Socket"/> or <see cref="SslStream"/>.</typeparam>
        /// <param name="reader">Data source instance.</param>
        /// <param name="body">Stream receiving the response body.</param>
        private void ProcessData<T>( T reader, ref MemoryStream body )
        {
            byte[] data = new byte[BufferSize];
            int readLength = 0;
            int bodyStart = GetHeaders( reader, ref data, ref readLength ); // index of the first body byte

            if ( bodyStart < 0 )
            {
                return;
            }

            if ( this.HttpHeaders.IsChunk )
            {
                GetChunkData( reader, ref data, ref bodyStart, ref readLength, ref body );
            }
            else
            {
                GetBodyData( reader, ref data, bodyStart, readLength, ref body );
            }
        }

        /// <summary>
        /// Reads from the data source until the blank line that ends the response headers,
        /// then parses the headers and updates the related properties.
        /// </summary>
        /// <typeparam name="T">Data source type.</typeparam>
        /// <param name="reader">Data source instance.</param>
        /// <param name="data">Receive buffer; on return it holds the last block read.</param>
        /// <param name="readLength">Number of valid bytes in <paramref name="data"/>.</param>
        /// <returns>
        /// Index in <paramref name="data"/> of the first body byte, or -1 when the source ended
        /// before the headers were complete.
        /// </returns>
        private int GetHeaders<T>( T reader, ref byte[] data, ref int readLength )
        {
            StringBuilder sb = new StringBuilder( 1024 );

            while ( ( readLength = this.ReadData( reader, ref data ) ) > 0 )
            {
                // Only the bytes actually received are scanned; the rest of the buffer is stale.
                for ( int i = 0; i < readLength; i++ )
                {
                    char c = (char)data[i];
                    sb.Append( c );

                    bool endOfHeaders = c == '\n'
                        && sb.Length >= 4
                        && sb[sb.Length - 2] == '\r'
                        && sb[sb.Length - 3] == '\n'
                        && sb[sb.Length - 4] == '\r';

                    if ( endOfHeaders )
                    {
                        this.SetThisHeaders( sb.ToString() );
                        return i + 1;
                    }
                }
            }

            return -1;
        }

        /// <summary>
        /// Copies a non-chunked body into <paramref name="body"/>. Reading stops once
        /// Content-Length bytes have arrived, or, when the response has no Content-Length,
        /// when the source reports no more data.
        /// </summary>
        /// <typeparam name="T">Data source type.</typeparam>
        /// <param name="reader">Data source instance.</param>
        /// <param name="data">Receive buffer containing bytes already read but not yet consumed.</param>
        /// <param name="startIndex">Index in <paramref name="data"/> of the first body byte.</param>
        /// <param name="readLength">Number of valid bytes in <paramref name="data"/>.</param>
        /// <param name="body">Stream receiving the body.</param>
        private void GetBodyData<T>( T reader, ref byte[] data, int startIndex, int readLength, ref MemoryStream body )
        {
            long contentTotal = 0;

            // Body bytes that arrived in the same block as the headers.
            if ( startIndex < readLength )
            {
                int count = readLength - startIndex;
                body.Write( data, startIndex, count );
                contentTotal = count;
            }

            bool hasLength = this.HttpHeaders.HasContentLength;
            long expected = this.HttpHeaders.ContentLength;

            // Completion is checked before every read so a body that is already complete
            // (including Content-Length: 0) never blocks waiting for more data.
            while ( !hasLength || contentTotal < expected )
            {
                int received = this.ReadData( reader, ref data );

                if ( received <= 0 )
                {
                    break;
                }

                body.Write( data, 0, received );
                contentTotal += received;
            }
        }

        /// <summary>
        /// Copies a chunked body into <paramref name="body"/>, chunk by chunk, until the
        /// zero-length terminating chunk or the end of the source.
        /// </summary>
        /// <typeparam name="T">Data source type.</typeparam>
        /// <param name="reader">Data source instance.</param>
        /// <param name="data">Receive buffer containing bytes already read but not yet consumed.</param>
        /// <param name="startIndex">Index in <paramref name="data"/> of the next unconsumed byte.</param>
        /// <param name="readLength">Number of valid bytes in <paramref name="data"/>.</param>
        /// <param name="body">Stream receiving the chunk payloads.</param>
        private void GetChunkData<T>( T reader, ref byte[] data, ref int startIndex, ref int readLength, ref MemoryStream body )
        {
            while ( true )
            {
                int chunkSize = this.GetChunkHead( reader, ref data, ref startIndex, ref readLength );

                // 0 marks the last chunk; a negative value means the source ended early.
                if ( chunkSize <= 0 )
                {
                    break;
                }

                this.GetChunkBody( reader, ref data, ref startIndex, ref readLength, ref body, chunkSize );
            }
        }

        /// <summary>
        /// Reads the next chunk-size line. Blank lines before it (the CRLF that terminates the
        /// previous chunk's data) are skipped, wherever they fall relative to buffer boundaries.
        /// </summary>
        /// <typeparam name="T">Data source type.</typeparam>
        /// <param name="reader">Data source instance.</param>
        /// <param name="data">Receive buffer containing bytes already read but not yet consumed.</param>
        /// <param name="startIndex">Index of the next unconsumed byte; updated to the byte after the size line.</param>
        /// <param name="readLength">Number of valid bytes in <paramref name="data"/>.</param>
        /// <returns>The chunk length; 0 for the final chunk; -1 when the source ended first.</returns>
        /// <exception cref="InvalidDataException">The size line is not a valid hexadecimal length.</exception>
        private int GetChunkHead<T>( T reader, ref byte[] data, ref int startIndex, ref int readLength )
        {
            StringBuilder line = new StringBuilder( 16 ); // collects the size line across reads

            if ( startIndex >= readLength )
            {
                readLength = this.ReadData( reader, ref data );
                startIndex = 0;
            }

            while ( readLength > 0 )
            {
                for ( int i = startIndex; i < readLength; i++ )
                {
                    char c = (char)data[i];

                    if ( c != '\n' )
                    {
                        line.Append( c );
                        continue;
                    }

                    string text = line.ToString().Trim();
                    line.Length = 0;

                    if ( text.Length == 0 )
                    {
                        // Line break left over from the end of the previous chunk's data.
                        continue;
                    }

                    startIndex = i + 1;
                    return ParseChunkSize( text );
                }

                startIndex = 0;
                readLength = this.ReadData( reader, ref data );
            }

            return -1;
        }

        /// <summary>
        /// Parses a chunk-size line of the form "hex-size[;chunk-extension]".
        /// </summary>
        /// <param name="sizeLine">The trimmed size line without its line break.</param>
        /// <returns>The chunk length in bytes.</returns>
        /// <exception cref="InvalidDataException">The size is not a non-negative hexadecimal number.</exception>
        private static int ParseChunkSize( string sizeLine )
        {
            // Chunk extensions after ';' carry no payload information and are ignored.
            int extensionAt = sizeLine.IndexOf( ';' );
            string hex = ( extensionAt < 0 ? sizeLine : sizeLine.Substring( 0, extensionAt ) ).Trim();
            int size;

            if ( !int.TryParse( hex, System.Globalization.NumberStyles.AllowHexSpecifier,
                    System.Globalization.CultureInfo.InvariantCulture, out size ) || size < 0 )
            {
                throw new InvalidDataException( "Invalid chunk size line: '" + sizeLine + "'." );
            }

            return size;
        }

        /// <summary>
        /// Copies the payload of one chunk into <paramref name="body"/>.
        /// </summary>
        /// <typeparam name="T">Data source type.</typeparam>
        /// <param name="reader">Data source instance.</param>
        /// <param name="data">Receive buffer containing bytes already read but not yet consumed.</param>
        /// <param name="startIndex">Index of the next unconsumed byte; updated to the byte after the payload.</param>
        /// <param name="readLength">Number of valid bytes in <paramref name="data"/>.</param>
        /// <param name="body">Stream receiving the chunk payload.</param>
        /// <param name="chunkSize">Payload length announced by the chunk-size line.</param>
        private void GetChunkBody<T>( T reader, ref byte[] data, ref int startIndex, ref int readLength, ref MemoryStream body, int chunkSize )
        {
            if ( chunkSize <= 0 )
            {
                return;
            }

            int remaining = chunkSize; // payload bytes still to be copied

            if ( startIndex >= readLength )
            {
                readLength = this.ReadData( reader, ref data );
                startIndex = 0;
            }

            while ( readLength > 0 )
            {
                int count = Math.Min( readLength - startIndex, remaining );

                body.Write( data, startIndex, count );
                remaining -= count;

                if ( remaining == 0 )
                {
                    // The CRLF that follows the payload is left in the buffer; GetChunkHead
                    // skips it as a blank line, so it may safely straddle two reads.
                    startIndex += count;
                    return;
                }

                startIndex = 0;
                readLength = this.ReadData( reader, ref data );
            }
        }

        /// <summary>
        /// Reads the next block of data from the data source into <paramref name="data"/>.
        /// </summary>
        /// <typeparam name="T">Data source type.</typeparam>
        /// <param name="reader">Data source; a <see cref="Socket"/> or an <see cref="SslStream"/>.</param>
        /// <param name="data">Buffer receiving the data.</param>
        /// <returns>
        /// The number of bytes read, or -1 when <paramref name="reader"/> is neither a
        /// <see cref="Socket"/> nor an <see cref="SslStream"/>.
        /// </returns>
        private int ReadData<T>( T reader, ref byte[] data )
        {
            Socket socket = reader as Socket;

            if ( socket != null )
            {
                return socket.Receive( data, SocketFlags.None );
            }

            SslStream sslStream = reader as SslStream;

            if ( sslStream != null )
            {
                return sslStream.Read( data, 0, data.Length );
            }

            return -1;
        }
    }

    /// <summary>
    /// Values parsed from the status line and headers of an HTTP response.
    /// </summary>
    public class HttpHeader
    {
        /// <summary>
        /// Gets the response status code as text (the second token of the status line).
        /// </summary>
        public string ResponseStatusCode
        {
            get;
            internal set;
        }

        /// <summary>
        /// Gets the redirect URL from the Location header.
        /// </summary>
        public string Location
        {
            get;
            internal set;
        }

        /// <summary>
        /// Gets whether the Content-Encoding header mentions gzip.
        /// </summary>
        public bool IsGzip
        {
            get;
            internal set;
        }

        /// <summary>
        /// Gets the media type from the Content-Type header.
        /// </summary>
        public string ContentType
        {
            get;
            internal set;
        }

        /// <summary>
        /// Gets the character set from the Content-Type header.
        /// </summary>
        public string Charset
        {
            get;
            internal set;
        }

        /// <summary>
        /// Gets the value of the Content-Length header; 0 when the header is absent or invalid.
        /// </summary>
        public long ContentLength
        {
            get;
            internal set;
        }

        /// <summary>
        /// Gets whether the response carried a valid Content-Length header. This distinguishes
        /// an explicit "Content-Length: 0" from a missing header.
        /// </summary>
        public bool HasContentLength
        {
            get;
            internal set;
        }

        /// <summary>
        /// Gets whether the Transfer-Encoding header announces chunked encoding.
        /// </summary>
        public bool IsChunk
        {
            get;
            internal set;
        }
    }
}