java 字符串截取的实例详解

java 字符串截取的实例详解

前言：

在Java中，字符串“abcd”与字符串“ab你好”的长度是一样的，都是四个字符。

但对应的字节数不同：在GBK编码中一个汉字占两个字节，在UTF-8编码中一个常用汉字占三个字节。

定义一个方法，按照指定的字节数来取子串。

例如：对于“ab你好”（以GBK编码为例），如果取三个字节，得到的是“ab”加上“你”字的半个，这半个字就要舍弃，结果为“ab”。

如果取四个字节就是“ab你”，取五个字节还是“ab你”。

仅考虑GBK和utf-8编码

实例代码：

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

									import java.io.UnsupportedEncodingException;
									import java.nio.charset.Charset;
									import java.nio.charset.StandardCharsets;

									import org.junit.Test;

									/**

									 * @author<a href="mailto:953801304@qq.com" rel="external nofollow" >胡龙华</a>

									 * @version 2017-4-4 下午1:08:45

									 * @fileName StringCut.java

									 */

									public class StringCut {

									  @Test

									  public void analyze(){

									    String str1 = "你好abc";

									    byte[] bs1=null;

									    byte[] bs2=null;

									    try {

									       bs1 = str1.getBytes("GBK");

									       System.out.println("---GBK---");

									       for(byte b:bs1){

									         System.out.print(b+" ");

									       }

									       System.out.println();

									      //-60 -29 -70 -61 97 98 99 

									      // 发现规律，再gbk中一个中文汉字 都是以两个字节 小于0的数存储

									       bs2 = str1.getBytes("utf-8");

									       System.out.println("---utf-8---");

									       for(byte b:bs2){

									         System.out.print(b+" ");

									       }

									      //-28 -67 -96 -27 -91 -67 97 98 99 

									      // 发现规律，在utf-8中一个中文汉字 是以三个字节 小于0 的数存储

									    } catch (UnsupportedEncodingException e) {

									      e.printStackTrace();

									    }

									  }

									  /**

									   * 思路：从第len个往前数，连续2的倍数个负数则全部输出，单数个则去掉最后一个输出

									   * @param str

									   * @param len

									   * @return

									   */

									  private static String StringCutByGBK(String str,int len){

									    byte[] bs = null;

									    try {

									      int count = 0;

									      bs = str .getBytes("GBK");

									      for(int i=len-1;i>=0;i--){

									        if(bs[i]<0){

									          count++;

									        }else{

									          break;

									        }

									        // 0  1  2  3  4 5  6 7  8  9  10 11 12  

									      }  //-60 -29 -70 -61 -80 -95 97 98 99 -76 -17 -72 -25 

									      if(count%2==0){

									        String s=new String(bs, 0, len, "GBK");

									        System.out.println("截取"+len+"个字符："+s);

									      }else{

									        String s=new String(bs, 0, len-1, "GBK");

									        System.out.println("截取"+len+"个字符："+s);

									      }

									    } catch (UnsupportedEncodingException e) {

									      e.printStackTrace();

									    }

									    return null;

									  }

									  /**

									   * 思路：从第len个往前数，连续3的倍数个负数则全部输出，其他情况则去掉最后count%3个输出

									   * @param str

									   * @param len

									   * @return

									   */

									  private static String StringCutByUTF8(String str,int len){

									    byte[] bs = null;

									    try {

									      int count = 0;

									      bs = str .getBytes("UTF-8");

									      for(int i=len-1;i>=0;i--){

									        if(bs[i]<0){

									          count++;

									        }else{

									          break;

									        }

									      }  

									      // 0  1  2  3  4  5  6 7 8 9  10 11 12

									      //-60 -29 -70 -61 -80 -95 97 98 99 -76 -17 -72 -25 

									      if(count%3==0){

									        String s=new String(bs, 0, len, "UTF-8");

									        System.out.println("截取"+len+"个字符："+s);

									      }else{

									        String s=new String(bs, 0, len-count%3, "UTF-8");

									        System.out.println("截取"+len+"个字符："+s);

									      }

									    } catch (UnsupportedEncodingException e) {

									      e.printStackTrace();

									    }

									    return null;

									  }

									  @Test

									  public void TEST() {

									    String str = "你好啊abc达哥";

									    try {

									      System.out.println("---测试gbk---");

									      byte bs [] = str.getBytes("GBK");

									      for(int i=0;i<=bs.length;i++){

									        //System.out.print(bs[i]+" ");

									        StringCutByGBK(str,i);

									      }

									      System.out.println("---测试UTF-8---");

									      byte bs2 [] = str.getBytes("utf-8");

									      for(int i=0;i<=bs2.length;i++){

									        //System.out.print(bs[i]+" ");

									        StringCutByUTF8(str,i);

									      }

									    } catch (UnsupportedEncodingException e) {

									      // TODO Auto-generated catch block

									      e.printStackTrace();

									    }

									  }

									}

以上就是java 字符串截取的实例，如有疑问请留言或者到本站社区交流讨论，本站关于java的文章还有很多，希望大家多多搜索参阅，感谢阅读，希望能帮助到大家，谢谢大家对本站的支持！

原文链接：http://blog.csdn.net/weixin_37720904/article/details/69061436

java 字符串截取的实例详解

相关文章

热门资讯