C# 中的 StringTokenizer 类

在Java中,有一个StringTokenizer类,对于分析单词非常有用。

C# 中,似乎没有系统自带的StringTokenizer类,不过,可以自己写一个。

网上搜索到一个非常专业的代码。如下:

using System;
using System.Collections;
using System.Text;

/// <summary>
/// StringTokenizer. A String Tokenizer that accepts Strings as source and delimiter. Only 1 delimiter is supported (either String or char[]).
/// </summary>
public class StringTokenizer
{
    private int curIndex;
    private int numTokens;
    private ArrayList tokens;
    private string source;
    private string delimiter;

    /// <summary>
    /// Constructor for StringTokenizer Class.
    /// </summary>
    /// 
    /// The Source String. 
    /// The Delimiter String. If a 0 length delimiter is given, " " (space) is used by default.
    ///
	public StringTokenizer(string source, string delimiter)
	{
        this.tokens = new ArrayList(10);
        this.source = source;
        this.delimiter = delimiter;

        if (delimiter.Length <= 0)
        {
            this.delimiter = " ";
        }
        this.Tokenize();
	}

    ///
    /// Constructor for StringTokenizer Class.
    /// 
    /// The Source String.
    /// The Delimiter String as a char[]. Note that this is converted into a single String and expects Unicode encoded chars.
    /// 
    public StringTokenizer(string source, char[] delimiter)
        : this(source, new string(delimiter))
    {
    }

    ///
    /// Constructor for StringTokenizer Class. The default delimiter of " " (space) is used.
    /// 
    /// The Source String.
    /// 
    public StringTokenizer(string source)
        : this(source, "")
    {
    }

    ///
    /// Empty Constructor. Will create an empty StringTokenizer with no source, no delimiter, and no tokens.
    /// If you want to use this StringTokenizer you will have to call the NewSource(string s) method. You may
    /// optionally call the NewDelim(string d) or NewDelim(char[] d) methods if you don't with to use the default
    /// delimiter of " " (space).
    /// 
    public StringTokenizer()
        : this("", "")
    {

    }

    private void Tokenize()
    {
        string tempSource = this.source;
        string tok = "";
        this.numTokens = 0;
        this.tokens.Clear();
        this.curIndex = 0;

        if (tempSource.IndexOf(this.delimiter) < 0 && tempSource.Length > 0) {
            this.numTokens = 1;
            this.curIndex = 0;
            this.tokens.Add(tempSource);
            this.tokens.TrimToSize();
            tempSource = "";
        }
        else if (tempSource.IndexOf(this.delimiter) < 0 && tempSource.Length <= 0)
        {
            this.numTokens = 0;
            this.curIndex = 0;
            this.tokens.TrimToSize();
        }
        while (tempSource.IndexOf(this.delimiter) >= 0)
        {
            // Delimiter at beginning of source String.
            if (tempSource.IndexOf(this.delimiter) == 0)
            {
                if (tempSource.Length > this.delimiter.Length)
                {
                    tempSource = tempSource.Substring(this.delimiter.Length);
                }
                else
                {
                    tempSource = "";
                }
            }
            else
            {
                tok = tempSource.Substring(0, tempSource.IndexOf(this.delimiter));
                this.tokens.Add(tok);
                if (tempSource.Length > (this.delimiter.Length + tok.Length))
                {
                    tempSource = tempSource.Substring(this.delimiter.Length + tok.Length);
                }
                else
                {
                    tempSource = "";
                }
            }
        }
        // we may have a string leftover.
        if (tempSource.Length > 0)
        {
            this.tokens.Add(tempSource);
        }
        this.tokens.TrimToSize();
        this.numTokens = this.tokens.Count;
    }

    ///
    /// Method to add or change this Instance's Source string. The delimiter will remain the same (either default of " " (space) or whatever you 
    /// constructed this StringTokenizer with or added with NewDelim(string d) or NewDelim(char[] d) ).
    /// 
    /// The new Source String.
    ///
    public void NewSource(string newSource)
    {
        this.source = newSource;
        this.Tokenize();
    }

    ///
    /// Method to add or change this Instance's Delimiter string. The source string will remain the same (either empty if you used Empty Constructor, or 
    /// the previous value of source from the call to a parameterized constructor or NewSource(string s) ).
    /// 
    /// The new Delimiter String
    /// 
    public void NewDelim(string newDel)
    {
        if (newDel.Length == 0)
        {
            this.delimiter = " ";
        }
        else
        {
            this.delimiter = newDel;
        }
        this.Tokenize();
    }

    ///
    /// Method to add or change this Instance's Delimiter string. The source string will remain the same (either empty if you used Empty Constructor, or 
    /// the previous vlaue of source from the call to a parameterized constructor or NewSource(string s) ).
    /// 
    /// The New Delimiter as a char[]. Note that this is converted into a single String and expects Unicode encoded chars.
    /// 
    public void NewDelim(char[] newDel)
    {
        string temp = new String(newDel);
        if (temp.Length == 0)
        {
            this.delimiter = " ";
        }
        else
        {
            this.delimiter = temp;
        }
        this.Tokenize();
    }

    ///
    /// Method to get the number of tokens in this StringTokenizer.
    /// 
    /// The number fo Tokens in the internal ArrayList.
    ///
    public int CountTokens()
    {
        return this.tokens.Count;
    }

    ///
    /// Method to probe for more tokens.
    /// 
    /// true if there are more tokens; false otherwise.
    /// 
    public bool HasMoreTokens() {
        if (this.curIndex <= (this.tokens.Count -1)) {
            return true;
        } else {
            return false;
        }
    }

    ///
    /// Method to get the next (string) token of this StringTokenizer.
    /// 
    /// A string representing the next token; null if no tokens or more tokens.
    /// 
    public string NextToken() {
        string returnString = "";
        if (this.curIndex <= this.tokens.Count -1) {
            returnString = (string)tokens[curIndex];
            this.curIndex ++;
            return returnString;
        } else {
            return null;
        }
    }

    ///
    /// Gets the Source string of this StringTokenizer.
    /// 
    /// A string representing the current Source.
    /// 
    public string Source {
        get {
            return this.source;
        }
    }

    ///
    /// Gets the Delimiter string of this StringTokenizer.
    /// 
    /// A string representing the current Delimiter.
    /// 
    public string Delimeter {
        get {
            return this.delimiter;
        }
    }

    ///
    /// Gets the tokens of this StringTokenizer.
    ///
    /// 涂鸦添加此属性过程,以便调用。
    /// 
    public ArrayList Tokens {
        get {
            return this.tokens;
        }
    }
}
 

Add comment

Loading