从字符串过滤文本

时间:2014-08-29 08:41:54

标签: c#

我现在就这样:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;

namespace ConsoleApplication2
{
   public class Program
    {
        static void Main(string[] args)
        {

            //Consider making this configurable
            const string sourceFile = "test2.txt";
            const string pattern = "http://10.123.9.66:80";
            //var FirstSeparatorLastNameExact = new[] { "nosyn_name_last_exact:(qxq" };
            //var SecondSeparatorLastNameExact = new[] { "qxq)" };

            string[] FirstSeparator = new string[] { "nosyn_name_last_exact:(qxq" };
            string[] SecondSeparator = new string[] { "qxq)" };

            string[] FirstSeperatorFirstName = new string[] {"nosyn_name_first_exact:(qxq"};
            string[] secondSeperatorFirstName = new string[]{"qxq)"};



            Regex re = new Regex("^(http|https)://");
            HttpWebResponse response;

           // var webClient = new WebClient();
            var times = new Dictionary<string, TimeSpan>();
            var stopwatch = new System.Diagnostics.Stopwatch();

            //Add header so if headers are tracked, it will show it is your application rather than something ambiguous
            //webClient.Headers.Add(HttpRequestHeader.UserAgent, "Response-Tester-Client");

            var urlList = new List<string>();

            //Loop through the lines in the file to get the urls 
            try
            {
                stopwatch.Start();
                using (var reader = new StreamReader(sourceFile))
                {

                    while (!reader.EndOfStream)
                    {
                        var urNewList = new List<string>();
                        var line = reader.ReadLine();
                        //line = line.Substring(line.IndexOf(pattern));
                        //line.Split("\t");
                        var columns = line.Split('\t');
                        //var result = line.Split(Seperator, StringSplitOptions.RemoveEmptyEntries)[1].Split(')')[0];
                        if (columns[2] == "R")
                        {
                            var url = columns[4] + "?" + columns[5];
                            urlList.Add(url);
                            Thread.Sleep(250);
                        }
                        //if (line.Contains(result))
                        //{
                            //MatchCollection matches = Regex.Matches(line, lastName);
                            //foreach (string lines in File.ReadLines(sourceFile))
                            //{

                                //var LastNameSearch = line.Split(FirstSeparatorLastNameExact, StringSplitOptions.RemoveEmptyEntries)[1];
                                //var resultLastNameSearch = LastNameSearch.Split(FirstSeparatorLastNameExact, StringSplitOptions.RemoveEmptyEntries)[0];


                                //var temp = line.Split(FirstSeparator, StringSplitOptions.RemoveEmptyEntries)[1];
                                //var result2 = temp.Split(SecondSeparator, StringSplitOptions.RemoveEmptyEntries)[0];
                                //Console.WriteLine(result2);

                        string[] result = line.Split(FirstSeperatorFirstName, StringSplitOptions.RemoveEmptyEntries);
                        if (result.Length > 2)
                        {
                            string[] inner = result[1].Split(')');
                            if (inner.Length > 1)
                            {
                                Console.WriteLine(inner[0]);
                                Console.WriteLine(result);
                            }
                        }


                        //var split = line.Split(FirstSeperatorFirstName, StringSplitOptions.RemoveEmptyEntries);

                        //if (split.Length > 1)
                        //{
                        //    Console.WriteLine(split[1].Split(')')[0]);
                        //   // Console.WriteLine(split);
                        //}


                    }
                }
            }

            catch (Exception e)
            {
                Console.WriteLine("An error occured while attempting to access the source file at {0}", sourceFile);
            }
            finally
            {
                //Stop, record and reset the stopwatch
                stopwatch.Stop();
                times.Add("FileReadTime", stopwatch.Elapsed);
                stopwatch.Reset();
            }

            //Try to connect to each url
            var counter = 1;
            foreach (var url in urlList)
            {
                try
                {
                    stopwatch.Start();

                    using (WebClient webClient = new WebClient())
                    {

                        webClient.Headers.Add(HttpRequestHeader.UserAgent, "Response-Tester-Client");

                       // HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                        request.Method = "POST";

                        //webClient.Dispose();
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine("An error occured while attempting to connect to {0}", url);
                }
                finally
                {
                    stopwatch.Stop();

                    //We use the counter for a friendlier url as the current ones are unwieldly
                    times.Add("Url " + counter, stopwatch.Elapsed);
                    counter++;

                    stopwatch.Reset();
                }
            }

            //Release the resources for the WebClient 
            //webClient.Dispose();

            //Write the response times

            Console.WriteLine("Url " + "\t\t\t\tLast Name");
            foreach (var key in times.Keys)
            {
                Console.WriteLine("{0}: {1}", key, times[key].TotalSeconds);
            }


            Console.ReadKey();
        }
    }
}

但我仍然得到错误:索引超出了数组的范围。那么如何改变呢?事实上它也必须删除qxq。我尝试使用上面声明的两个字符串

谢谢

2 个答案:

答案 0 :(得分:1)

  

如果没有nosyn_name_first_exact

,如何跳过该行

将代码拆分为不假设有代码。你有:

test.Split(FirstSeperatorFirstName, StringSplitOptions.RemoveEmptyEntries)[1]
    .Split(')')[0]
    .Dump();

将其更改为:

var split = test.Split(FirstSeperatorFirstName, StringSplitOptions.RemoveEmptyEntries);

if (split.Length > 1)
{
    split[1].Split(')')[0].Dump();
}

您发现if内可能会出现同样的问题,必要时会重复。

答案 1 :(得分:0)

您正在尝试从没有那么多值的数组中获取索引(索引超出范围)。

在尝试获取索引之前检查数组是否足够长,例如:

string[] result = test.Split(FirstSeperatorFirstName, StringSplitOptions.RemoveEmptyEntries);
if(result.Length > 2) {
  string[] inner = result[1].Split(')');
  if(inner.Length > 1) {
    inner[0].Dump();
  }
}

或者在尝试拆分之前检查字符串是否包含给定的子字符串:

if(test.Contains("nosyn_name_first_exact:(qxq")) {
  // Split and do whatever.
}