我目前正在使用机器学习分析某网站的网络日志。我正在清理数据,并希望识别出该网站的唯一访客。
我在处理网络日志方面没有多少经验,但很明显,用户每访问一次页面,都会检索多个文件(例如下面 cs.uri.stem 列中显示的记录)。
我的问题是:当用户浏览多个页面时(例如通过页面 A 上的链接跳转到页面 B),该如何处理?我怎样才能知道他在这个网站上的行为?
此外,有人能推荐一些有助于分析网络日志的优秀 Python 库吗?
非常感谢!!!
date time s.ip cs.method cs.uri.stem cs.uri.query s.port cs.username c.ip sc.status sc.substatus sc.win32.status time.taken device os browser
1 2014-08-05 00:00:03 10.130.0.12 GET / - 80 - 67.205.67.76 200 0 0 1391 Spider Other PingdomBot_1.4
2 2014-08-05 00:00:11 10.130.0.12 GET /about-the-hotel.aspx - 80 - 70.56.59.43 200 0 0 1194 PC Mac_OS_X_10.8 Firefox_31.0
3 2014-08-05 00:00:11 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/a-hotel-unlike-any-others.ashx - 80 - 70.56.59.43 200 0 0 976 PC Mac_OS_X_10.8 Firefox_31.0
4 2014-08-05 00:00:12 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/0713-ExComTeam.ashx - 80 - 70.56.59.43 200 0 0 1620 PC Mac_OS_X_10.8 Firefox_31.0
5 2014-08-05 00:00:12 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/icon/vivienne-tam.ashx - 80 - 70.56.59.43 200 0 0 1713 PC Mac_OS_X_10.8 Firefox_31.0
6 2014-08-05 00:00:12 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/icon/william-lim.ashx - 80 - 70.56.59.43 200 0 0 2387 PC Mac_OS_X_10.8 Firefox_31.0
7 2014-08-05 00:00:14 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/icon/barney-cheng.ashx - 80 - 70.56.59.43 200 0 0 2180 PC Mac_OS_X_10.8 Firefox_31.0
8 2014-08-05 00:00:14 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/icon/tommy-li.ashx - 80 - 70.56.59.43 200 0 0 1146 PC Mac_OS_X_10.8 Firefox_31.0
9 2014-08-05 00:00:14 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/icon/yang-rutherford.ashx - 80 - 70.56.59.43 200 0 0 869 PC Mac_OS_X_10.8 Firefox_31.0
10 2014-08-05 00:00:14 10.130.0.12 GET /~/media/Images/Hotel_ICON_revamp/about+us/icon/justin_wong_img1.ashx - 80 - 70.56.59.43 200 0 0 845 PC Mac_OS_X_10.8 Firefox_31.0
答案 0 :(得分:1)
查看List<int> myList = new List<int>();
// Purpose: collect the indices of the checked rows of Table1 into myList and, when the
// requested action is 7, save every checked request through the service object, then
// refresh the local copy of the request row and its RequestActions rows.
int i = 0;
DataTable requestTable = MyCommonClass.OraDataSet.Tables["Table1"];
for (i = 0; i < requestTable.Rows.Count; i++)
{
    object checkStatus = requestTable.Rows[i]["CHECK_STATUS"];
    // Select a row only when CHECK_STATUS is present AND converts to true.
    // (Previously the converted value was thrown away, so unchecked rows were selected too.)
    if (!(checkStatus is DBNull) && Convert.ToBoolean(checkStatus))
    {
        myList.Add(i);
    }
}
if ((myList.Count > 0) && (RequestActionId == 7))
{
    // Resolve the stored indices to row references BEFORE touching the table:
    // the failure branch below removes rows, which would shift every later index.
    List<DataRow> selectedRows = new List<DataRow>(myList.Count);
    for (i = 0; i < myList.Count; i++)
    {
        selectedRows.Add(requestTable.Rows[myList[i]]);
    }

    for (i = 0; i < selectedRows.Count; i++)
    {
        // The checked source row (myList[i]-th row of the table, not the i-th).
        DataRow drSrc = selectedRows[i];

        // Build a one-row DataSet with the same schema to send to the service.
        DataSet ds1 = new DataSet();
        ds1.Tables.Add(requestTable.Clone());
        DataRow drNew = ds1.Tables["table1"].NewRow();
        MyCommonClass.CopyRow(drSrc, drNew);
        drNew["Req_Action_Id"] = RequestActionId;

        // Read the current user once instead of querying the service twice.
        DataRow userRow = MyCommonClass.prSrvObject.GetSecurityPermission().Tables["TUsers"].Rows[0];
        drNew["Update_Login"] = userRow["User_Name"].ToString();
        drNew["Update_User_FIO"] = userRow["User_Fio"].ToString();

        ds1.Tables["table1"].Rows.Add(drNew);
        // Reset the row to Unchanged so SetAdded/SetModified are allowed, then
        // mirror the state of the source row.
        drNew.AcceptChanges();
        if (drSrc.RowState == DataRowState.Added)
        {
            drNew.SetAdded();
        }
        else if (drSrc.RowState == DataRowState.Modified || drSrc.RowState == DataRowState.Unchanged)
        {
            drNew.SetModified();
        }

        int iErrId = 0;
        string sErrMsg = string.Empty;

        // Save the request.
        MyCommonClass.prSrvObject.SaveRequest(ds1, out iErrId, out sErrMsg);
        if (iErrId != 0) throw new Exception(sErrMsg);

        // Re-read the request from the service to pick up its current state.
        ds1 = MyCommonClass.prSrvObject.GetRequests(Convert.ToInt32(drSrc["Req_Id"].ToString()),
            out iErrId, out sErrMsg);
        if (iErrId != 0) throw new Exception(sErrMsg);

        // Drop the locally cached actions of this request; they are re-added below
        // from the refreshed data set.
        foreach (DataRow Row in MyCommonClass.OraDataSet.Tables["RequestActions"].Select("Req_Id = " + drSrc["Req_Id"].ToString()))
        {
            Row.AcceptChanges();
            Row.Delete();
            Row.AcceptChanges();
        }

        if (!(ds1.Tables["table1"] == null || ds1.Tables["table1"].Rows.Count == 0))
        {
            // NOTE(review): this indexes the refreshed result by the row's position in the
            // local table. If GetRequests returns only the requested Req_Id, this should
            // presumably be Rows[0] - confirm against the service contract.
            MyCommonClass.CopyRow(ds1.Tables["table1"].Rows[myList[i]], drSrc);
            drSrc.AcceptChanges();

            foreach (DataRow Row in ds1.Tables["RequestActions"].Select())
            {
                drNew = MyCommonClass.OraDataSet.Tables["RequestActions"].NewRow();
                MyCommonClass.CopyRow(Row, drNew);
                MyCommonClass.OraDataSet.Tables["RequestActions"].Rows.Add(drNew);
                drNew.AcceptChanges();
            }
            grvDetailView.RefreshData();
        }
        else
        {
            // The service no longer returns this request: remove it from the local table.
            // AcceptChanges first so Delete marks the row Deleted instead of detaching it
            // with pending changes; the final AcceptChanges physically removes it.
            drSrc.AcceptChanges();
            drSrc.Delete();
            drSrc.AcceptChanges();
        }
        XtraMessageBox.Show("text",
            MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
}
pandas 库可能是个好主意。使用 pandas 加载数据后(参见此处的示例),应该可以直接找出以一列或多列为条件的唯一元素(例如此处的示例)。