我有一个包含 3 列的 CSV 文件:用户、文本和标签。每个用户都有多条文本和对应的标签。我想找出每个用户出现频率最高的标签,以确定该用户的类别。
我试过了 `static`,它为所有用户返回相同的值:
/// <summary>
/// Hands out <see cref="Character"/> instances, keeping one
/// <see cref="AlphabetFactory"/> per requested font size.
/// </summary>
public class AlphabetFacade
{
    // Factories keyed by font size; an entry is created the first time a size is requested.
    private readonly Dictionary<int, AlphabetFactory> _alphabetFactories = new Dictionary<int, AlphabetFactory>();

    /// <summary>
    /// Returns the <see cref="Character"/> for <paramref name="character"/> from the
    /// factory associated with <paramref name="fontSize"/>, creating and registering
    /// that factory if none exists yet.
    /// </summary>
    /// <param name="character">Key passed through to <see cref="AlphabetFactory.GetCharacter"/>.</param>
    /// <param name="fontSize">Font size used to select (or create) the factory.</param>
    /// <returns>The character object produced by the selected factory.</returns>
    private Character GetChar(string character, int fontSize)
    {
        // TryGetValue does a single hash lookup instead of ContainsKey followed by the indexer.
        AlphabetFactory factory;
        if (!_alphabetFactories.TryGetValue(fontSize, out factory))
        {
            factory = new AlphabetFactory(fontSize);
            _alphabetFactories.Add(fontSize, factory);
        }

        return factory.GetCharacter(character);
    }
}
/// <summary>
/// Creates and caches <see cref="Character"/> instances for a single font size,
/// so that repeated requests for the same string return the same object.
/// </summary>
internal class AlphabetFactory
{
    // Font size given to every Character this factory creates.
    private readonly int _fontSize;

    // Cache of already-created characters, keyed by the requested string.
    private readonly Dictionary<string, Character> _characters = new Dictionary<string, Character>();

    /// <summary>
    /// Initializes a factory that builds characters with the given font size.
    /// </summary>
    /// <param name="fontSize">Size passed to each created <see cref="Character"/>.</param>
    public AlphabetFactory(int fontSize)
    {
        _fontSize = fontSize;
    }

    /// <summary>
    /// Returns the cached <see cref="Character"/> for <paramref name="character"/>,
    /// creating and caching it on first request.
    /// </summary>
    /// <param name="character">String used both as the cache key and as the character's value.</param>
    /// <returns>The cached or newly created character object.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="character"/> is <c>null</c> (raised by the dictionary lookup).
    /// </exception>
    public Character GetCharacter(string character)
    {
        // TryGetValue does a single hash lookup instead of ContainsKey followed by the indexer.
        Character cached;
        if (_characters.TryGetValue(character, out cached))
        {
            return cached;
        }

        var created = new Character(_fontSize, character);
        _characters.Add(character, created);
        return created;
    }
}
/// <summary>
/// Read-only pairing of a character string with a size; both values are fixed
/// at construction.
/// </summary>
class Character
{
    /// <summary>Size value supplied to the constructor.</summary>
    public int Size { get; }

    /// <summary>Character string supplied to the constructor.</summary>
    public string Char { get; }

    /// <summary>
    /// Creates a character with the given size and string value.
    /// </summary>
    /// <param name="size">Value stored in <see cref="Size"/>.</param>
    /// <param name="char">Value stored in <see cref="Char"/>.</param>
    public Character(int size, string @char)
    {
        this.Char = @char;
        this.Size = size;
    }
}
我也试过
# NOTE: the loop variable `i` is never used inside the body, so each iteration
# prints the value counts of the entire 'class' column rather than the counts
# for one user's rows.
for i in df['user'].unique():
print (df['class'].value_counts())
得到了
4 3062
1 1250
0 393
3 281
2 13
Name: class, dtype: int64
这是示例数据(sample data),请帮忙。
答案 0 :(得分:1)
要按组统计各个值的出现次数,您可以将 `groupby` 与 `value_counts` 结合使用:
# Small example frame: one row per (user, class) observation.
df = pd.DataFrame([[1, 1], [1, 2], [1, 3], [1, 1], [1, 1], [1, 2],
                   [2, 1], [2, 3], [2, 2], [2, 2], [2, 3], [2, 3]],
                  columns=['user', 'class'])

# Count every class per user with the group-by value_counts method (the
# top-level pd.value_counts function is deprecated in recent pandas).
# Counts come back sorted in descending order within each user, so the first
# row of each user is that user's most frequent class, e.g.
# res.drop_duplicates('user').
# Passing name='count' names the count column explicitly and avoids a clash
# with the 'class' index level on older pandas versions.
res = df.groupby('user')['class'].value_counts().reset_index(name='count')
print(res)
user class count
0 1 1 3
1 1 2 2
2 1 3 1
3 2 3 3
4 2 2 2
5 2 1 1