From 3ba8c4545c3bbcbdaedf02c9f65603ebaaf7a76f Mon Sep 17 00:00:00 2001 From: Dodoj <31199261+Doordoorjay@users.noreply.github.com> Date: Tue, 13 May 2025 18:53:52 +1000 Subject: [PATCH 1/7] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=88=86=E8=AF=8D?= =?UTF-8?q?=E6=A8=A1=E5=9D=97=E8=87=B3=E5=A4=96=E9=83=A8gse=E4=BB=93?= =?UTF-8?q?=E5=BA=93=EF=BC=9B=E5=A2=9E=E5=8A=A0=E5=8E=86=E5=8F=B2=E7=83=AD?= =?UTF-8?q?=E8=AF=8D=E5=8A=9F=E8=83=BD=EF=BC=9B=E6=9B=B4=E6=94=B9=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E5=88=B0=E6=9C=AC=E5=9C=B0=E7=BC=93=E5=AD=98=E6=B6=88?= =?UTF-8?q?=E6=81=AF=EF=BC=8C=E6=9E=81=E5=A4=A7=E5=B9=85=E5=BA=A6=E7=BC=A9?= =?UTF-8?q?=E7=9F=AD=E5=A4=84=E7=90=86=E6=97=B6=E9=97=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.mod | 2 + go.sum | 5 + plugin/wordcount/main.go | 498 +++++++++++++++++++++++---------------- 3 files changed, 303 insertions(+), 202 deletions(-) diff --git a/go.mod b/go.mod index 9031dd0e4e..efb9f2d311 100644 --- a/go.mod +++ b/go.mod @@ -30,6 +30,7 @@ require ( github.com/fumiama/slowdo v0.0.0-20241001074058-27c4fe5259a4 github.com/fumiama/terasu v0.0.0-20241027183601-987ab91031ce github.com/fumiama/unibase2n v0.0.0-20240530074540-ec743fd5a6d6 + github.com/go-ego/gse v0.80.3 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 github.com/google/uuid v1.6.0 github.com/jinzhu/gorm v1.9.16 @@ -85,6 +86,7 @@ require ( github.com/tidwall/pretty v1.2.0 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect + github.com/vcaesar/cedar v0.20.2 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect golang.org/x/exp/shiny v0.0.0-20250305212735-054e65f0b394 // indirect golang.org/x/mobile v0.0.0-20231127183840-76ac6878050a // indirect diff --git a/go.sum b/go.sum index 318781510c..4ee150f95f 100644 --- a/go.sum +++ b/go.sum @@ -92,6 +92,8 @@ github.com/gdamore/tcell v1.3.0/go.mod h1:Hjvr+Ofd+gLglo7RYKxxnzCBmev3BzsS67MebK github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498= github.com/go-audio/wav v1.0.0/go.mod h1:3yoReyQOsiARkvPl3ERCi8JFjihzG6WhjYpZCf5zAWE= +github.com/go-ego/gse v0.80.3 h1:YNFkjMhlhQnUeuoFcUEd1ivh6SOB764rT8GDsEbDiEg= +github.com/go-ego/gse v0.80.3/go.mod h1:Gt3A9Ry1Eso2Kza4MRaiZ7f2DTAvActmETY46Lxg0gU= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= @@ -192,6 +194,9 @@ github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFA github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= +github.com/vcaesar/cedar v0.20.2 h1:TDx7AdZhilKcfE1WvdToTJf5VrC/FXcUOW+KY1upLZ4= +github.com/vcaesar/cedar v0.20.2/go.mod h1:lyuGvALuZZDPNXwpzv/9LyxW+8Y6faN7zauFezNsnik= +github.com/vcaesar/tt v0.20.1 h1:D/jUeeVCNbq3ad8M7hhtB3J9x5RZ6I1n1eZ0BJp7M+4= github.com/wcharczuk/go-chart/v2 v2.1.2 h1:Y17/oYNuXwZg6TFag06qe8sBajwwsuvPiJJXcUcLL6E= github.com/wcharczuk/go-chart/v2 v2.1.2/go.mod h1:Zi4hbaqlWpYajnXB2K22IUYVXRXaLfSGNNR7P4ukyyQ= github.com/wdvxdr1123/ZeroBot v1.8.2-0.20250330133859-27c25d9412b5 h1:HsMcBsVpYuQv+W8pjX5WdwYROrFQP9c5Pbf4x4adDus= diff --git a/plugin/wordcount/main.go b/plugin/wordcount/main.go index bb91222d71..daa7caef6c 100644 --- a/plugin/wordcount/main.go +++ b/plugin/wordcount/main.go @@ -1,202 +1,296 @@ -// Package wordcount 聊天热词 -package wordcount - -import ( - "fmt" - "os" - "regexp" - "sort" - "strconv" - "strings" - "sync" - "time" - - "github.com/FloatTech/floatbox/binary" - fcext "github.com/FloatTech/floatbox/ctxext" - "github.com/FloatTech/floatbox/file" - ctrl "github.com/FloatTech/zbpctrl" - "github.com/FloatTech/zbputils/control" - "github.com/FloatTech/zbputils/ctxext" - "github.com/FloatTech/zbputils/img/text" - "github.com/golang/freetype" - "github.com/sirupsen/logrus" - "github.com/tidwall/gjson" - "github.com/wcharczuk/go-chart/v2" - zero "github.com/wdvxdr1123/ZeroBot" - "github.com/wdvxdr1123/ZeroBot/message" -) - -var ( - re = regexp.MustCompile(`^[一-龥]+$`) - stopwords []string -) - -func init() { - engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{ - DisableOnDefault: false, - Brief: "聊天热词", - Help: "- 热词 [群号] [消息数目]|热词 123456 1000", - PublicDataFolder: "WordCount", - }) - cachePath := engine.DataFolder() + "cache/" - _ = os.RemoveAll(cachePath) - _ = os.MkdirAll(cachePath, 0755) - engine.OnRegex(`^热词\s?(\d*)\s?(\d*)$`, zero.OnlyGroup, fcext.DoOnceOnSuccess(func(ctx *zero.Ctx) bool { - _, err := engine.GetLazyData("stopwords.txt", false) - if err != nil { - ctx.SendChain(message.Text("ERROR: ", err)) - return false - } - data, err := os.ReadFile(engine.DataFolder() + "stopwords.txt") - if err != nil { - ctx.SendChain(message.Text("ERROR: ", err)) - return false - } - stopwords = strings.Split(strings.ReplaceAll(binary.BytesToString(data), "\r", ""), "\n") - sort.Strings(stopwords) - logrus.Infoln("[wordcount]加载", len(stopwords), "条停用词") - return true - })).Limit(ctxext.LimitByUser).SetBlock(true). - Handle(func(ctx *zero.Ctx) { - _, err := file.GetLazyData(text.FontFile, control.Md5File, true) - if err != nil { - ctx.SendChain(message.Text("ERROR: ", err)) - return - } - b, err := os.ReadFile(text.FontFile) - if err != nil { - ctx.SendChain(message.Text("ERROR: ", err)) - return - } - font, err := freetype.ParseFont(b) - if err != nil { - ctx.SendChain(message.Text("ERROR: ", err)) - return - } - - ctx.SendChain(message.Text("少女祈祷中...")) - gid, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[1], 10, 64) - p, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[2], 10, 64) - if p > 10000 { - p = 10000 - } - if p == 0 { - p = 1000 - } - if gid == 0 { - gid = ctx.Event.GroupID - } - group := ctx.GetGroupInfo(gid, false) - if group.MemberCount == 0 { - ctx.SendChain(message.Text(zero.BotConfig.NickName[0], "未加入", group.Name, "(", gid, "),无法获得热词呢")) - return - } - today := time.Now().Format("20060102") - drawedFile := fmt.Sprintf("%s%d%s%dwordCount.png", cachePath, gid, today, p) - if file.IsExist(drawedFile) { - ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) - return - } - messageMap := make(map[string]int, 256) - msghists := make(chan *gjson.Result, 256) - go func() { - h := ctx.GetLatestGroupMessageHistory(gid) - messageSeq := h.Get("messages.0.message_seq").Int() - msghists <- &h - for i := 1; i < int(p/20) && messageSeq != 0; i++ { - h := ctx.GetGroupMessageHistory(gid, messageSeq) - msghists <- &h - messageSeq = h.Get("messages.0.message_seq").Int() - } - close(msghists) - }() - var wg sync.WaitGroup - var mapmu sync.Mutex - for h := range msghists { - wg.Add(1) - go func(h *gjson.Result) { - for _, v := range h.Get("messages.#.message").Array() { - tex := strings.TrimSpace(message.ParseMessageFromString(v.Str).ExtractPlainText()) - if tex == "" { - continue - } - for _, t := range ctx.GetWordSlices(tex).Get("slices").Array() { - tex := strings.TrimSpace(t.Str) - i := sort.SearchStrings(stopwords, tex) - if re.MatchString(tex) && (i >= len(stopwords) || stopwords[i] != tex) { - mapmu.Lock() - messageMap[tex]++ - mapmu.Unlock() - } - } - } - wg.Done() - }(h) - } - wg.Wait() - - wc := rankByWordCount(messageMap) - if len(wc) > 20 { - wc = wc[:20] - } - // 绘图 - if len(wc) == 0 { - ctx.SendChain(message.Text("ERROR: 历史消息为空或者无法获得历史消息")) - return - } - bars := make([]chart.Value, len(wc)) - for i, v := range wc { - bars[i] = chart.Value{ - Value: float64(v.Value), - Label: v.Key, - } - } - graph := chart.BarChart{ - Font: font, - Title: fmt.Sprintf("%s(%d)在%s号的%d条消息的热词top20", group.Name, gid, time.Now().Format("2006-01-02"), p), - Background: chart.Style{ - Padding: chart.Box{ - Top: 40, - }, - }, - Height: 500, - BarWidth: 25, - Bars: bars, - } - f, err := os.Create(drawedFile) - if err != nil { - ctx.SendChain(message.Text("ERROR: ", err)) - return - } - err = graph.Render(chart.PNG, f) - _ = f.Close() - if err != nil { - _ = os.Remove(drawedFile) - ctx.SendChain(message.Text("ERROR: ", err)) - return - } - ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) - }) -} - -func rankByWordCount(wordFrequencies map[string]int) pairlist { - pl := make(pairlist, len(wordFrequencies)) - i := 0 - for k, v := range wordFrequencies { - pl[i] = pair{k, v} - i++ - } - sort.Sort(sort.Reverse(pl)) - return pl -} - -type pair struct { - Key string - Value int -} - -type pairlist []pair - -func (p pairlist) Len() int { return len(p) } -func (p pairlist) Less(i, j int) bool { return p[i].Value < p[j].Value } -func (p pairlist) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +// Package wordcount 聊天热词 +package wordcount + +import ( + "encoding/json" + "fmt" + "os" + "sort" + "strings" + "time" + + "github.com/FloatTech/floatbox/file" + ctrl "github.com/FloatTech/zbpctrl" + "github.com/FloatTech/zbputils/control" + "github.com/FloatTech/zbputils/img/text" + "github.com/go-ego/gse" + "github.com/golang/freetype" + "github.com/wcharczuk/go-chart/v2" + zero "github.com/wdvxdr1123/ZeroBot" + "github.com/wdvxdr1123/ZeroBot/message" +) + +var ( + stopwords map[string]struct{} + wordcountDataFolder string + seg gse.Segmenter +) + +type MessageRecord struct { + Time int64 `json:"time"` + Text string `json:"text"` +} + +func appendJSONLine(filePath string, record MessageRecord) { + f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return + } + defer f.Close() + data, err := json.Marshal(record) + if err != nil { + return + } + f.Write(data) + f.WriteString("\n") +} + +func loadStopwords() { + stopwords = make(map[string]struct{}) + data, err := os.ReadFile(wordcountDataFolder + "stopwords.txt") + if err != nil { + return + } + for _, w := range strings.Split(strings.ReplaceAll(string(data), "\r", ""), "\n") { + w = strings.TrimSpace(w) + if w != "" { + stopwords[w] = struct{}{} + } + } +} + +func loadCustomDicts() { + err := seg.LoadDictEmbed("zh_s") + if err != nil { + fmt.Println("加载内置词典失败:", err) + } else { + fmt.Println("成功加载内置词典") + } +} + +func init() { + engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{ + DisableOnDefault: false, + Brief: "聊天热词", + Help: "- 热词 | 历史热词", + PublicDataFolder: "WordCount", + }) + wordcountDataFolder = engine.DataFolder() + _ = os.MkdirAll(wordcountDataFolder+"cache/", 0755) + + // 加载 stopwords.txt(如不存在) + _, err := engine.GetLazyData("stopwords.txt", false) + if err != nil { + fmt.Println("下载 stopwords.txt 失败:", err) + } + + loadStopwords() + loadCustomDicts() + + engine.OnMessage(zero.OnlyGroup). + Handle(func(ctx *zero.Ctx) { + gid := ctx.Event.GroupID + today := time.Now().Format("20060102") + groupFolder := fmt.Sprintf("%s/messages/%d/", wordcountDataFolder, gid) + _ = os.MkdirAll(groupFolder, 0755) + filePath := fmt.Sprintf("%s%s.json", groupFolder, today) + + textContent := strings.TrimSpace(message.ParseMessageFromString(ctx.Event.RawMessage).ExtractPlainText()) + if textContent == "" { + return + } + record := MessageRecord{Time: time.Now().Unix(), Text: textContent} + appendJSONLine(filePath, record) + }) + + engine.OnRegex(`^热词$`, zero.OnlyGroup). + Handle(func(ctx *zero.Ctx) { + _, _ = file.GetLazyData(text.FontFile, control.Md5File, true) + b, _ := os.ReadFile(text.FontFile) + font, _ := freetype.ParseFont(b) + + ctx.SendChain(message.Text("开始统计中...")) + gid := ctx.Event.GroupID + + baseFolder := fmt.Sprintf("%s/messages/%d/", wordcountDataFolder, gid) + today := time.Now().Format("20060102") + filePath := fmt.Sprintf("%s%s.json", baseFolder, today) + if !file.IsExist(filePath) { + ctx.SendChain(message.Text("ERROR: 今日无聊天记录")) + return + } + content, _ := os.ReadFile(filePath) + messages := []string{} + for _, line := range strings.Split(string(content), "\n") { + if strings.TrimSpace(line) == "" { + continue + } + var rec MessageRecord + if err := json.Unmarshal([]byte(line), &rec); err == nil { + messages = append(messages, rec.Text) + } + } + if len(messages) == 0 { + ctx.SendChain(message.Text("ERROR: 今日无有效聊天记录")) + return + } + + // 跳过stopword和2个字以下的词 + messageMap := make(map[string]int) + + for _, msg := range messages { + text := strings.TrimSpace(msg) + if text == "" { + continue + } + + segments := seg.Segment([]byte(text)) + words := gse.ToSlice(segments, true) + + for _, word := range words { + // 跳过停用词 + if _, isStopword := stopwords[word]; isStopword { + continue + } + // 跳过所有单字词 + if len([]rune(word)) < 2 { + continue + } + + messageMap[word]++ + } + } + + wc := rankByWordCount(messageMap) + if len(wc) > 20 { + wc = wc[:20] + } + + bars := make([]chart.Value, len(wc)) + for i, v := range wc { + bars[i] = chart.Value{Value: float64(v.Value), Label: v.Key} + } + + drawedFile := fmt.Sprintf("%s%d%swordCount.png", wordcountDataFolder+"cache/", gid, today) + graph := chart.BarChart{ + Font: font, + Title: "热词TOP20 - 今日", + Background: chart.Style{Padding: chart.Box{Top: 40}}, + Height: 500, + BarWidth: 35, + Bars: bars, + } + f, _ := os.Create(drawedFile) + _ = graph.Render(chart.PNG, f) + _ = f.Close() + ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) + }) + + //历史所有热词 + engine.OnRegex(`^(历史热词)$`, zero.OnlyGroup). + Handle(func(ctx *zero.Ctx) { + // 加载字体 + _, _ = file.GetLazyData(text.FontFile, control.Md5File, true) + b, _ := os.ReadFile(text.FontFile) + font, _ := freetype.ParseFont(b) + + ctx.SendChain(message.Text("开始统计历史热词中...")) + gid := ctx.Event.GroupID + + baseFolder := fmt.Sprintf("%s/messages/%d/", wordcountDataFolder, gid) + files, _ := os.ReadDir(baseFolder) + + messages := []string{} + for _, f := range files { + if !f.IsDir() && strings.HasSuffix(f.Name(), ".json") { + content, _ := os.ReadFile(baseFolder + f.Name()) + for _, line := range strings.Split(string(content), "\n") { + if strings.TrimSpace(line) == "" { + continue + } + var rec MessageRecord + if err := json.Unmarshal([]byte(line), &rec); err == nil { + messages = append(messages, rec.Text) + } + } + } + } + + if len(messages) == 0 { + ctx.SendChain(message.Text("ERROR: 没有历史聊天记录")) + return + } + + // 跳过stopword和2个字以下的词 + messageMap := make(map[string]int) + + for _, msg := range messages { + text := strings.TrimSpace(msg) + if text == "" { + continue + } + + segments := seg.Segment([]byte(text)) + words := gse.ToSlice(segments, true) + + for _, word := range words { + if _, isStopword := stopwords[word]; isStopword { + continue + } + + if len([]rune(word)) < 2 { + continue + } + + messageMap[word]++ + } + } + + wc := rankByWordCount(messageMap) + if len(wc) > 20 { + wc = wc[:20] + } + + bars := make([]chart.Value, len(wc)) + for i, v := range wc { + bars[i] = chart.Value{Value: float64(v.Value), Label: v.Key} + } + + drawedFile := fmt.Sprintf("%s%d_historyWordCount.png", wordcountDataFolder+"cache/", gid) + graph := chart.BarChart{ + Font: font, + Title: "热词TOP20 - 历史", + Background: chart.Style{Padding: chart.Box{Top: 40}}, + Height: 500, + BarWidth: 35, + Bars: bars, + } + f, _ := os.Create(drawedFile) + _ = graph.Render(chart.PNG, f) + _ = f.Close() + ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) + }) + +} + +type pair struct { + Key string + Value int +} + +type pairlist []pair + +func (p pairlist) Len() int { return len(p) } +func (p pairlist) Less(i, j int) bool { return p[i].Value < p[j].Value } +func (p pairlist) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func rankByWordCount(wordFrequencies map[string]int) pairlist { + pl := make(pairlist, len(wordFrequencies)) + i := 0 + for k, v := range wordFrequencies { + pl[i] = pair{k, v} + i++ + } + sort.Sort(sort.Reverse(pl)) + return pl +} From b86ae3a04ebbcabfe6027ad9856f078ca9588336 Mon Sep 17 00:00:00 2001 From: Dodoj <31199261+Doordoorjay@users.noreply.github.com> Date: Tue, 13 May 2025 09:04:26 +0000 Subject: [PATCH 2/7] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=A0=B7=E5=BC=8F?= =?UTF-8?q?=E6=94=B9=E8=BF=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/wordcount/main.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/plugin/wordcount/main.go b/plugin/wordcount/main.go index daa7caef6c..d0904cc0e9 100644 --- a/plugin/wordcount/main.go +++ b/plugin/wordcount/main.go @@ -26,6 +26,7 @@ var ( seg gse.Segmenter ) +// 保存聊天消息的时间与内容到json type MessageRecord struct { Time int64 `json:"time"` Text string `json:"text"` @@ -37,12 +38,18 @@ func appendJSONLine(filePath string, record MessageRecord) { return } defer f.Close() + data, err := json.Marshal(record) if err != nil { return } - f.Write(data) - f.WriteString("\n") + // 错误处理 + if _, err := f.Write(data); err != nil { + return + } + if _, err := f.WriteString("\n"); err != nil { + return + } } func loadStopwords() { From c0848fafbcf4f460a29af6fa0460e8ba91320bdd Mon Sep 17 00:00:00 2001 From: Dodoj <31199261+Doordoorjay@users.noreply.github.com> Date: Tue, 13 May 2025 09:09:02 +0000 Subject: [PATCH 3/7] Fixed lint check --- plugin/wordcount/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/wordcount/main.go b/plugin/wordcount/main.go index d0904cc0e9..7dd54c1c3a 100644 --- a/plugin/wordcount/main.go +++ b/plugin/wordcount/main.go @@ -26,7 +26,7 @@ var ( seg gse.Segmenter ) -// 保存聊天消息的时间与内容到json +// MessageRecord将保存聊天消息的时间与内容到json type MessageRecord struct { Time int64 `json:"time"` Text string `json:"text"` From d22c253d9c6aeb74b3d893b519d8a3afc371ed9b Mon Sep 17 00:00:00 2001 From: Dodoj <31199261+Doordoorjay@users.noreply.github.com> Date: Tue, 13 May 2025 09:11:34 +0000 Subject: [PATCH 4/7] Fix lint --- plugin/wordcount/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/wordcount/main.go b/plugin/wordcount/main.go index 7dd54c1c3a..ced5af0d5b 100644 --- a/plugin/wordcount/main.go +++ b/plugin/wordcount/main.go @@ -26,7 +26,7 @@ var ( seg gse.Segmenter ) -// MessageRecord将保存聊天消息的时间与内容到json +// MessageRecord represents a chat message with timestamp and text to json. type MessageRecord struct { Time int64 `json:"time"` Text string `json:"text"` From 13e8d9d7a2cb704632d41cbcfe48e2c3d3c0b2ff Mon Sep 17 00:00:00 2001 From: Dodoj <31199261+Doordoorjay@users.noreply.github.com> Date: Tue, 13 May 2025 21:51:13 +1000 Subject: [PATCH 5/7] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=88=86=E8=AF=8D?= =?UTF-8?q?=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/wordcount/main.go | 365 +++++++++++++++------------------------ 1 file changed, 135 insertions(+), 230 deletions(-) diff --git a/plugin/wordcount/main.go b/plugin/wordcount/main.go index ced5af0d5b..ea218ca18e 100644 --- a/plugin/wordcount/main.go +++ b/plugin/wordcount/main.go @@ -2,282 +2,198 @@ package wordcount import ( - "encoding/json" "fmt" "os" + "regexp" "sort" + "strconv" "strings" + "sync" "time" + "github.com/FloatTech/floatbox/binary" + fcext "github.com/FloatTech/floatbox/ctxext" "github.com/FloatTech/floatbox/file" ctrl "github.com/FloatTech/zbpctrl" "github.com/FloatTech/zbputils/control" + "github.com/FloatTech/zbputils/ctxext" "github.com/FloatTech/zbputils/img/text" "github.com/go-ego/gse" "github.com/golang/freetype" + "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" "github.com/wcharczuk/go-chart/v2" zero "github.com/wdvxdr1123/ZeroBot" "github.com/wdvxdr1123/ZeroBot/message" ) var ( - stopwords map[string]struct{} - wordcountDataFolder string - seg gse.Segmenter + re = regexp.MustCompile(`^[一-龥]+$`) + stopwords []string ) - -// MessageRecord represents a chat message with timestamp and text to json. -type MessageRecord struct { - Time int64 `json:"time"` - Text string `json:"text"` -} - -func appendJSONLine(filePath string, record MessageRecord) { - f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) - if err != nil { - return - } - defer f.Close() - - data, err := json.Marshal(record) - if err != nil { - return - } - // 错误处理 - if _, err := f.Write(data); err != nil { - return - } - if _, err := f.WriteString("\n"); err != nil { - return - } -} - -func loadStopwords() { - stopwords = make(map[string]struct{}) - data, err := os.ReadFile(wordcountDataFolder + "stopwords.txt") - if err != nil { - return - } - for _, w := range strings.Split(strings.ReplaceAll(string(data), "\r", ""), "\n") { - w = strings.TrimSpace(w) - if w != "" { - stopwords[w] = struct{}{} - } - } -} - -func loadCustomDicts() { - err := seg.LoadDictEmbed("zh_s") - if err != nil { - fmt.Println("加载内置词典失败:", err) - } else { - fmt.Println("成功加载内置词典") - } -} +var seg gse.Segmenter func init() { engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{ DisableOnDefault: false, Brief: "聊天热词", - Help: "- 热词 | 历史热词", + Help: "- 热词 [群号] [消息数目]|热词 123456 1000", PublicDataFolder: "WordCount", }) - wordcountDataFolder = engine.DataFolder() - _ = os.MkdirAll(wordcountDataFolder+"cache/", 0755) - - // 加载 stopwords.txt(如不存在) - _, err := engine.GetLazyData("stopwords.txt", false) - if err != nil { - fmt.Println("下载 stopwords.txt 失败:", err) - } - - loadStopwords() - loadCustomDicts() - - engine.OnMessage(zero.OnlyGroup). + cachePath := engine.DataFolder() + "cache/" + // 读取gse内置中文词典 + _ = seg.LoadDictEmbed("zh_s") + _ = os.RemoveAll(cachePath) + _ = os.MkdirAll(cachePath, 0755) + engine.OnRegex(`^热词\s?(\d*)\s?(\d*)$`, zero.OnlyGroup, fcext.DoOnceOnSuccess(func(ctx *zero.Ctx) bool { + _, err := engine.GetLazyData("stopwords.txt", false) + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) + return false + } + data, err := os.ReadFile(engine.DataFolder() + "stopwords.txt") + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) + return false + } + stopwords = strings.Split(strings.ReplaceAll(binary.BytesToString(data), "\r", ""), "\n") + sort.Strings(stopwords) + logrus.Infoln("[wordcount]加载", len(stopwords), "条停用词") + return true + })).Limit(ctxext.LimitByUser).SetBlock(true). Handle(func(ctx *zero.Ctx) { - gid := ctx.Event.GroupID - today := time.Now().Format("20060102") - groupFolder := fmt.Sprintf("%s/messages/%d/", wordcountDataFolder, gid) - _ = os.MkdirAll(groupFolder, 0755) - filePath := fmt.Sprintf("%s%s.json", groupFolder, today) - - textContent := strings.TrimSpace(message.ParseMessageFromString(ctx.Event.RawMessage).ExtractPlainText()) - if textContent == "" { + _, err := file.GetLazyData(text.FontFile, control.Md5File, true) + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) return } - record := MessageRecord{Time: time.Now().Unix(), Text: textContent} - appendJSONLine(filePath, record) - }) - - engine.OnRegex(`^热词$`, zero.OnlyGroup). - Handle(func(ctx *zero.Ctx) { - _, _ = file.GetLazyData(text.FontFile, control.Md5File, true) - b, _ := os.ReadFile(text.FontFile) - font, _ := freetype.ParseFont(b) - - ctx.SendChain(message.Text("开始统计中...")) - gid := ctx.Event.GroupID - - baseFolder := fmt.Sprintf("%s/messages/%d/", wordcountDataFolder, gid) - today := time.Now().Format("20060102") - filePath := fmt.Sprintf("%s%s.json", baseFolder, today) - if !file.IsExist(filePath) { - ctx.SendChain(message.Text("ERROR: 今日无聊天记录")) + b, err := os.ReadFile(text.FontFile) + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) return } - content, _ := os.ReadFile(filePath) - messages := []string{} - for _, line := range strings.Split(string(content), "\n") { - if strings.TrimSpace(line) == "" { - continue - } - var rec MessageRecord - if err := json.Unmarshal([]byte(line), &rec); err == nil { - messages = append(messages, rec.Text) - } - } - if len(messages) == 0 { - ctx.SendChain(message.Text("ERROR: 今日无有效聊天记录")) + font, err := freetype.ParseFont(b) + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) return } - // 跳过stopword和2个字以下的词 - messageMap := make(map[string]int) - - for _, msg := range messages { - text := strings.TrimSpace(msg) - if text == "" { - continue - } - - segments := seg.Segment([]byte(text)) - words := gse.ToSlice(segments, true) - - for _, word := range words { - // 跳过停用词 - if _, isStopword := stopwords[word]; isStopword { - continue - } - // 跳过所有单字词 - if len([]rune(word)) < 2 { - continue - } - - messageMap[word]++ - } + ctx.SendChain(message.Text("少女祈祷中...")) + gid, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[1], 10, 64) + p, _ := strconv.ParseInt(ctx.State["regex_matched"].([]string)[2], 10, 64) + if p > 10000 { + p = 10000 } - - wc := rankByWordCount(messageMap) - if len(wc) > 20 { - wc = wc[:20] + if p == 0 { + p = 1000 } - - bars := make([]chart.Value, len(wc)) - for i, v := range wc { - bars[i] = chart.Value{Value: float64(v.Value), Label: v.Key} + if gid == 0 { + gid = ctx.Event.GroupID } - - drawedFile := fmt.Sprintf("%s%d%swordCount.png", wordcountDataFolder+"cache/", gid, today) - graph := chart.BarChart{ - Font: font, - Title: "热词TOP20 - 今日", - Background: chart.Style{Padding: chart.Box{Top: 40}}, - Height: 500, - BarWidth: 35, - Bars: bars, - } - f, _ := os.Create(drawedFile) - _ = graph.Render(chart.PNG, f) - _ = f.Close() - ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) - }) - - //历史所有热词 - engine.OnRegex(`^(历史热词)$`, zero.OnlyGroup). - Handle(func(ctx *zero.Ctx) { - // 加载字体 - _, _ = file.GetLazyData(text.FontFile, control.Md5File, true) - b, _ := os.ReadFile(text.FontFile) - font, _ := freetype.ParseFont(b) - - ctx.SendChain(message.Text("开始统计历史热词中...")) - gid := ctx.Event.GroupID - - baseFolder := fmt.Sprintf("%s/messages/%d/", wordcountDataFolder, gid) - files, _ := os.ReadDir(baseFolder) - - messages := []string{} - for _, f := range files { - if !f.IsDir() && strings.HasSuffix(f.Name(), ".json") { - content, _ := os.ReadFile(baseFolder + f.Name()) - for _, line := range strings.Split(string(content), "\n") { - if strings.TrimSpace(line) == "" { - continue - } - var rec MessageRecord - if err := json.Unmarshal([]byte(line), &rec); err == nil { - messages = append(messages, rec.Text) - } - } - } + group := ctx.GetGroupInfo(gid, false) + if group.MemberCount == 0 { + ctx.SendChain(message.Text(zero.BotConfig.NickName[0], "未加入", group.Name, "(", gid, "),无法获得热词呢")) + return } - - if len(messages) == 0 { - ctx.SendChain(message.Text("ERROR: 没有历史聊天记录")) + today := time.Now().Format("20060102") + drawedFile := fmt.Sprintf("%s%d%s%dwordCount.png", cachePath, gid, today, p) + if file.IsExist(drawedFile) { + ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) return } - - // 跳过stopword和2个字以下的词 - messageMap := make(map[string]int) - - for _, msg := range messages { - text := strings.TrimSpace(msg) - if text == "" { - continue + messageMap := make(map[string]int, 256) + msghists := make(chan *gjson.Result, 256) + go func() { + h := ctx.GetLatestGroupMessageHistory(gid) + messageSeq := h.Get("messages.0.message_seq").Int() + msghists <- &h + for i := 1; i < int(p/20) && messageSeq != 0; i++ { + h := ctx.GetGroupMessageHistory(gid, messageSeq) + msghists <- &h + messageSeq = h.Get("messages.0.message_seq").Int() } - - segments := seg.Segment([]byte(text)) - words := gse.ToSlice(segments, true) - - for _, word := range words { - if _, isStopword := stopwords[word]; isStopword { - continue - } - - if len([]rune(word)) < 2 { - continue + close(msghists) + }() + var wg sync.WaitGroup + var mapmu sync.Mutex + for h := range msghists { + wg.Add(1) + go func(h *gjson.Result) { + for _, v := range h.Get("messages.#.message").Array() { + tex := strings.TrimSpace(message.ParseMessageFromString(v.Str).ExtractPlainText()) + if tex == "" { + continue + } + segments := seg.Segment([]byte(tex)) + words := gse.ToSlice(segments, true) + for _, word := range words { + word = strings.TrimSpace(word) + i := sort.SearchStrings(stopwords, word) + if re.MatchString(word) && (i >= len(stopwords) || stopwords[i] != word) { + mapmu.Lock() + messageMap[word]++ + mapmu.Unlock() + } + } } - - messageMap[word]++ - } + wg.Done() + }(h) } + wg.Wait() wc := rankByWordCount(messageMap) if len(wc) > 20 { wc = wc[:20] } - + // 绘图 + if len(wc) == 0 { + ctx.SendChain(message.Text("ERROR: 历史消息为空或者无法获得历史消息")) + return + } bars := make([]chart.Value, len(wc)) for i, v := range wc { - bars[i] = chart.Value{Value: float64(v.Value), Label: v.Key} + bars[i] = chart.Value{ + Value: float64(v.Value), + Label: v.Key, + } } - - drawedFile := fmt.Sprintf("%s%d_historyWordCount.png", wordcountDataFolder+"cache/", gid) graph := chart.BarChart{ - Font: font, - Title: "热词TOP20 - 历史", - Background: chart.Style{Padding: chart.Box{Top: 40}}, - Height: 500, - BarWidth: 35, - Bars: bars, + Font: font, + Title: fmt.Sprintf("%s(%d)在%s号的%d条消息的热词top20", group.Name, gid, time.Now().Format("2006-01-02"), p), + Background: chart.Style{ + Padding: chart.Box{ + Top: 40, + }, + }, + Height: 500, + BarWidth: 25, + Bars: bars, + } + f, err := os.Create(drawedFile) + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) + return } - f, _ := os.Create(drawedFile) - _ = graph.Render(chart.PNG, f) + err = graph.Render(chart.PNG, f) _ = f.Close() + if err != nil { + _ = os.Remove(drawedFile) + ctx.SendChain(message.Text("ERROR: ", err)) + return + } ctx.SendChain(message.Image("file:///" + file.BOTPATH + "/" + drawedFile)) }) +} +func rankByWordCount(wordFrequencies map[string]int) pairlist { + pl := make(pairlist, len(wordFrequencies)) + i := 0 + for k, v := range wordFrequencies { + pl[i] = pair{k, v} + i++ + } + sort.Sort(sort.Reverse(pl)) + return pl } type pair struct { @@ -290,14 +206,3 @@ type pairlist []pair func (p pairlist) Len() int { return len(p) } func (p pairlist) Less(i, j int) bool { return p[i].Value < p[j].Value } func (p pairlist) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - -func rankByWordCount(wordFrequencies map[string]int) pairlist { - pl := make(pairlist, len(wordFrequencies)) - i := 0 - for k, v := range wordFrequencies { - pl[i] = pair{k, v} - i++ - } - sort.Sort(sort.Reverse(pl)) - return pl -} From 5045f2feb6070a3d8680a3ca16dacc694e096f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Tue, 13 May 2025 21:00:37 +0900 Subject: [PATCH 6/7] Update main.go From 4303b8f2f265632cbe17b20f2ab92c5799146edc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Tue, 13 May 2025 21:02:52 +0900 Subject: [PATCH 7/7] Update main.go --- plugin/wordcount/main.go | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/plugin/wordcount/main.go b/plugin/wordcount/main.go index ea218ca18e..1549388b80 100644 --- a/plugin/wordcount/main.go +++ b/plugin/wordcount/main.go @@ -11,6 +11,12 @@ import ( "sync" "time" + "github.com/go-ego/gse" + "github.com/golang/freetype" + "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" + "github.com/wcharczuk/go-chart/v2" + "github.com/FloatTech/floatbox/binary" fcext "github.com/FloatTech/floatbox/ctxext" "github.com/FloatTech/floatbox/file" @@ -18,20 +24,17 @@ import ( "github.com/FloatTech/zbputils/control" "github.com/FloatTech/zbputils/ctxext" "github.com/FloatTech/zbputils/img/text" - "github.com/go-ego/gse" - "github.com/golang/freetype" - "github.com/sirupsen/logrus" - "github.com/tidwall/gjson" - "github.com/wcharczuk/go-chart/v2" + zero "github.com/wdvxdr1123/ZeroBot" "github.com/wdvxdr1123/ZeroBot/message" + "github.com/wdvxdr1123/ZeroBot/utils/helper" ) var ( re = regexp.MustCompile(`^[一-龥]+$`) stopwords []string + seg gse.Segmenter ) -var seg gse.Segmenter func init() { engine := control.AutoRegister(&ctrl.Options[*zero.Ctx]{ @@ -42,7 +45,10 @@ func init() { }) cachePath := engine.DataFolder() + "cache/" // 读取gse内置中文词典 - _ = seg.LoadDictEmbed("zh_s") + err := seg.LoadDictEmbed() + if err != nil { + panic(err) + } _ = os.RemoveAll(cachePath) _ = os.MkdirAll(cachePath, 0755) engine.OnRegex(`^热词\s?(\d*)\s?(\d*)$`, zero.OnlyGroup, fcext.DoOnceOnSuccess(func(ctx *zero.Ctx) bool { @@ -124,7 +130,7 @@ func init() { if tex == "" { continue } - segments := seg.Segment([]byte(tex)) + segments := seg.Segment(helper.StringToBytes(tex)) words := gse.ToSlice(segments, true) for _, word := range words { word = strings.TrimSpace(word)