Bladeren bron

banwords: move WordsSearch to lib

lanvent 3 jaren geleden
bovenliggende
commit
583440b82b
2 gewijzigde bestanden met toevoegingen van 1 en 251 verwijderingen
  1. 0 250
      plugins/banwords/WordsSearch.py
  2. 1 1
      plugins/banwords/banwords.py

+ 0 - 250
plugins/banwords/WordsSearch.py

@@ -1,250 +0,0 @@
-#!/usr/bin/env python
-# -*- coding:utf-8 -*-
-# ToolGood.Words.WordsSearch.py
-# 2020, Lin Zhijun, https://github.com/toolgood/ToolGood.Words
-# Licensed under the Apache License 2.0
-# 更新日志
-# 2020.04.06 第一次提交
-# 2020.05.16 修改,支持大于0xffff的字符
-
-__all__ = ['WordsSearch']
-__author__ = 'Lin Zhijun'
-__date__ = '2020.05.16'
-
-class TrieNode():
-    """Mutable trie node used while the keyword automaton is being built.
-
-    Attributes:
-        Index: position of the node in the builder's flattened node list
-            (stays 0 until the builder assigns it).
-        Layer: depth of the node in the trie; 0 means "not assigned yet".
-        End: True once at least one keyword index was recorded on the node.
-        Char: key under which the parent stores this node.
-        Results: keyword indexes recorded through SetResults, in call order.
-        m_values: child nodes keyed by the value passed to Add.
-        Failure: failure link, filled in by the builder (None until then).
-        Parent: node whose Add call created this node (None for a root).
-    """
-
-    def __init__(self):
-        # The original assigned Index twice; one assignment is enough.
-        self.Index = 0
-        self.Layer = 0
-        self.End = False
-        self.Char = ''
-        self.Results = []
-        self.m_values = {}
-        self.Failure = None
-        self.Parent = None
-
-    def Add(self, c):
-        """Return the child stored under key c, creating it when missing."""
-        if c in self.m_values:
-            return self.m_values[c]
-        node = TrieNode()
-        node.Parent = self
-        node.Char = c
-        self.m_values[c] = node
-        return node
-
-    def SetResults(self, index):
-        """Mark the node as a keyword end and record the keyword index.
-
-        Duplicates are kept: calling this twice with the same index
-        appends it twice.
-        """
-        if not self.End:
-            self.End = True
-        self.Results.append(index)
-
-class TrieNode2():
-    """Automaton state used by the search routines.
-
-    Attributes:
-        End: True once at least one keyword index was recorded on the state.
-        Results: distinct keyword indexes, in the order they were recorded.
-        m_values: target states keyed by integer character code.
-        minflag / maxflag: bounds maintained by Add and used by TryGetValue
-            to reject out-of-range codes before the dictionary lookup.
-    """
-
-    def __init__(self):
-        self.End = False
-        self.Results = []
-        self.m_values = {}
-        self.minflag = 0xffff
-        self.maxflag = 0
-
-    def Add(self, c, node3):
-        """Register node3 as the target for code c and widen the bounds."""
-        self.minflag = min(self.minflag, c)
-        self.maxflag = max(self.maxflag, c)
-        self.m_values[c] = node3
-
-    def SetResults(self, index):
-        """Mark the state as a keyword end; record index unless already present."""
-        self.End = True
-        if index not in self.Results:
-            self.Results.append(index)
-
-    def HasKey(self, c):
-        """Return True when a transition for code c is registered."""
-        return c in self.m_values
-
-    def TryGetValue(self, c):
-        """Return the target state for code c, or None when there is none."""
-        if c < self.minflag or c > self.maxflag:
-            return None
-        return self.m_values.get(c)
-
-
-class WordsSearch():
-    """Multi-keyword text search backed by a trie with failure links.
-
-    Call SetKeywords once to build the automaton, then use FindFirst,
-    FindAll, ContainsAny or Replace on any number of texts.
-    """
-
-    def __init__(self):
-        # Start from an empty state so the search methods work (and simply
-        # find nothing) before SetKeywords is called; the original stored a
-        # plain dict here, which made every search raise AttributeError.
-        self._first = TrieNode2()
-        self._keywords = []
-        self._indexs = []
-
-    def SetKeywords(self, keywords):
-        """Build the search automaton for the given keywords.
-
-        Args:
-            keywords: indexable sequence of strings. Result dictionaries
-                report each keyword together with its position in this
-                sequence.
-        """
-        self._keywords = keywords
-        self._indexs = list(range(len(keywords)))
-
-        # Phase 1: plain trie, remembering the nodes of every depth.
-        root = TrieNode()
-        layers = {}
-        for i, word in enumerate(keywords):
-            nd = root
-            for depth, ch in enumerate(word, 1):
-                nd = nd.Add(ord(ch))
-                if nd.Layer == 0:
-                    nd.Layer = depth
-                    layers.setdefault(depth, []).append(nd)
-            nd.SetResults(i)
-
-        # Flatten shallow-to-deep so a node's failure target is always
-        # finished before the node itself is processed.
-        nodes = [root]
-        for depth in sorted(layers):
-            nodes.extend(layers[depth])
-
-        # Phase 2: failure links. root.Failure is still None here, which is
-        # what terminates the walk up the failure chain.
-        for i, nd in enumerate(nodes):
-            if i == 0:
-                continue
-            nd.Index = i
-            r = nd.Parent.Failure
-            c = nd.Char
-            while r is not None and c not in r.m_values:
-                r = r.Failure
-            if r is None:
-                nd.Failure = root
-            else:
-                nd.Failure = r.m_values[c]
-                # Keywords ending at the failure target also end here.
-                for inherited in nd.Failure.Results:
-                    nd.SetResults(inherited)
-        root.Failure = root
-
-        # Phase 3: compile into TrieNode2 states, copying the transitions
-        # and results found along each node's failure chain into the state
-        # itself. The root is excluded; the search falls back to it instead.
-        compiled = [TrieNode2() for _ in nodes]
-        for old, new in zip(nodes, compiled):
-            for key, child in old.m_values.items():
-                new.Add(key, compiled[child.Index])
-            for item in old.Results:
-                new.SetResults(item)
-
-            fallback = old.Failure
-            while fallback is not root:
-                for key, child in fallback.m_values.items():
-                    if not new.HasKey(key):
-                        new.Add(key, compiled[child.Index])
-                for item in fallback.Results:
-                    new.SetResults(item)
-                fallback = fallback.Failure
-
-        self._first = compiled[0]
-
-    def _next_state(self, state, code):
-        """Return the state reached from state on code, or None.
-
-        Tries the current state first (when there is one) and falls back to
-        the root state when it has no transition for code.
-        """
-        if state is not None:
-            target = state.TryGetValue(code)
-            if target is not None:
-                return target
-        return self._first.TryGetValue(code)
-
-    def _make_result(self, item, end):
-        """Build the result dictionary for keyword number item ending at end."""
-        keyword = self._keywords[item]
-        return {"Keyword": keyword, "Success": True, "End": end, "Start": end + 1 - len(keyword), "Index": self._indexs[item]}
-
-    def FindFirst(self, text):
-        """Return the first match in text, or None when nothing matches.
-
-        The match is a dict with the keys "Keyword", "Success", "End"
-        (index of the last matched character), "Start" and "Index"
-        (position of the keyword in the list given to SetKeywords).
-        """
-        state = None
-        for index, ch in enumerate(text):
-            state = self._next_state(state, ord(ch))
-            if state is not None and state.End:
-                return self._make_result(state.Results[0], index)
-        return None
-
-    def FindAll(self, text):
-        """Return a list with one result dict per keyword occurrence in text.
-
-        Each dict has the same keys as the FindFirst result.
-        """
-        matches = []
-        state = None
-        for index, ch in enumerate(text):
-            state = self._next_state(state, ord(ch))
-            if state is not None and state.End:
-                matches.extend(self._make_result(item, index) for item in state.Results)
-        return matches
-
-    def ContainsAny(self, text):
-        """Return True when text contains at least one keyword."""
-        state = None
-        for ch in text:
-            state = self._next_state(state, ord(ch))
-            if state is not None and state.End:
-                return True
-        return False
-
-    def Replace(self, text, replaceChar='*'):
-        """Return text with every matched keyword character replaced.
-
-        Args:
-            text: string to filter.
-            replaceChar: string written in place of each matched character.
-        """
-        result = list(text)
-
-        state = None
-        for i, ch in enumerate(text):
-            state = self._next_state(state, ord(ch))
-            if state is not None and state.End:
-                # Mask the span of the first keyword recorded on this state.
-                length = len(self._keywords[state.Results[0]])
-                for j in range(i + 1 - length, i + 1):
-                    result[j] = replaceChar
-        return ''.join(result)

+ 1 - 1
plugins/banwords/banwords.py

@@ -7,7 +7,7 @@ from bridge.reply import Reply, ReplyType
 import plugins
 from plugins import *
 from common.log import logger
-from .WordsSearch import WordsSearch
+from .lib.WordsSearch import WordsSearch
 
 
 @plugins.register(name="Banwords", desire_priority=100, hidden=True, desc="判断消息中是否有敏感词、决定是否回复。", version="1.0", author="lanvent")