您好,欢迎访问三七文档
当前位置:首页 > 中学教育 > 高中教育 > ICTCLAS学习笔记
ICTCLAShuangjin@ict.ac.cnICTCLASSTLBUFFER//!@@2006-7-27ICTCLASCDictionary::LoadCDictionary::SaveLoadbResetnFrequencytruenFrequency02006-7-31CDictionarysavem_pModifyTable0m_IndexTableCC_NUMm_pModifyTableinCountm_IndexTable[i].nCountm_IndexTable[i].nCount2006-8-2SaveAddItemDelItemm_pModifyTableSaveAddItemhandlehandlehandle3755//?3755falsebAddm_IndexTablem_IndexTablem_pModifyTabletrue1true1DelItemAddItemhandle-1handle-1handlem_IndexTablem_pModifyTabalehandlehandle-1falsenCountbug//!DelModifiedsWordbufferstringstlclearIsExisthandleGetHandlehandlehandleFindInOriginalTablehandlem_IndexTableFindInModifyTableGetWordTypePreProcessingsWordconststring&MergePOShandle-1deletehandlehandletrueGetMaxMatchhandleIDhandle-1if(pCur!=NULL&&CC_Find(pCur-data.sWord,sWordGet)!=pCur-data.sWord),//!bug!GetPOSValue32563100GetPOSStringGetPOSValuetrueGetFrequencyhandle0OutputOutputCharsm_pModifyTable50MergenRatioconst&handlehandlenRatio/(nRatio+1)(nRation+1)/10AddItem(nRatio+1)Optimumm_IndexTable0xgqgClearDictionarym_IndexTablem_pModifyTablesavem_IndexTable[i].nCountpWordItemHead-1nDeleteAddItemDelItemtMergePOSnDeletenCountAddItemDelItembug//!nCountnCountnDelete-1handle-1m_IndexTablem_pModifyTablebuffervectorlistWORD_ITEMWORD_ITEMhandlefrequencylistvectorvectorWORD_ITEMvectorvectorvectorm_pModifyTablevectorpairintnDelete,listWORD_ITEMvectorvectorvectorWORD_ITEM*vectorpairint,listWORD_ITEM*-1handle1mapmaphandlemap1vectorvectorWORD_ITEM:vectorpairint,listWORD_ITEM2vectorvectorWORD_ITEM*vectorpairint,listWORD_ITEM*3vectormappairstringsword,intnhandle,intnfrequencyvectorpairintndelete,mappairstringsword,intnhandle,intnfrequencyUtility.hUtility.cppCT_SENTENCE_BEGINCT_SENTENCE_ENDCT_SINGLEASCIICT_DELIMITERCT_DELIMITERCT_SINGLE_DELIMITERCT_CHINESECT_LETTERCT_NUMCT_SINGLE_NUMCT_INDEXCT_OTHER//?POSTFIX_SINGLEPOSTFIX_MUTIPLETRANS_ENGLISHTRANS_RUSSIANTRANS_JAPANESE·TT_ENGLISHTT_RUSSIANTT_JAPANESESEPERATOR_C_SENTENCESEPERATOR_C_SUB_SENTENCESEPERATOR_E_SENTENCESEPERATOR_E_SUB_SENTENCESEPERATOR_LINK1.2.3./n/r//!4.(.)SENTENCE_BEGINSENTENCE_ENDWORD_SEGMENTERboolGB2312_Generate(char*sFileName);BG2312sFileNameGB2312161255161255boolCC_Generate(char*sFileName);sFileName176255161255char*CC_Find(constchar*string,constchar*strCharSet);stringNULLintcharType(unsignedchar*sChar);CT_SINGLE_DELIMITERCT_SINGLE_NUM-CT_SINGLE_DELIMITERunsignedintGetCCPrefix(unsignedchar*sSentence);175248boolIsAllChinese(unsignedchar*sString);IsAllSingleByteboolIsAllNonChinese(unsignedchar*sString);trueboolIsAllSingleByte(unsignedchar*sString);boolIsAllNum(unsignedchar*sString);//?boolIsAllIndex(unsignedchar*sString);CT_INDEXboolIsAllLetter(unsignedchar*sString);CT_LETTERboolIsAllDelimiter(unsignedchar*sString);CT_DELIMTERintBinarySearch(intnVal,int*nTable,intnTableLen);-1boolIsForeign(char*sWord);2boolIsAllForeign(char*sWord);boolIsAllChineseNum(char*sWord);intGetForeignCharCount(char*sWord);intGetCharCount(char*sCharSet,char*sWord);sWordsCharSetintGetForeignType(char*sWord);GetForeignCharCountboolPostfixSplit(char*sWord,char*sWordRet,char*sPostfix);buffertrueIsAllChinese175248charTypeIsAllChinesecharTypeCT_CHINESEinlinebuffer2.0unsignedintGetchar(char*sBuffer,char*sChar);012CdynamicArrayvaluelistSetRowFirstboolGetHeadGetTailGetElementvalueSetElementSetEmptylistlistlist2006-8-4CqueueQueue.hQueue.cppQueuestructtagQueueElem{unsignedintnParent;//!unsignedintnIndex;//numberofindexintheparentnodeELEMENT_TYPEeWeight;//theweightoflastpathstructtagQueueElem*next;};CqueuepoppushpopPushpushPoppopbFirstGetm_pLastAccessbModifypopm_pLastAccessm_pLastAccessCqueceweightpoplistNN-m_apCostm_nValueKind1m_nVertex1m_pParentm_pWeightN5*4m_pParent[5][1]Cqueuem_pWeight[5][1]doubleintCNShortPath::ShortPath()m_apCostm_pParentm_pWeight0valuem_nValueKind0INFINITE_VALUEm_pParentGetPathsprivatevoidCNShortPath::GetPaths(unsignedintnNode,unsignedintnIndex,int**nResult,boolbBest)idbBestnNodenNodenIndex0nResultOutput2getpathsbBestMAX_SEGMENT_NUM10CshortPathGetPathsShortPathCsegGraphm_sAtomm_nAtomLengthm_nAtomPOSbufferm_nAtomCountCdynamicArraym_segGraphbuffer2000200vectorvecotrstruct{}stringnPOSstringCT_CHINESECT_INDEXCT_DELIMITERCT_OTHERcharTypem_sAtomm_nAtomLengthm_nAtomPOSbugGenerateWordNetm_segGraphbOriginalFreqflasefalse0truevaluelog0CT_INDEXCT_NUMCT_SINGLE_NUM-27904value030464valuevalue0valuevalue11999HMM2006-8-7Csegment2006-9-5CsegmentCSegmentCSegmentHHMM5432BiSegment2’’3445678//!ContextStat.hContextStat.cppCContextStatCspantagContextstructtagContext{intnKey;//Thekeywordint**aContextArray;//Thecontextarrayint*aTagFreq;//ThetotalnumberatagappearsintnTotalFreq;//ThetotalnumberofallthetagsstructtagContext*next;//ThechainpointertonextContext};CcontextStatif(m_tagType!=TT_NORMAL)m_nTags[0][0]=100;//Begintagelsem_nTags[0][0]=0;//Begintagm_nTags[0][1]=-1;m_dFrequency[0][0]=0;m_nCurLength=1;m_nUnknownIndex=0;m_nStartPos=0;m_nWordPosition[1]=0;m_sWords[0][0]=0;m_tagType=TT_NORMAL;//Defaulttaggingtypem_nWordPosition[0]m_nWordPosition[1]//!if(m_nTags[i-1][1
本文标题:ICTCLAS学习笔记
链接地址:https://www.777doc.com/doc-5593886 .html