NSLinguisticTagger是iOS自然語言處理工具,支持中文簡體,繁體,日文,英文等多種語言。使用其做了一個分詞的功能,結果很準確。連南京市長江大橋這樣易混淆的句子都能夠分的很準確。
// Word-segmentation demo: splits the text held by self.text into tokens with
// NSLinguisticTagger and collects them, one per line, into printStr.
// NOTE(review): NSLinguisticTagger is deprecated from iOS 14 in favour of the
// NaturalLanguage framework (NLTokenizer) — consider migrating.
NSString *string = self.text.text;

// Skip whitespace and punctuation tokens; ask the tagger to join multi-word names.
NSLinguisticTaggerOptions options = NSLinguisticTaggerOmitWhitespace
                                  | NSLinguisticTaggerOmitPunctuation
                                  | NSLinguisticTaggerJoinNames;

// NSLinguisticTaggerUnit is a plain enumeration, not a bitmask, so units must not
// be OR-ed together (Word is 0, so Word|Document silently meant Document).
// Tokens are words here, so query the schemes available for the word unit.
NSArray<NSString *> *arr = [NSLinguisticTagger availableTagSchemesForUnit:NSLinguisticTaggerUnitWord
                                                                 language:@"zh-Hans"];
NSLinguisticTagger *tagger = [[NSLinguisticTagger alloc] initWithTagSchemes:arr options:options];
tagger.string = string;

// Collect the substring of every token the tagger reports over the whole text.
NSMutableArray<NSString *> *array = [[NSMutableArray alloc] init];
[tagger enumerateTagsInRange:NSMakeRange(0, string.length)
                      scheme:NSLinguisticTagSchemeScript
                     options:options
                  usingBlock:^(NSString * _Nonnull tag, NSRange tokenRange, NSRange sentenceRange, BOOL * _Nonnull stop) {
    [array addObject:[string substringWithRange:tokenRange]];
}];

// Build the listing in a mutable buffer instead of re-allocating an immutable
// string per token; every token is followed by "\n" (including the last one).
NSMutableString *listing = [NSMutableString string];
for (NSString *word in array) {
    [listing appendFormat:@"%@\n", word];
}
NSString *printStr = [listing copy];
效果如圖