// Translation of [Peter Norvig's spell checker](http://norvig.com/spell-correct.html) into Swift.
// Sample input corpus [here](http://norvig.com/big.txt)
import Foundation
extension SequenceType {
func mapSome<T>(@noescape transform: (Self.Generator.Element) -> T?) -> [T] {
return Array(self.map(transform).filter { $0 != nil }.map { $0! })
}
}
struct SpellingCorrector {
var knownWords: [String:Int] = [:]
init?(contentsOfFile file: String) {
do {
let text = try String(contentsOfFile: file, encoding: NSUTF8StringEncoding)
print("Loaded file \(file)")
let words = text.lowercaseString.characters.split { !("a"..."z").contains($0) }.map(String.init)
print("Split string into \(words.count) words")
for word in words { self.train(word) }
print("Trained \(knownWords.count) unique words")
} catch _ as NSError {
return nil
}
}
mutating func train(word: String) {
knownWords[word] = knownWords[word]?.successor() ?? 1
}
// Given a word, produce a set of possible alternatives with
// letters transposed, deleted, replaced or rogue characters inserted
func edits1(word: String) -> Set<String> {
if word.isEmpty { return [] }
let splits = word.characters.indices.map {
return (word[word.startIndex..<$0], word[$0..<word.endIndex])
}
let deletes = splits.map { (left, right) -> String in
return left + right.substringFromIndex(right.startIndex.advancedBy(1))
}
let transposes = (splits + [(word, "")]).mapSome { (left, right) -> String? in
if let fst = right.characters.first {
let drop1 = right.substringFromIndex(right.startIndex.advancedBy(1))
if let snd = String(drop1).characters.first {
let drop2 = drop1.substringFromIndex(drop1.startIndex.advancedBy(1))
return left + String(snd) + String(fst) + drop2
}
}
return nil
}
let alphabet = "abcdefghijklmnopqrstuvwxyz"
let replaces = splits.flatMap { left, right in
alphabet.characters.map {
left + String($0) + right.substringFromIndex(right.startIndex.advancedBy(1))
}
}
let inserts = (splits + [(word, "")]).flatMap { left, right -> [String] in
alphabet.characters.map {
return left + String($0) + right
}
}
return Set(deletes + transposes + replaces + inserts)
}
func knownEdits2(word: String) -> Set<String>? {
var known_edits: Set<String> = []
for edit in edits1(word) {
if let k = known(edits1(edit)) {
known_edits.unionInPlace(k)
}
}
return known_edits.isEmpty ? nil : known_edits
}
func known<S: SequenceType where S.Generator.Element == String>(words: S) -> Set<String>? {
let s = Set(words.filter { self.knownWords.indexForKey($0) != nil })
return s.isEmpty ? nil : s
}
func correct(word: String) -> String {
let candidates = known([word]) ?? known(edits1(word)) ?? knownEdits2(word)
return (candidates ?? []).reduce(word, combine: {
(knownWords[$0] ?? 1) < (knownWords[$1] ?? 1) ? $1 : $0
})
}
}
let tests1 = ["access": "acess", "accessing": "accesing", "accommodation":
// .../...
"whether": "wether", "wrote": "rote wote"]
let tests2 = ["forbidden": "forbiden", "decisions": "deciscions descisions",
// .../...
"together": "togehter", "profits": "proffits"]
func spelltest(tests: [String:String], bias: Int? = nil, verbose: Bool = false) -> [String:Any]{
var n = 0, bad = 0, unknown = 0, start = NSDate()
var checker = SpellingCorrector(contentsOfFile: "big.txt")!
if let bias = bias {
for target in tests.keys {
checker.knownWords[target]! += bias
}
}
for (target, wrongs) in tests {
let strings = wrongs.characters.split{$0 == " "}.map(String.init)
for wrong in strings {
n += 1
let w = checker.correct(wrong)
if w != target {
bad += 1
unknown += (checker.knownWords[target] != nil ? 1 : 0)
if verbose {
print ("\(wrong) => \(w) (\(checker.knownWords[w])); expected \(target) (\(checker.knownWords[target]))")
}
}
}
}
let pct = Int(100.0 - 100.0*Double(bad)/Double(n))
let end = NSDate()
return ["bad": bad, "n": n, "bias": bias, "pct": pct, "unknown": unknown, "secs": end.timeIntervalSinceDate(start)]
}
print(spelltest(tests1, bias: nil, verbose: true))
print(spelltest(tests2))
Ich glaube, ich sehe, aber wenn ich versuche, das Beispiel für die Verwendung in [die Änderungen Funktion] Stecker (https://gist.github.com/airspeedswift/6d8c9d95f0d812f58be3 # file-spelling-swift-L36) als 'return (word [RangeStart (Ende: $ 0)], Word [RangeStart (Start: $ 0)]]' es gibt mir immer noch einen Compilerfehler. –
@DougSmith Die Zeile, die Sie verknüpfen, scheint die 'String'-Erweiterung anstelle der' Array'-Erweiterung zu verwenden, die Sie oben gefragt haben. Außerdem habe ich nie 'Indizes' gesehen, die auf einer' String'-Instanz verwendet wurden. (Ist 'indexes' eine veraltete globale Funktion für 'CollectionType'-Objekte, die jetzt nicht verfügbar ist?). – dfri
Wie würde ich das verwenden? Auch "Indizes" sind einfach die "Zeichen". –