From d3dcfc84de7d64c610a25476500f296328e7cf68 Mon Sep 17 00:00:00 2001 From: Shibo Lyu Date: Fri, 20 May 2022 12:45:26 +0800 Subject: [PATCH] Basics. --- Package.resolved | 14 ++++ Package.swift | 48 ++++++------ README.md | 4 +- Sources/Readability/CommonMetrics.swift | 74 +++++++++++++++++++ Sources/Readability/Scorer.swift | 24 ++++++ .../Scorers/FleschKincaidGrade.swift | 28 +++++++ .../Scorers/FleschReadingEase.swift | 28 +++++++ Sources/Readability/ScoringTask.swift | 57 ++++++++++++++ Sources/Readability/Tokenization.swift | 39 ++++++++++ .../ReadabilityFramework.swift | 6 -- .../ReadabilityFrameworkTests.swift | 11 --- Tests/ReadabilityTests/CommonMetrics.swift | 32 ++++++++ Tests/ReadabilityTests/Tokenization.swift | 28 +++++++ 13 files changed, 352 insertions(+), 41 deletions(-) create mode 100644 Package.resolved create mode 100644 Sources/Readability/CommonMetrics.swift create mode 100644 Sources/Readability/Scorer.swift create mode 100644 Sources/Readability/Scorers/FleschKincaidGrade.swift create mode 100644 Sources/Readability/Scorers/FleschReadingEase.swift create mode 100644 Sources/Readability/ScoringTask.swift create mode 100644 Sources/Readability/Tokenization.swift delete mode 100644 Sources/ReadabilityFramework/ReadabilityFramework.swift delete mode 100644 Tests/ReadabilityFrameworkTests/ReadabilityFrameworkTests.swift create mode 100644 Tests/ReadabilityTests/CommonMetrics.swift create mode 100644 Tests/ReadabilityTests/Tokenization.swift diff --git a/Package.resolved b/Package.resolved new file mode 100644 index 0000000..60cc10b --- /dev/null +++ b/Package.resolved @@ -0,0 +1,14 @@ +{ + "pins" : [ + { + "identity" : "syllable-counter-swift", + "kind" : "remoteSourceControl", + "location" : "https://github.com/wfreitag/syllable-counter-swift", + "state" : { + "branch" : "master", + "revision" : "029c8568b4d060174284fdedd7473863768a903b" + } + } + ], + "version" : 2 +} diff --git a/Package.swift b/Package.swift index 42e95d2..3542f3e 100644 --- a/Package.swift +++ b/Package.swift @@ -1,28 +1,32 @@ // swift-tools-version: 5.6 -// The swift-tools-version declares the minimum version of Swift required to build this package. import PackageDescription let package = Package( - name: "ReadabilityFramework", - products: [ - // Products define the executables and libraries a package produces, and make them visible to other packages. - .library( - name: "ReadabilityFramework", - targets: ["ReadabilityFramework"]), - ], - dependencies: [ - // Dependencies declare other packages that this package depends on. - // .package(url: /* package url */, from: "1.0.0"), - ], - targets: [ - // Targets are the basic building blocks of a package. A target can define a module or a test suite. - // Targets can depend on other targets in this package, and on products in packages this package depends on. - .target( - name: "ReadabilityFramework", - dependencies: []), - .testTarget( - name: "ReadabilityFrameworkTests", - dependencies: ["ReadabilityFramework"]), - ] + name: "Readability", + platforms: [ + .macOS(.v10_14), + .iOS(.v12), + .tvOS(.v12), + .watchOS(.v5), + .macCatalyst(.v14) + ], + products: [ + .library( + name: "Readability", + targets: ["Readability"]), + ], + dependencies: [ + .package(url: "https://github.com/wfreitag/syllable-counter-swift", branch: "master") + ], + targets: [ + .target( + name: "Readability", + dependencies: [ + .product(name: "SyllableCounter", package: "syllable-counter-swift") + ]), + .testTarget( + name: "ReadabilityTests", + dependencies: ["Readability"]), + ] ) diff --git a/README.md b/README.md index bec2b19..ad79913 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ -# ReadabilityFramework +# Readability.framework -A description of this package. +A package that provides the calculation of common readability metrics. diff --git a/Sources/Readability/CommonMetrics.swift b/Sources/Readability/CommonMetrics.swift new file mode 100644 index 0000000..a7d2363 --- /dev/null +++ b/Sources/Readability/CommonMetrics.swift @@ -0,0 +1,74 @@ +// +// CommonMetrics.swift +// Readability +// +// Created by Shibo Lyu on 2022/5/19. +// + +import Foundation +import SyllableCounter + +enum RACommonMetric { + case sentenceCount + case wordCount + case syllableCount + case avgWordsPerSentence + case avgSyllablesPerWord +} + +struct RACommonMetricsCalculator { + typealias Results = [RACommonMetric: Double] + + private var metrics: Set + + init(metrics: Set) { + self.metrics = metrics + } + + private func countSyllables(word: String) -> Int { + SyllableCounter.shared.count(word: word) + } + + func calculate(on text: String) -> Results { + let shouldDoSentences = metrics.contains(.sentenceCount) || metrics.contains(.avgWordsPerSentence) + let shouldDoWords = metrics.contains(.wordCount) + || metrics.contains(.avgWordsPerSentence) + || metrics.contains(.avgSyllablesPerWord) + let shouldCountSyllables = metrics.contains(.avgSyllablesPerWord) || metrics.contains(.syllableCount) + + var sentenceCount = 0 + var wordCount = 0 + var syllableCount = 0 + + if shouldDoSentences { + sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in } + } + + if shouldDoWords { + let tokenizer = RATokenizer(text, unit: .word) + if shouldCountSyllables { + wordCount = tokenizer.enumerateTokens { word in + syllableCount += countSyllables(word: word) + } + } else { + wordCount = tokenizer.enumerateTokens { _ in } + } + } + + return metrics.reduce([:]) { dict, metric in + var value = 0.0 + + switch metric { + case .syllableCount: value = Double(syllableCount) + case .sentenceCount: value = Double(sentenceCount) + case .wordCount: value = Double(wordCount) + case .avgWordsPerSentence: value = Double(wordCount) / Double(sentenceCount) + case .avgSyllablesPerWord: value = Double(syllableCount) / Double(wordCount) + } + + var d = dict + d[metric] = value + return d + } + } +} diff --git a/Sources/Readability/Scorer.swift b/Sources/Readability/Scorer.swift new file mode 100644 index 0000000..4f5d590 --- /dev/null +++ b/Sources/Readability/Scorer.swift @@ -0,0 +1,24 @@ +// +// ReadabilityScorer.swift +// Readability +// +// Created by Shibo Lyu on 2022/5/19. +// + +import Foundation +import SwiftUI + +struct RAScorerMeta { + let name: String + let creator: String + /** Should be in APA format. */ + let citation: String +} + +protocol RAScorer { + static var meta: RAScorerMeta { get } + static var requiresCommonMetrics: Set? { get } + + init() + func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double +} diff --git a/Sources/Readability/Scorers/FleschKincaidGrade.swift b/Sources/Readability/Scorers/FleschKincaidGrade.swift new file mode 100644 index 0000000..ec594d8 --- /dev/null +++ b/Sources/Readability/Scorers/FleschKincaidGrade.swift @@ -0,0 +1,28 @@ +// +// File.swift +// Readability +// +// Created by Shibo Lyu on 2022/5/20. +// + +import Foundation + +struct RAFleschKincaidGradeScorer: RAScorer { + static let requiresCommonMetrics: Set? = [ + .avgWordsPerSentence, + .avgSyllablesPerWord + ] + + static let meta = RAScorerMeta( + name: "Flesch-Kincaid Grade", + creator: "John P. Kincaid", + citation: "Kincaid, J. P., Fishburne Jr, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel. Naval Technical Training Command Millington TN Research Branch." + ) + + func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double { + let asl = metrics![.avgWordsPerSentence]! + let asw = metrics![.avgSyllablesPerWord]! + + return (0.39 * asl) + (11.8 * asw) - 15.59 + } +} diff --git a/Sources/Readability/Scorers/FleschReadingEase.swift b/Sources/Readability/Scorers/FleschReadingEase.swift new file mode 100644 index 0000000..e2043ba --- /dev/null +++ b/Sources/Readability/Scorers/FleschReadingEase.swift @@ -0,0 +1,28 @@ +// +// File.swift +// Readability +// +// Created by Shibo Lyu on 2022/5/19. +// + +import Foundation + +struct RAFleschReadingEaseScorer: RAScorer { + static let requiresCommonMetrics: Set? = [ + .avgWordsPerSentence, + .avgSyllablesPerWord + ] + + static let meta = RAScorerMeta( + name: "Flesch Reading Ease", + creator: "Rudolf Flesch", + citation: "Flesch, R. (1948). A new readability yardstick. Journal of applied psychology, 32(3), 221." + ) + + func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double { + let asl = metrics![.avgWordsPerSentence]! + let asw = metrics![.avgSyllablesPerWord]! + + return 206.835 - (1.015 * asl) - (84.6 * asw) + } +} diff --git a/Sources/Readability/ScoringTask.swift b/Sources/Readability/ScoringTask.swift new file mode 100644 index 0000000..cba0499 --- /dev/null +++ b/Sources/Readability/ScoringTask.swift @@ -0,0 +1,57 @@ +// +// ScoringTask.swift +// Readability +// +// Created by Shibo Lyu on 2022/5/19. +// + +import Foundation + +class RAScoringTask { + typealias Results = [Scorer: Double] + + enum Scorer: Hashable, CaseIterable, Comparable { + case fleschReadingEase + case fleschKincaidGrade + } + + static let availableScorers: [Scorer: RAScorer.Type] = [ + .fleschReadingEase: RAFleschReadingEaseScorer.self, + .fleschKincaidGrade: RAFleschKincaidGradeScorer.self + ] + + var scorersToRun: Set = Set(Scorer.allCases) + + var scorers: [Scorer: RAScorer.Type] { + scorersToRun.reduce([:]) { partialResult, scorer in + var d = partialResult + d[scorer] = Self.availableScorers[scorer]! + return d + } + } + + var commonMetricsToGet: Set { + scorers + .map { $0.1.requiresCommonMetrics } + .reduce([]) { partialResult, scorerMetrics in + guard let metrics = scorerMetrics else { return partialResult } + return partialResult.union(metrics) + } + } + + func run(on text: String) -> Results { + var commonMetrics: RACommonMetricsCalculator.Results? = nil + + if !commonMetricsToGet.isEmpty { + commonMetrics = RACommonMetricsCalculator(metrics: commonMetricsToGet) + .calculate(on: text) + } + + return scorers.reduce([:]) { partialResult, scorerPair in + var d = partialResult + let (scorer, Scorer) = scorerPair + d[scorer] = Scorer.init().score(text, metrics: commonMetrics) + return d + } + } +} diff --git a/Sources/Readability/Tokenization.swift b/Sources/Readability/Tokenization.swift new file mode 100644 index 0000000..0b21fcd --- /dev/null +++ b/Sources/Readability/Tokenization.swift @@ -0,0 +1,39 @@ +// +// Tokenization.swift +// Readability +// +// Created by Shibo Lyu on 2022/5/19. +// + +import Foundation +import NaturalLanguage + +struct RATokenizer { + private var unit: NLTokenUnit + private var text: String + + private var tokenizer: NLTokenizer + + init (_ text: String, unit: NLTokenUnit, language: NLLanguage? = nil) { + self.text = text + self.unit = unit + tokenizer = NLTokenizer(unit: unit) + if let language = language { + tokenizer.setLanguage(language) + } + tokenizer.string = text + } + + /** Returns token count. */ + func enumerateTokens(using callBack: (String) -> Void) -> Int { + var tokenCount = 0 + tokenizer.enumerateTokens(in: text.startIndex..