This commit is contained in:
Shibo Lyu 2022-05-20 12:45:26 +08:00
parent 2640bebf90
commit d3dcfc84de
13 changed files with 352 additions and 41 deletions

14
Package.resolved Normal file
View file

@ -0,0 +1,14 @@
{
"pins" : [
{
"identity" : "syllable-counter-swift",
"kind" : "remoteSourceControl",
"location" : "https://github.com/wfreitag/syllable-counter-swift",
"state" : {
"branch" : "master",
"revision" : "029c8568b4d060174284fdedd7473863768a903b"
}
}
],
"version" : 2
}

View file

@ -1,28 +1,32 @@
// swift-tools-version: 5.6
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription
// Swift Package Manager manifest: one library product, one library target and one test target.
// NOTE(review): each name/target entry below appears twice ("ReadabilityFramework" and "Readability") —
// presumably the before/after sides of a rename shown together; confirm which lines are in the committed manifest.
let package = Package(
name: "ReadabilityFramework",
name: "Readability",
// Minimum OS versions declared for each supported platform.
platforms: [
.macOS(.v10_14),
.iOS(.v12),
.tvOS(.v12),
.watchOS(.v5),
.macCatalyst(.v14)
],
products: [
// Products define the executables and libraries a package produces, and make them visible to other packages.
.library(
name: "ReadabilityFramework",
targets: ["ReadabilityFramework"]),
name: "Readability",
targets: ["Readability"]),
],
dependencies: [
// Dependencies declare other packages that this package depends on.
// .package(url: /* package url */, from: "1.0.0"),
// This dependency follows the `master` branch rather than a version requirement;
// the exact revision in use is the one recorded in Package.resolved.
.package(url: "https://github.com/wfreitag/syllable-counter-swift", branch: "master")
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "ReadabilityFramework",
dependencies: []),
name: "Readability",
dependencies: [
// The library target links the `SyllableCounter` product of the package declared above.
.product(name: "SyllableCounter", package: "syllable-counter-swift")
]),
.testTarget(
name: "ReadabilityFrameworkTests",
dependencies: ["ReadabilityFramework"]),
name: "ReadabilityTests",
dependencies: ["Readability"]),
]
)

View file

@ -1,3 +1,3 @@
# ReadabilityFramework
# Readability.framework
A description of this package.
A Swift package that calculates common readability metrics, such as Flesch Reading Ease and Flesch-Kincaid Grade.

View file

@ -0,0 +1,74 @@
//
// CommonMetrics.swift
// Readability
//
// Created by Shibo Lyu on 2022/5/19.
//
import Foundation
import SyllableCounter
/// Text statistics that `RACommonMetricsCalculator` can produce and that
/// scorers can declare as inputs.
enum RACommonMetric {
    // Raw token totals.
    case sentenceCount, wordCount, syllableCount

    // Ratios derived from the totals above.
    case avgWordsPerSentence, avgSyllablesPerWord
}
/// Computes a chosen set of `RACommonMetric` values for a text, running only
/// the tokenization passes those metrics actually need.
struct RACommonMetricsCalculator {
    /// One value per requested metric, keyed by the metric.
    typealias Results = [RACommonMetric: Double]

    /// The metrics this calculator was asked to produce.
    private let metrics: Set<RACommonMetric>

    /// - Parameter metrics: The metrics `calculate(on:)` should return.
    init(metrics: Set<RACommonMetric>) {
        self.metrics = metrics
    }

    /// Syllable count for a single word, delegated to `SyllableCounter`.
    private func countSyllables(word: String) -> Int {
        SyllableCounter.shared.count(word: word)
    }

    /// `numerator / denominator` as a `Double`, or `0` when the denominator is
    /// zero, so empty input yields a finite average instead of NaN/infinity.
    private static func ratio(_ numerator: Int, _ denominator: Int) -> Double {
        guard denominator != 0 else { return 0 }
        return Double(numerator) / Double(denominator)
    }

    /// Calculates every requested metric for `text`.
    ///
    /// - Parameter text: The text to analyse.
    /// - Returns: A dictionary containing exactly the metrics passed to `init`.
    ///   Averages whose denominator is zero (e.g. for empty text) are reported as `0`.
    func calculate(on text: String) -> Results {
        let needsSyllables = metrics.contains(.syllableCount)
            || metrics.contains(.avgSyllablesPerWord)
        let needsSentences = metrics.contains(.sentenceCount)
            || metrics.contains(.avgWordsPerSentence)
        // Syllables are counted while walking the words, so any syllable-based
        // metric (including `.syllableCount` on its own) requires the word pass.
        let needsWords = needsSyllables
            || metrics.contains(.wordCount)
            || metrics.contains(.avgWordsPerSentence)

        var sentenceCount = 0
        var wordCount = 0
        var syllableCount = 0

        if needsSentences {
            sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
        }

        if needsWords {
            let tokenizer = RATokenizer(text, unit: .word)
            if needsSyllables {
                wordCount = tokenizer.enumerateTokens { word in
                    syllableCount += countSyllables(word: word)
                }
            } else {
                // Skip the per-word syllable lookup when nobody asked for it.
                wordCount = tokenizer.enumerateTokens { _ in }
            }
        }

        return metrics.reduce(into: Results()) { results, metric in
            switch metric {
            case .syllableCount:
                results[metric] = Double(syllableCount)
            case .sentenceCount:
                results[metric] = Double(sentenceCount)
            case .wordCount:
                results[metric] = Double(wordCount)
            case .avgWordsPerSentence:
                results[metric] = Self.ratio(wordCount, sentenceCount)
            case .avgSyllablesPerWord:
                results[metric] = Self.ratio(syllableCount, wordCount)
            }
        }
    }
}

View file

@ -0,0 +1,24 @@
//
// ReadabilityScorer.swift
// Readability
//
// Created by Shibo Lyu on 2022/5/19.
//
import Foundation
import SwiftUI
/// Descriptive information about a readability formula.
struct RAScorerMeta {
    /// Name of the formula.
    let name: String
    /// Person credited with the formula.
    let creator: String
    /// Reference for the formula. Should be in APA format.
    let citation: String

    /// Same labels and order as the memberwise initializer.
    init(name: String, creator: String, citation: String) {
        self.name = name
        self.creator = creator
        self.citation = citation
    }
}
/// A readability formula that reduces a text, plus optional pre-computed common metrics, to one score.
protocol RAScorer {
/// Name, creator and citation for this formula.
static var meta: RAScorerMeta { get }
/// Common metrics this scorer wants passed to `score(_:metrics:)`.
/// `RAScoringTask` unions these across scorers; a `nil` value contributes nothing to that union.
static var requiresCommonMetrics: Set<RACommonMetric>? { get }
/// Argument-less initializer, so an instance can be created from the scorer's metatype.
init()
/// Scores `text`.
/// - Parameters:
///   - text: The text being scored.
///   - metrics: Pre-computed common metrics; `RAScoringTask` passes `nil` when no selected scorer requested any.
/// - Returns: The formula's score for `text`.
func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double
}

View file

@ -0,0 +1,28 @@
//
// File.swift
// Readability
//
// Created by Shibo Lyu on 2022/5/20.
//
import Foundation
/// Flesch-Kincaid Grade scorer: `0.39 * ASL + 11.8 * ASW - 15.59`, where ASL is
/// the average words per sentence and ASW the average syllables per word.
struct RAFleschKincaidGradeScorer: RAScorer {
    /// Both averages used by the formula must be supplied to `score(_:metrics:)`.
    static let requiresCommonMetrics: Set<RACommonMetric>? = [
        .avgWordsPerSentence,
        .avgSyllablesPerWord
    ]

    static let meta = RAScorerMeta(
        name: "Flesch-Kincaid Grade",
        creator: "John P. Kincaid",
        citation: "Kincaid, J. P., Fishburne Jr, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel. Naval Technical Training Command Millington TN Research Branch."
    )

    /// Computes the score from the supplied averages.
    ///
    /// - Parameters:
    ///   - text: The scored text; the formula itself only reads `metrics`.
    ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
    /// - Returns: `0.39 * ASL + 11.8 * ASW - 15.59`.
    /// - Precondition: Both required metrics are present. Omitting them is a
    ///   caller error and traps with a descriptive message.
    func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
        guard
            let averageSentenceLength = metrics?[.avgWordsPerSentence],
            let averageSyllablesPerWord = metrics?[.avgSyllablesPerWord]
        else {
            preconditionFailure(
                "RAFleschKincaidGradeScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics"
            )
        }
        return (0.39 * averageSentenceLength) + (11.8 * averageSyllablesPerWord) - 15.59
    }
}

View file

@ -0,0 +1,28 @@
//
// File.swift
// Readability
//
// Created by Shibo Lyu on 2022/5/19.
//
import Foundation
/// Flesch Reading Ease scorer: `206.835 - 1.015 * ASL - 84.6 * ASW`, where ASL
/// is the average words per sentence and ASW the average syllables per word.
struct RAFleschReadingEaseScorer: RAScorer {
    /// Both averages used by the formula must be supplied to `score(_:metrics:)`.
    static let requiresCommonMetrics: Set<RACommonMetric>? = [
        .avgWordsPerSentence,
        .avgSyllablesPerWord
    ]

    static let meta = RAScorerMeta(
        name: "Flesch Reading Ease",
        creator: "Rudolf Flesch",
        citation: "Flesch, R. (1948). A new readability yardstick. Journal of applied psychology, 32(3), 221."
    )

    /// Computes the score from the supplied averages.
    ///
    /// - Parameters:
    ///   - text: The scored text; the formula itself only reads `metrics`.
    ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
    /// - Returns: `206.835 - 1.015 * ASL - 84.6 * ASW`.
    /// - Precondition: Both required metrics are present. Omitting them is a
    ///   caller error and traps with a descriptive message.
    func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
        guard
            let averageSentenceLength = metrics?[.avgWordsPerSentence],
            let averageSyllablesPerWord = metrics?[.avgSyllablesPerWord]
        else {
            preconditionFailure(
                "RAFleschReadingEaseScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics"
            )
        }
        return 206.835 - (1.015 * averageSentenceLength) - (84.6 * averageSyllablesPerWord)
    }
}

View file

@ -0,0 +1,57 @@
//
// ScoringTask.swift
// Readability
//
// Created by Shibo Lyu on 2022/5/19.
//
import Foundation
/// Runs a selectable set of readability scorers over a text, computing the
/// common metrics they share only once.
class RAScoringTask {
    /// One score per scorer that was run.
    typealias Results = [Scorer: Double]

    /// Identifiers of the readability formulas this task knows about.
    enum Scorer: Hashable, CaseIterable, Comparable {
        case fleschReadingEase
        case fleschKincaidGrade

        /// The type implementing this formula. The exhaustive switch makes a
        /// newly added case a compile error until it is mapped here.
        var implementation: RAScorer.Type {
            switch self {
            case .fleschReadingEase: return RAFleschReadingEaseScorer.self
            case .fleschKincaidGrade: return RAFleschKincaidGradeScorer.self
            }
        }
    }

    /// Every known scorer identifier mapped to its implementing type.
    static let availableScorers: [Scorer: RAScorer.Type] = Dictionary(
        uniqueKeysWithValues: Scorer.allCases.map { scorer in (scorer, scorer.implementation) }
    )

    /// Scorers that `run(on:)` will execute; defaults to all of them.
    var scorersToRun: Set<Scorer> = Set(Scorer.allCases)

    /// The selected scorers together with their implementing types.
    var scorers: [Scorer: RAScorer.Type] {
        Dictionary(
            uniqueKeysWithValues: scorersToRun.map { scorer in (scorer, scorer.implementation) }
        )
    }

    /// Union of the common metrics required by the selected scorers.
    var commonMetricsToGet: Set<RACommonMetric> {
        Self.requiredMetrics(of: scorers)
    }

    /// Scores `text` with every selected scorer.
    ///
    /// - Parameter text: The text to score.
    /// - Returns: The score of each selected scorer, keyed by its identifier.
    func run(on text: String) -> Results {
        // Resolve the selection and its metric needs once; both are computed
        // properties and would otherwise be rebuilt on every access.
        let selectedScorers = scorers
        let requiredMetrics = Self.requiredMetrics(of: selectedScorers)

        var commonMetrics: RACommonMetricsCalculator.Results? = nil
        if !requiredMetrics.isEmpty {
            commonMetrics = RACommonMetricsCalculator(metrics: requiredMetrics)
                .calculate(on: text)
        }

        return selectedScorers.mapValues { scorerType in
            scorerType.init().score(text, metrics: commonMetrics)
        }
    }

    /// Collects the metrics requested by `scorers`; a scorer whose
    /// `requiresCommonMetrics` is `nil` contributes nothing.
    private static func requiredMetrics(of scorers: [Scorer: RAScorer.Type]) -> Set<RACommonMetric> {
        scorers.values.reduce(into: Set<RACommonMetric>()) { collected, scorerType in
            if let wanted = scorerType.requiresCommonMetrics {
                collected.formUnion(wanted)
            }
        }
    }
}

View file

@ -0,0 +1,39 @@
//
// Tokenization.swift
// Readability
//
// Created by Shibo Lyu on 2022/5/19.
//
import Foundation
import NaturalLanguage
/// Wraps an `NLTokenizer` configured for one string and one token unit.
struct RATokenizer {
    private var unit: NLTokenUnit
    private var text: String
    private var tokenizer: NLTokenizer

    /// - Parameters:
    ///   - text: The string to tokenize.
    ///   - unit: The token unit handed to `NLTokenizer`.
    ///   - language: Optional language to set on the tokenizer; left unset when `nil`.
    init (_ text: String, unit: NLTokenUnit, language: NLLanguage? = nil) {
        let configured = NLTokenizer(unit: unit)
        if let language = language {
            configured.setLanguage(language)
        }
        configured.string = text

        self.unit = unit
        self.text = text
        self.tokenizer = configured
    }

    /// Calls `callBack` with each token's text, in order, over the whole string.
    ///
    /// - Returns: The number of tokens visited.
    func enumerateTokens(using callBack: (String) -> Void) -> Int {
        let wholeText = text.startIndex..<text.endIndex
        var visited = 0
        tokenizer.enumerateTokens(in: wholeText) { tokenRange, _ in
            visited += 1
            let token = String(text[tokenRange])
            callBack(token)
            // Always continue to the next token.
            return true
        }
        return visited
    }
}

View file

@ -1,6 +0,0 @@
/// Placeholder type holding a fixed greeting.
public struct ReadabilityFramework {
    /// The greeting; publicly readable, with a private setter.
    public private(set) var text: String

    /// Creates an instance whose `text` is "Hello, World!".
    public init() {
        text = "Hello, World!"
    }
}

View file

@ -1,11 +0,0 @@
import XCTest
@testable import ReadabilityFramework
final class ReadabilityFrameworkTests: XCTestCase {
    /// A freshly created `ReadabilityFramework` exposes the default greeting.
    func testExample() throws {
        let expectedGreeting = "Hello, World!"
        let framework = ReadabilityFramework()

        XCTAssertEqual(framework.text, expectedGreeting)
    }
}

View file

@ -0,0 +1,32 @@
import XCTest
@testable import Readability
/// Tests for `RACommonMetricsCalculator`.
final class CommonMetrics: XCTestCase {
    /// Requesting a single metric returns only that metric.
    func testOnlyWordCount() throws {
        let requested: Set<RACommonMetric> = [.wordCount]
        let expected: RACommonMetricsCalculator.Results = [.wordCount: 2.0]

        let actual = RACommonMetricsCalculator(metrics: requested)
            .calculate(on: "Hello, World!")

        XCTAssertEqual(actual, expected)
    }

    /// Requesting every metric returns all totals and both averages.
    func testAllMetrics() throws {
        let requested: Set<RACommonMetric> = [
            .syllableCount,
            .wordCount,
            .sentenceCount,
            .avgSyllablesPerWord,
            .avgWordsPerSentence,
        ]
        let expected: RACommonMetricsCalculator.Results = [
            .syllableCount: 3.0,
            .wordCount: 2.0,
            .sentenceCount: 1.0,
            .avgSyllablesPerWord: 1.5,
            .avgWordsPerSentence: 2.0
        ]

        let actual = RACommonMetricsCalculator(metrics: requested)
            .calculate(on: "Hello, World!")

        XCTAssertEqual(actual, expected)
    }
}

View file

@ -0,0 +1,28 @@
import XCTest
@testable import Readability
/// Tests for `RATokenizer`.
final class Tokenization: XCTestCase {
    /// Sentence tokenization yields the whole input as one token.
    func testHelloWorldSentence() throws {
        var sentences: [String] = []

        let sentenceCount = RATokenizer("Hello, world!", unit: .sentence)
            .enumerateTokens { sentences.append($0) }

        XCTAssertEqual(sentenceCount, 1)
        XCTAssertEqual(sentences, ["Hello, world!"])
    }

    /// Word tokenization yields the two words without punctuation.
    func testHelloWorldWord() throws {
        var words: [String] = []

        let wordCount = RATokenizer("Hello, world!", unit: .word)
            .enumerateTokens { words.append($0) }

        XCTAssertEqual(wordCount, 2)
        XCTAssertEqual(words, ["Hello", "world"])
    }
}