Basics.

2025-12-29 10:02:35 +00:00 · 2022-05-20 12:45:26 +08:00 · 2022-05-20 12:45:26 +08:00 · d3dcfc84de
commit d3dcfc84de
parent 2640bebf90
13 changed files with 352 additions and 41 deletions
--- a/Package.resolved
+++ b/Package.resolved
@ -0,0 +1,14 @@
+{
+  "pins" : [
+    {
+      "identity" : "syllable-counter-swift",
+      "kind" : "remoteSourceControl",
+      "location" : "https://github.com/wfreitag/syllable-counter-swift",
+      "state" : {
+        "branch" : "master",
+        "revision" : "029c8568b4d060174284fdedd7473863768a903b"
+      }
+    }
+  ],
+  "version" : 2
+}
--- a/Package.swift
+++ b/Package.swift
@ -1,28 +1,32 @@
 // swift-tools-version: 5.6
-// The swift-tools-version declares the minimum version of Swift required to build this package.

 import PackageDescription

 let package = Package(
-    name: "ReadabilityFramework",
+  name: "Readability",
+  platforms: [
+    .macOS(.v10_14),
+    .iOS(.v12),
+    .tvOS(.v12),
+    .watchOS(.v5),
+    .macCatalyst(.v14)
+  ],
  products: [
-        // Products define the executables and libraries a package produces, and make them visible to other packages.
    .library(
-            name: "ReadabilityFramework",
-            targets: ["ReadabilityFramework"]),
+      name: "Readability",
+      targets: ["Readability"]),
  ],
  dependencies: [
-        // Dependencies declare other packages that this package depends on.
-        // .package(url: /* package url */, from: "1.0.0"),
+    .package(url: "https://github.com/wfreitag/syllable-counter-swift", branch: "master")
  ],
  targets: [
-        // Targets are the basic building blocks of a package. A target can define a module or a test suite.
-        // Targets can depend on other targets in this package, and on products in packages this package depends on.
    .target(
-            name: "ReadabilityFramework",
-            dependencies: []),
+      name: "Readability",
+      dependencies: [
+        .product(name: "SyllableCounter", package: "syllable-counter-swift")
+      ]),
    .testTarget(
-            name: "ReadabilityFrameworkTests",
-            dependencies: ["ReadabilityFramework"]),
+      name: "ReadabilityTests",
+      dependencies: ["Readability"]),
  ]
 )
--- a/README.md
+++ b/README.md
@ -1,3 +1,3 @@
-# ReadabilityFramework
+# Readability.framework

-A description of this package.
+A Swift package that calculates common readability metrics.
--- a/Sources/Readability/CommonMetrics.swift
+++ b/Sources/Readability/CommonMetrics.swift
@ -0,0 +1,74 @@
+//
+//  CommonMetrics.swift
+//  Readability
+//
+//  Created by Shibo Lyu on 2022/5/19.
+//
+
+import Foundation
+import SyllableCounter
+
+/// A text statistic that `RACommonMetricsCalculator` can report.
+enum RACommonMetric {
+  /// Number of sentence tokens found in the text.
+  case sentenceCount
+  /// Number of word tokens found in the text.
+  case wordCount
+  /// Sum of the syllable counts of all word tokens.
+  case syllableCount
+  /// Word count divided by sentence count.
+  case avgWordsPerSentence
+  /// Syllable count divided by word count.
+  case avgSyllablesPerWord
+}
+
+/// Computes the requested `RACommonMetric` values for a piece of text.
+///
+/// Only the tokenization passes needed for the requested metrics are run:
+/// a sentence pass for sentence-based metrics and a word pass for word- or
+/// syllable-based metrics.
+struct RACommonMetricsCalculator {
+  /// Maps each requested metric to its computed value.
+  typealias Results = [RACommonMetric: Double]
+
+  /// The metrics that `calculate(on:)` reports.
+  private let metrics: Set<RACommonMetric>
+
+  /// Creates a calculator that reports exactly `metrics`.
+  init(metrics: Set<RACommonMetric>) {
+    self.metrics = metrics
+  }
+
+  /// Returns the syllable count of a single word, as reported by `SyllableCounter`.
+  private func countSyllables(word: String) -> Int {
+    SyllableCounter.shared.count(word: word)
+  }
+
+  /// Returns `total / count`, or `0` when `count` is zero.
+  ///
+  /// Text without tokens would otherwise produce NaN (0/0), which propagates
+  /// into every score derived from the averages.
+  private func average(_ total: Int, over count: Int) -> Double {
+    guard count != 0 else { return 0 }
+    return Double(total) / Double(count)
+  }
+
+  /// Computes every requested metric for `text`.
+  ///
+  /// - Parameter text: The text to analyse.
+  /// - Returns: One entry per requested metric. Averages whose denominator is
+  ///   zero (for example, for empty text) are reported as `0`.
+  func calculate(on text: String) -> Results {
+    let needsSentences = metrics.contains(.sentenceCount) || metrics.contains(.avgWordsPerSentence)
+    let needsSyllables = metrics.contains(.syllableCount) || metrics.contains(.avgSyllablesPerWord)
+    // Syllables are counted per word, so any syllable metric also needs the
+    // word pass, even when no word metric was requested.
+    let needsWords = needsSyllables
+      || metrics.contains(.wordCount)
+      || metrics.contains(.avgWordsPerSentence)
+
+    var sentenceCount = 0
+    var wordCount = 0
+    var syllableCount = 0
+
+    if needsSentences {
+      sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
+    }
+
+    if needsWords {
+      wordCount = RATokenizer(text, unit: .word).enumerateTokens { word in
+        if needsSyllables {
+          syllableCount += countSyllables(word: word)
+        }
+      }
+    }
+
+    return metrics.reduce(into: [:]) { results, metric in
+      switch metric {
+      case .syllableCount: results[metric] = Double(syllableCount)
+      case .sentenceCount: results[metric] = Double(sentenceCount)
+      case .wordCount: results[metric] = Double(wordCount)
+      case .avgWordsPerSentence: results[metric] = average(wordCount, over: sentenceCount)
+      case .avgSyllablesPerWord: results[metric] = average(syllableCount, over: wordCount)
+      }
+    }
+  }
+}
--- a/Sources/Readability/Scorer.swift
+++ b/Sources/Readability/Scorer.swift
@ -0,0 +1,24 @@
+//
+//  Scorer.swift
+//  Readability
+//
+//  Created by Shibo Lyu on 2022/5/19.
+//
+
+import Foundation
+import SwiftUI
+
+/// Descriptive information about a readability scorer.
+struct RAScorerMeta {
+  /// Name of the readability formula.
+  let name: String
+  /// Person credited with creating the formula.
+  let creator: String
+  /** Bibliographic citation for the formula. Should be in APA format. */
+  let citation: String
+}
+
+/// A readability formula that produces a numeric score for a text.
+protocol RAScorer {
+  /// Descriptive information about this scorer.
+  static var meta: RAScorerMeta { get }
+  /// The common metrics this scorer needs; `nil` contributes nothing to the
+  /// set of metrics that `RAScoringTask` computes before scoring.
+  static var requiresCommonMetrics: Set<RACommonMetric>? { get }
+
+  /// Creates a scorer; `RAScoringTask` instantiates scorers without arguments.
+  init()
+  /// Scores `text`.
+  ///
+  /// - Parameters:
+  ///   - text: The text being scored.
+  ///   - metrics: The common metrics computed for `text`, or `nil` when
+  ///     `RAScoringTask` found no required metrics to compute.
+  /// - Returns: The score produced by this formula.
+  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double
+}
--- a/Sources/Readability/Scorers/FleschKincaidGrade.swift
+++ b/Sources/Readability/Scorers/FleschKincaidGrade.swift
@ -0,0 +1,28 @@
+//
+//  FleschKincaidGrade.swift
+//  Readability
+//
+//  Created by Shibo Lyu on 2022/5/20.
+//
+
+import Foundation
+
+/// Scores text with the Flesch-Kincaid Grade formula.
+struct RAFleschKincaidGradeScorer: RAScorer {
+  /// The formula needs average words per sentence and average syllables per word.
+  static let requiresCommonMetrics: Set<RACommonMetric>? = [
+    .avgWordsPerSentence,
+    .avgSyllablesPerWord
+  ]
+
+  static let meta = RAScorerMeta(
+    name: "Flesch-Kincaid Grade",
+    creator: "John P. Kincaid",
+    citation: "Kincaid, J. P., Fishburne Jr, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel. Naval Technical Training Command Millington TN Research Branch."
+  )
+
+  /// Computes `0.39 * ASL + 11.8 * ASW - 15.59`, where ASL is the average
+  /// number of words per sentence and ASW the average syllables per word.
+  ///
+  /// - Parameters:
+  ///   - text: Not read by this scorer; the score is derived from `metrics` alone.
+  ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
+  /// - Returns: The Flesch-Kincaid grade.
+  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
+    // Missing metrics are a caller error; fail with a message that names the
+    // requirement rather than an anonymous force-unwrap trap.
+    guard
+      let asl = metrics?[.avgWordsPerSentence],
+      let asw = metrics?[.avgSyllablesPerWord]
+    else {
+      preconditionFailure("RAFleschKincaidGradeScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics")
+    }
+
+    return (0.39 * asl) + (11.8 * asw) - 15.59
+  }
+}
--- a/Sources/Readability/Scorers/FleschReadingEase.swift
+++ b/Sources/Readability/Scorers/FleschReadingEase.swift
@ -0,0 +1,28 @@
+//
+//  FleschReadingEase.swift
+//  Readability
+//
+//  Created by Shibo Lyu on 2022/5/19.
+//
+
+import Foundation
+
+/// Scores text with the Flesch Reading Ease formula.
+struct RAFleschReadingEaseScorer: RAScorer {
+  /// The formula needs average words per sentence and average syllables per word.
+  static let requiresCommonMetrics: Set<RACommonMetric>? = [
+    .avgWordsPerSentence,
+    .avgSyllablesPerWord
+  ]
+
+  static let meta = RAScorerMeta(
+    name: "Flesch Reading Ease",
+    creator: "Rudolf Flesch",
+    citation: "Flesch, R. (1948). A new readability yardstick. Journal of applied psychology, 32(3), 221."
+  )
+
+  /// Computes `206.835 - 1.015 * ASL - 84.6 * ASW`, where ASL is the average
+  /// number of words per sentence and ASW the average syllables per word.
+  ///
+  /// - Parameters:
+  ///   - text: Not read by this scorer; the score is derived from `metrics` alone.
+  ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
+  /// - Returns: The Flesch Reading Ease score.
+  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
+    // Missing metrics are a caller error; fail with a message that names the
+    // requirement rather than an anonymous force-unwrap trap.
+    guard
+      let asl = metrics?[.avgWordsPerSentence],
+      let asw = metrics?[.avgSyllablesPerWord]
+    else {
+      preconditionFailure("RAFleschReadingEaseScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics")
+    }
+
+    return 206.835 - (1.015 * asl) - (84.6 * asw)
+  }
+}
--- a/Sources/Readability/ScoringTask.swift
+++ b/Sources/Readability/ScoringTask.swift
@ -0,0 +1,57 @@
+//
+//  ScoringTask.swift
+//  Readability
+//
+//  Created by Shibo Lyu on 2022/5/19.
+//
+
+import Foundation
+
+/// Runs a selection of readability scorers over a text.
+class RAScoringTask {
+  /// Maps each scorer that was run to the score it produced.
+  typealias Results = [Scorer: Double]
+
+  /// Identifies one of the built-in scorers.
+  enum Scorer: Hashable, CaseIterable, Comparable {
+    case fleschReadingEase
+    case fleschKincaidGrade
+  }
+
+  /// The implementation type registered for each `Scorer` case.
+  static let availableScorers: [Scorer: RAScorer.Type] = [
+    .fleschReadingEase: RAFleschReadingEaseScorer.self,
+    .fleschKincaidGrade: RAFleschKincaidGradeScorer.self
+  ]
+
+  /// The scorers that `run(on:)` executes; defaults to every case.
+  var scorersToRun: Set<Scorer> = Set(Scorer.allCases)
+
+  /// Implementation types for the scorers in `scorersToRun`.
+  var scorers: [Scorer: RAScorer.Type] {
+    var selected: [Scorer: RAScorer.Type] = [:]
+    for scorer in scorersToRun {
+      // Every case is expected to be registered; a gap is a programming error.
+      guard let scorerType = Self.availableScorers[scorer] else {
+        preconditionFailure("No implementation registered for scorer \(scorer)")
+      }
+      selected[scorer] = scorerType
+    }
+    return selected
+  }
+
+  /// Union of the common metrics required by the selected scorers.
+  var commonMetricsToGet: Set<RACommonMetric> {
+    var required: Set<RACommonMetric> = []
+    for scorerType in scorers.values {
+      if let metrics = scorerType.requiresCommonMetrics {
+        required.formUnion(metrics)
+      }
+    }
+    return required
+  }
+
+  /// Scores `text` with every scorer in `scorersToRun`.
+  ///
+  /// - Parameter text: The text to score.
+  /// - Returns: The score of each selected scorer, keyed by scorer.
+  func run(on text: String) -> Results {
+    // `commonMetricsToGet` is a computed property that rebuilds on every
+    // access, so resolve it once up front.
+    let requiredMetrics = commonMetricsToGet
+
+    var commonMetrics: RACommonMetricsCalculator.Results? = nil
+    if !requiredMetrics.isEmpty {
+      commonMetrics = RACommonMetricsCalculator(metrics: requiredMetrics)
+        .calculate(on: text)
+    }
+
+    var results: Results = [:]
+    for (scorer, scorerType) in scorers {
+      results[scorer] = scorerType.init().score(text, metrics: commonMetrics)
+    }
+    return results
+  }
+}
--- a/Sources/Readability/Tokenization.swift
+++ b/Sources/Readability/Tokenization.swift
@ -0,0 +1,39 @@
+//
+//  Tokenization.swift
+//  Readability
+//
+//  Created by Shibo Lyu on 2022/5/19.
+//
+
+import Foundation
+import NaturalLanguage
+
+/// Wraps an `NLTokenizer` configured for one token unit over a fixed string.
+struct RATokenizer {
+  private let unit: NLTokenUnit
+  private let text: String
+
+  private let tokenizer: NLTokenizer
+
+  /// Creates a tokenizer for `text` that yields tokens of the given `unit`.
+  ///
+  /// - Parameters:
+  ///   - text: The string to tokenize.
+  ///   - unit: The token unit passed to `NLTokenizer`.
+  ///   - language: When non-nil, set on the underlying tokenizer.
+  init(_ text: String, unit: NLTokenUnit, language: NLLanguage? = nil) {
+    let configured = NLTokenizer(unit: unit)
+    if let explicitLanguage = language {
+      configured.setLanguage(explicitLanguage)
+    }
+    configured.string = text
+
+    self.text = text
+    self.unit = unit
+    self.tokenizer = configured
+  }
+
+  /// Calls `callBack` with each token of the text, in enumeration order.
+  ///
+  /// - Returns: The number of tokens enumerated.
+  func enumerateTokens(using callBack: (String) -> Void) -> Int {
+    let wholeText = text.startIndex..<text.endIndex
+    var seen = 0
+
+    tokenizer.enumerateTokens(in: wholeText) { tokenRange, _ in
+      seen += 1
+      callBack(String(text[tokenRange]))
+      // Returning true keeps the enumeration going to the end of the text.
+      return true
+    }
+
+    return seen
+  }
+}
--- a/Sources/ReadabilityFramework/ReadabilityFramework.swift
+++ b/Sources/ReadabilityFramework/ReadabilityFramework.swift
@ -1,6 +0,0 @@
-public struct ReadabilityFramework {
-    public private(set) var text = "Hello, World!"
-
-    public init() {
-    }
-}
--- a/Tests/ReadabilityFrameworkTests/ReadabilityFrameworkTests.swift
+++ b/Tests/ReadabilityFrameworkTests/ReadabilityFrameworkTests.swift
@ -1,11 +0,0 @@
-import XCTest
-@testable import ReadabilityFramework
-
-final class ReadabilityFrameworkTests: XCTestCase {
-    func testExample() throws {
-        // This is an example of a functional test case.
-        // Use XCTAssert and related functions to verify your tests produce the correct
-        // results.
-        XCTAssertEqual(ReadabilityFramework().text, "Hello, World!")
-    }
-}
--- a/Tests/ReadabilityTests/CommonMetrics.swift
+++ b/Tests/ReadabilityTests/CommonMetrics.swift
@ -0,0 +1,32 @@
+import XCTest
+@testable import Readability
+
+/// Tests for `RACommonMetricsCalculator`.
+final class CommonMetrics: XCTestCase {
+  /// Requesting only the word count yields exactly that one entry.
+  func testOnlyWordCount() throws {
+    let calculator = RACommonMetricsCalculator(metrics: [.wordCount])
+    let expected: RACommonMetricsCalculator.Results = [.wordCount: 2.0]
+
+    let actual = calculator.calculate(on: "Hello, World!")
+
+    XCTAssertEqual(actual, expected)
+  }
+
+  /// Requesting every metric yields all counts and both averages.
+  func testAllMetrics() throws {
+    let requested: Set<RACommonMetric> = [
+      .syllableCount,
+      .wordCount,
+      .sentenceCount,
+      .avgSyllablesPerWord,
+      .avgWordsPerSentence,
+    ]
+    let expected: RACommonMetricsCalculator.Results = [
+      .syllableCount: 3.0,
+      .wordCount: 2.0,
+      .sentenceCount: 1.0,
+      .avgSyllablesPerWord: 1.5,
+      .avgWordsPerSentence: 2.0
+    ]
+
+    let actual = RACommonMetricsCalculator(metrics: requested)
+      .calculate(on: "Hello, World!")
+
+    XCTAssertEqual(actual, expected)
+  }
+}
--- a/Tests/ReadabilityTests/Tokenization.swift
+++ b/Tests/ReadabilityTests/Tokenization.swift
@ -0,0 +1,28 @@
+import XCTest
+@testable import Readability
+
+/// Tests for `RATokenizer`.
+final class Tokenization: XCTestCase {
+  /// A single short sentence is reported as one sentence token.
+  func testHelloWorldSentence() throws {
+    var sentences: [String] = []
+
+    let count = RATokenizer("Hello, world!", unit: .sentence)
+      .enumerateTokens { sentences.append($0) }
+
+    XCTAssertEqual(count, 1)
+    XCTAssertEqual(sentences, ["Hello, world!"])
+  }
+
+  /// Word tokenization drops punctuation and yields the two words.
+  func testHelloWorldWord() throws {
+    var words: [String] = []
+
+    let count = RATokenizer("Hello, world!", unit: .word)
+      .enumerateTokens { words.append($0) }
+
+    XCTAssertEqual(count, 2)
+    XCTAssertEqual(words, ["Hello", "world"])
+  }
+}