mirror of
https://github.com/laosb/ReadabilityFramework.git
synced 2025-04-30 12:41:08 +00:00
Basics.
This commit is contained in:
parent
2640bebf90
commit
d3dcfc84de
13 changed files with 352 additions and 41 deletions
14
Package.resolved
Normal file
14
Package.resolved
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
{
|
||||||
|
"pins" : [
|
||||||
|
{
|
||||||
|
"identity" : "syllable-counter-swift",
|
||||||
|
"kind" : "remoteSourceControl",
|
||||||
|
"location" : "https://github.com/wfreitag/syllable-counter-swift",
|
||||||
|
"state" : {
|
||||||
|
"branch" : "master",
|
||||||
|
"revision" : "029c8568b4d060174284fdedd7473863768a903b"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"version" : 2
|
||||||
|
}
|
|
@ -1,28 +1,32 @@
|
||||||
// swift-tools-version: 5.6
|
// swift-tools-version: 5.6
|
||||||
// The swift-tools-version declares the minimum version of Swift required to build this package.
|
|
||||||
|
|
||||||
import PackageDescription
|
import PackageDescription
|
||||||
|
|
||||||
let package = Package(
|
let package = Package(
|
||||||
name: "ReadabilityFramework",
|
name: "Readability",
|
||||||
products: [
|
platforms: [
|
||||||
// Products define the executables and libraries a package produces, and make them visible to other packages.
|
.macOS(.v10_14),
|
||||||
.library(
|
.iOS(.v12),
|
||||||
name: "ReadabilityFramework",
|
.tvOS(.v12),
|
||||||
targets: ["ReadabilityFramework"]),
|
.watchOS(.v5),
|
||||||
],
|
.macCatalyst(.v14)
|
||||||
dependencies: [
|
],
|
||||||
// Dependencies declare other packages that this package depends on.
|
products: [
|
||||||
// .package(url: /* package url */, from: "1.0.0"),
|
.library(
|
||||||
],
|
name: "Readability",
|
||||||
targets: [
|
targets: ["Readability"]),
|
||||||
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
|
],
|
||||||
// Targets can depend on other targets in this package, and on products in packages this package depends on.
|
dependencies: [
|
||||||
.target(
|
.package(url: "https://github.com/wfreitag/syllable-counter-swift", branch: "master")
|
||||||
name: "ReadabilityFramework",
|
],
|
||||||
dependencies: []),
|
targets: [
|
||||||
.testTarget(
|
.target(
|
||||||
name: "ReadabilityFrameworkTests",
|
name: "Readability",
|
||||||
dependencies: ["ReadabilityFramework"]),
|
dependencies: [
|
||||||
]
|
.product(name: "SyllableCounter", package: "syllable-counter-swift")
|
||||||
|
]),
|
||||||
|
.testTarget(
|
||||||
|
name: "ReadabilityTests",
|
||||||
|
dependencies: ["Readability"]),
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
# ReadabilityFramework
|
# Readability.framework
|
||||||
|
|
||||||
A description of this package.
|
A package that calculates common readability metrics.
|
||||||
|
|
74
Sources/Readability/CommonMetrics.swift
Normal file
74
Sources/Readability/CommonMetrics.swift
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
//
|
||||||
|
// CommonMetrics.swift
|
||||||
|
// Readability
|
||||||
|
//
|
||||||
|
// Created by Shibo Lyu on 2022/5/19.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
import SyllableCounter
|
||||||
|
|
||||||
|
/// The basic text statistics that readability scorers can request.
///
/// Serves as the key type of `RACommonMetricsCalculator.Results`.
enum RACommonMetric {
  // Raw counts.
  case sentenceCount, wordCount, syllableCount
  // Averages derived from the raw counts.
  case avgWordsPerSentence, avgSyllablesPerWord
}
|
||||||
|
|
||||||
|
/// Computes a chosen set of `RACommonMetric` values for a piece of text.
struct RACommonMetricsCalculator {
  /// Metric values keyed by metric. Counts are reported as `Double` as well.
  typealias Results = [RACommonMetric: Double]

  /// The metrics that `calculate(on:)` reports.
  private let metrics: Set<RACommonMetric>

  /// - Parameter metrics: The metrics to compute; only these appear in the results.
  init(metrics: Set<RACommonMetric>) {
    self.metrics = metrics
  }

  /// Returns the syllable count of a single word, as reported by `SyllableCounter`.
  private func countSyllables(word: String) -> Int {
    SyllableCounter.shared.count(word: word)
  }

  /// Calculates every requested metric for `text`.
  ///
  /// Averages use floating-point division, so a zero denominator produces a
  /// non-finite value (NaN or infinity) instead of trapping.
  ///
  /// - Parameter text: The text to analyze.
  /// - Returns: One entry per requested metric.
  func calculate(on text: String) -> Results {
    let needsSyllables = metrics.contains(.syllableCount)
      || metrics.contains(.avgSyllablesPerWord)
    let needsSentences = metrics.contains(.sentenceCount)
      || metrics.contains(.avgWordsPerSentence)
    // Syllables are counted word by word, so any syllable metric requires the
    // word pass too. Without this, asking for `.syllableCount` alone skipped
    // the pass and always reported 0.
    let needsWords = needsSyllables
      || metrics.contains(.wordCount)
      || metrics.contains(.avgWordsPerSentence)

    var sentenceCount = 0
    var wordCount = 0
    var syllableCount = 0

    if needsSentences {
      sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
    }

    if needsWords {
      wordCount = RATokenizer(text, unit: .word).enumerateTokens { word in
        if needsSyllables {
          syllableCount += countSyllables(word: word)
        }
      }
    }

    return metrics.reduce(into: Results()) { results, metric in
      let value: Double
      switch metric {
      case .sentenceCount: value = Double(sentenceCount)
      case .wordCount: value = Double(wordCount)
      case .syllableCount: value = Double(syllableCount)
      case .avgWordsPerSentence: value = Double(wordCount) / Double(sentenceCount)
      case .avgSyllablesPerWord: value = Double(syllableCount) / Double(wordCount)
      }
      results[metric] = value
    }
  }
}
|
24
Sources/Readability/Scorer.swift
Normal file
24
Sources/Readability/Scorer.swift
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
//
|
||||||
|
// Scorer.swift
|
||||||
|
// Readability
|
||||||
|
//
|
||||||
|
// Created by Shibo Lyu on 2022/5/19.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
import SwiftUI
|
||||||
|
|
||||||
|
/// Descriptive information about a readability scorer.
struct RAScorerMeta {
  /// Name of the scorer's formula.
  let name: String

  /// Person credited with creating the formula.
  let creator: String

  /// Literature reference for the formula. Should be in APA format.
  let citation: String
}
|
||||||
|
|
||||||
|
/// A readability formula that turns a text into a single numeric score.
protocol RAScorer {
  /// Descriptive information about the scorer.
  static var meta: RAScorerMeta { get }

  /// The common metrics this scorer needs precomputed, or `nil` if it needs none.
  static var requiresCommonMetrics: Set<RACommonMetric>? { get }

  /// Creates a scorer; conforming types must be constructible without arguments.
  init()

  /// Scores `text`.
  ///
  /// - Parameters:
  ///   - text: The text being scored.
  ///   - metrics: Precomputed common metrics, or `nil` when none were computed.
  /// - Returns: The score produced by the formula.
  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double
}
|
28
Sources/Readability/Scorers/FleschKincaidGrade.swift
Normal file
28
Sources/Readability/Scorers/FleschKincaidGrade.swift
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
//
|
||||||
|
// FleschKincaidGrade.swift
|
||||||
|
// Readability
|
||||||
|
//
|
||||||
|
// Created by Shibo Lyu on 2022/5/20.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Scorer implementing the Flesch-Kincaid Grade formula.
struct RAFleschKincaidGradeScorer: RAScorer {
  /// Both averages feed the formula, so both must be precomputed.
  static let requiresCommonMetrics: Set<RACommonMetric>? = [
    .avgWordsPerSentence,
    .avgSyllablesPerWord
  ]

  static let meta = RAScorerMeta(
    name: "Flesch-Kincaid Grade",
    creator: "John P. Kincaid",
    citation: "Kincaid, J. P., Fishburne Jr, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel. Naval Technical Training Command Millington TN Research Branch."
  )

  /// Computes `0.39 * ASL + 11.8 * ASW - 15.59`, where ASL is the average
  /// words per sentence and ASW the average syllables per word.
  ///
  /// - Parameters:
  ///   - text: The text being scored; unused here because the formula works
  ///     from `metrics` alone.
  ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
  /// - Returns: The Flesch-Kincaid grade.
  /// - Precondition: Both required metrics are present. Omitting them is a
  ///   caller error and traps with a descriptive message.
  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
    guard
      let asl = metrics?[.avgWordsPerSentence],
      let asw = metrics?[.avgSyllablesPerWord]
    else {
      preconditionFailure(
        "RAFleschKincaidGradeScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics."
      )
    }

    return (0.39 * asl) + (11.8 * asw) - 15.59
  }
}
|
28
Sources/Readability/Scorers/FleschReadingEase.swift
Normal file
28
Sources/Readability/Scorers/FleschReadingEase.swift
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
//
|
||||||
|
// FleschReadingEase.swift
|
||||||
|
// Readability
|
||||||
|
//
|
||||||
|
// Created by Shibo Lyu on 2022/5/19.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Scorer implementing the Flesch Reading Ease formula.
struct RAFleschReadingEaseScorer: RAScorer {
  /// Both averages feed the formula, so both must be precomputed.
  static let requiresCommonMetrics: Set<RACommonMetric>? = [
    .avgWordsPerSentence,
    .avgSyllablesPerWord
  ]

  static let meta = RAScorerMeta(
    name: "Flesch Reading Ease",
    creator: "Rudolf Flesch",
    citation: "Flesch, R. (1948). A new readability yardstick. Journal of applied psychology, 32(3), 221."
  )

  /// Computes `206.835 - 1.015 * ASL - 84.6 * ASW`, where ASL is the average
  /// words per sentence and ASW the average syllables per word.
  ///
  /// - Parameters:
  ///   - text: The text being scored; unused here because the formula works
  ///     from `metrics` alone.
  ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
  /// - Returns: The Flesch Reading Ease score.
  /// - Precondition: Both required metrics are present. Omitting them is a
  ///   caller error and traps with a descriptive message.
  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
    guard
      let asl = metrics?[.avgWordsPerSentence],
      let asw = metrics?[.avgSyllablesPerWord]
    else {
      preconditionFailure(
        "RAFleschReadingEaseScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics."
      )
    }

    return 206.835 - (1.015 * asl) - (84.6 * asw)
  }
}
|
57
Sources/Readability/ScoringTask.swift
Normal file
57
Sources/Readability/ScoringTask.swift
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
//
|
||||||
|
// ScoringTask.swift
|
||||||
|
// Readability
|
||||||
|
//
|
||||||
|
// Created by Shibo Lyu on 2022/5/19.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Runs a selectable set of readability scorers over a text.
class RAScoringTask {
  /// Scores keyed by the scorer that produced them.
  typealias Results = [Scorer: Double]

  /// Identifiers for the built-in scorers.
  enum Scorer: Hashable, CaseIterable, Comparable {
    case fleschReadingEase
    case fleschKincaidGrade
  }

  /// Maps each scorer identifier to the type that implements it.
  static let availableScorers: [Scorer: RAScorer.Type] = [
    .fleschReadingEase: RAFleschReadingEaseScorer.self,
    .fleschKincaidGrade: RAFleschKincaidGradeScorer.self
  ]

  /// The scorers `run(on:)` executes. Defaults to every case of `Scorer`.
  var scorersToRun: Set<Scorer> = Set(Scorer.allCases)

  /// The implementing types for the scorers selected in `scorersToRun`.
  var scorers: [Scorer: RAScorer.Type] {
    // Filtering the registry avoids force-unwrapping a per-key lookup.
    Self.availableScorers.filter { scorersToRun.contains($0.key) }
  }

  /// The union of the common metrics required by the selected scorers.
  var commonMetricsToGet: Set<RACommonMetric> {
    scorers.values.reduce(into: Set<RACommonMetric>()) { needed, scorerType in
      if let required = scorerType.requiresCommonMetrics {
        needed.formUnion(required)
      }
    }
  }

  /// Scores `text` with every selected scorer.
  ///
  /// Common metrics are computed once and shared by all scorers. When no
  /// selected scorer requires any, scorers receive `nil`.
  ///
  /// - Parameter text: The text to score.
  /// - Returns: One score per selected scorer.
  func run(on text: String) -> Results {
    // Both are computed properties; evaluate each once instead of per use.
    let selectedScorers = scorers
    let neededMetrics = commonMetricsToGet

    var commonMetrics: RACommonMetricsCalculator.Results? = nil
    if !neededMetrics.isEmpty {
      commonMetrics = RACommonMetricsCalculator(metrics: neededMetrics)
        .calculate(on: text)
    }

    return selectedScorers.mapValues { scorerType in
      scorerType.init().score(text, metrics: commonMetrics)
    }
  }
}
|
39
Sources/Readability/Tokenization.swift
Normal file
39
Sources/Readability/Tokenization.swift
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
//
|
||||||
|
// Tokenization.swift
|
||||||
|
// Readability
|
||||||
|
//
|
||||||
|
// Created by Shibo Lyu on 2022/5/19.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
import NaturalLanguage
|
||||||
|
|
||||||
|
/// Splits a text into tokens of one `NLTokenUnit` using `NLTokenizer`.
struct RATokenizer {
  private var unit: NLTokenUnit
  private var text: String

  /// Tokenizer configured in `init` with the unit, optional language, and text.
  private var tokenizer: NLTokenizer

  /// - Parameters:
  ///   - text: The text to tokenize.
  ///   - unit: The token unit handed to `NLTokenizer`.
  ///   - language: When non-nil, set on the tokenizer before the text is assigned.
  init(_ text: String, unit: NLTokenUnit, language: NLLanguage? = nil) {
    let configured = NLTokenizer(unit: unit)
    if let language = language {
      configured.setLanguage(language)
    }
    configured.string = text

    self.text = text
    self.unit = unit
    self.tokenizer = configured
  }

  /// Calls `callBack` with each token of the text, in enumeration order.
  ///
  /// - Parameter callBack: Receives every token as a `String`.
  /// - Returns: The number of tokens enumerated.
  func enumerateTokens(using callBack: (String) -> Void) -> Int {
    let wholeText = text.startIndex..<text.endIndex
    var seen = 0

    tokenizer.enumerateTokens(in: wholeText) { tokenRange, _ in
      seen += 1
      callBack(String(text[tokenRange]))
      // Returning true keeps the enumeration going to the end of the text.
      return true
    }

    return seen
  }
}
|
|
@ -1,6 +0,0 @@
|
||||||
/// Placeholder type holding a fixed greeting string.
public struct ReadabilityFramework {
  /// Greeting text; publicly readable, settable only from within the type.
  public private(set) var text: String = "Hello, World!"

  /// Creates an instance with the default text.
  public init() {}
}
|
|
|
@ -1,11 +0,0 @@
|
||||||
import XCTest
|
|
||||||
@testable import ReadabilityFramework
|
|
||||||
|
|
||||||
final class ReadabilityFrameworkTests: XCTestCase {
  /// Verifies that a fresh `ReadabilityFramework` exposes the placeholder text.
  func testExample() throws {
    let instance = ReadabilityFramework()

    XCTAssertEqual(instance.text, "Hello, World!")
  }
}
|
|
32
Tests/ReadabilityTests/CommonMetrics.swift
Normal file
32
Tests/ReadabilityTests/CommonMetrics.swift
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
import XCTest
|
||||||
|
@testable import Readability
|
||||||
|
|
||||||
|
/// Tests for `RACommonMetricsCalculator`.
final class CommonMetrics: XCTestCase {
  /// Requesting a single metric yields only that metric.
  func testOnlyWordCount() throws {
    let calculator = RACommonMetricsCalculator(metrics: [.wordCount])
    let expected: RACommonMetricsCalculator.Results = [.wordCount: 2.0]

    let actual = calculator.calculate(on: "Hello, World!")

    XCTAssertEqual(actual, expected)
  }

  /// Requesting every metric yields all counts and both averages.
  func testAllMetrics() throws {
    let requested: Set<RACommonMetric> = [
      .syllableCount,
      .wordCount,
      .sentenceCount,
      .avgSyllablesPerWord,
      .avgWordsPerSentence,
    ]
    let expected: RACommonMetricsCalculator.Results = [
      .syllableCount: 3.0,
      .wordCount: 2.0,
      .sentenceCount: 1.0,
      .avgSyllablesPerWord: 1.5,
      .avgWordsPerSentence: 2.0
    ]

    let actual = RACommonMetricsCalculator(metrics: requested)
      .calculate(on: "Hello, World!")

    XCTAssertEqual(actual, expected)
  }
}
|
28
Tests/ReadabilityTests/Tokenization.swift
Normal file
28
Tests/ReadabilityTests/Tokenization.swift
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
import XCTest
|
||||||
|
@testable import Readability
|
||||||
|
|
||||||
|
/// Tests for `RATokenizer`.
final class Tokenization: XCTestCase {
  /// The whole greeting is a single sentence token.
  func testHelloWorldSentence() throws {
    var sentences: [String] = []

    let count = RATokenizer("Hello, world!", unit: .sentence)
      .enumerateTokens { sentence in
        sentences.append(sentence)
      }

    XCTAssertEqual(count, 1)
    XCTAssertEqual(sentences, ["Hello, world!"])
  }

  /// Word tokens exclude the punctuation.
  func testHelloWorldWord() throws {
    var words: [String] = []

    let count = RATokenizer("Hello, world!", unit: .word)
      .enumerateTokens { word in
        words.append(word)
      }

    XCTAssertEqual(count, 2)
    XCTAssertEqual(words, ["Hello", "world"])
  }
}
|
Loading…
Add table
Reference in a new issue