feat: Coleman-Liau Index.

This commit is contained in:
Shibo Lyu 2022-05-20 18:09:12 +08:00
parent 3a5ec5de50
commit 474c4c708d
3 changed files with 51 additions and 5 deletions

View file

@ -12,6 +12,7 @@ public enum RACommonMetric {
case sentenceCount
case wordCount
case syllableCount
case characterCount
case avgWordsPerSentence
case avgSyllablesPerWord
}
@ -21,6 +22,8 @@ public struct RACommonMetricsCalculator {
private var metrics: Set<RACommonMetric>
private static let excludeCharacters: [Character] = ["-", "'"]
public init(metrics: Set<RACommonMetric>) {
self.metrics = metrics
}
@ -34,11 +37,14 @@ public struct RACommonMetricsCalculator {
let shouldDoWords = metrics.contains(.wordCount)
|| metrics.contains(.avgWordsPerSentence)
|| metrics.contains(.avgSyllablesPerWord)
|| metrics.contains(.characterCount)
let shouldCountSyllables = metrics.contains(.avgSyllablesPerWord) || metrics.contains(.syllableCount)
let shouldCountCharacters = metrics.contains(.characterCount)
var sentenceCount = 0
var wordCount = 0
var syllableCount = 0
var characterCount = 0
if shouldDoSentences {
sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
@ -46,12 +52,15 @@ public struct RACommonMetricsCalculator {
if shouldDoWords {
let tokenizer = RATokenizer(text, unit: .word)
if shouldCountSyllables {
wordCount = tokenizer.enumerateTokens { word in
if shouldCountSyllables {
syllableCount += countSyllables(word: word)
}
} else {
wordCount = tokenizer.enumerateTokens { _ in }
if shouldCountCharacters {
characterCount += word
.filter { !Self.excludeCharacters.contains($0) }
.count
}
}
}
@ -62,6 +71,7 @@ public struct RACommonMetricsCalculator {
case .syllableCount: value = Double(syllableCount)
case .sentenceCount: value = Double(sentenceCount)
case .wordCount: value = Double(wordCount)
case .characterCount: value = Double(characterCount)
case .avgWordsPerSentence: value = Double(wordCount) / Double(sentenceCount)
case .avgSyllablesPerWord: value = Double(syllableCount) / Double(wordCount)
}

View file

@ -0,0 +1,34 @@
//
// File.swift
//
//
// Created by Shibo Lyu on 2022/5/20.
//
import Foundation
/// Scorer implementing the Coleman–Liau index (Coleman & Liau, 1975).
///
/// The index estimates a grade level from two ratios that need no syllable
/// counting:
/// - `L`: characters per 100 words
/// - `S`: sentences per 100 words
///
/// It first estimates a cloze percentage,
/// `141.8401 - 0.214590 * L + 1.079812 * S`, and then converts that to a grade
/// level with `-27.4004 * (cloze% / 100) + 23.06395`.
public struct RAColemanLiauIndexScorer: RAScorer {
    /// Common metrics this scorer reads in `score(_:metrics:)`.
    public static let requiresCommonMetrics: Set<RACommonMetric>? = [
        .wordCount,
        .characterCount,
        .sentenceCount
    ]

    /// Descriptive metadata (display name, authors, citation) for this scorer.
    // NOTE(review): `name` has no separator between "Coleman" and "Liau";
    // left untouched here in case callers match on the exact string — confirm.
    public static let meta = RAScorerMeta(
        name: "ColemanLiau Index",
        creator: "Meri Coleman & T. L. Liau",
        citation: "Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. Journal of Applied Psychology, 60(2), 283."
    )

    public init() {}

    /// Computes the Coleman–Liau grade level.
    ///
    /// - Parameters:
    ///   - text: The text being scored. Unused here; all inputs come from `metrics`.
    ///   - metrics: Pre-computed common metrics. Must contain `.wordCount`,
    ///     `.characterCount` and `.sentenceCount` (see `requiresCommonMetrics`);
    ///     a missing value is treated as a programmer error and traps.
    /// - Returns: The estimated grade level. When the word count is zero the
    ///   floating-point divisions yield a non-finite value rather than trapping.
    public func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
        guard
            let metrics = metrics,
            let wordCount = metrics[.wordCount],
            let characterCount = metrics[.characterCount],
            let sentenceCount = metrics[.sentenceCount]
        else {
            preconditionFailure(
                "RAColemanLiauIndexScorer requires wordCount, characterCount and sentenceCount metrics"
            )
        }

        let charactersPer100Words = characterCount * 100 / wordCount
        let sentencesPer100Words = sentenceCount * 100 / wordCount

        // Estimated cloze score expressed as a percentage (0...100 scale).
        let clozePercent = 141.8401
            - (0.214590 * charactersPer100Words)
            + (1.079812 * sentencesPer100Words)

        // The grade-level regression takes the cloze score as a fraction, so
        // the percentage has to be scaled down by 100 first. Skipping this
        // step inflates the magnitude of the result by roughly 100x.
        return (-27.4004 * (clozePercent / 100)) + 23.06395
    }
}

View file

@ -13,11 +13,13 @@ public class RAScoringTask {
public enum Scorer: Hashable, CaseIterable, Comparable {
case fleschReadingEase
case fleschKincaidGrade
case colemanLiauIndex
}
public static let availableScorers: [Scorer: RAScorer.Type] = [
.fleschReadingEase: RAFleschReadingEaseScorer.self,
.fleschKincaidGrade: RAFleschKincaidGradeScorer.self
.fleschKincaidGrade: RAFleschKincaidGradeScorer.self,
.colemanLiauIndex: RAColemanLiauIndexScorer.self
]
public var scorersToRun: Set<Scorer>