mirror of
https://github.com/laosb/ReadabilityFramework.git
synced 2025-05-23 23:31:08 +00:00
feat: Coleman-Liau Index.
This commit is contained in:
parent
3a5ec5de50
commit
474c4c708d
3 changed files with 51 additions and 5 deletions
|
@ -12,6 +12,7 @@ public enum RACommonMetric {
|
||||||
case sentenceCount
|
case sentenceCount
|
||||||
case wordCount
|
case wordCount
|
||||||
case syllableCount
|
case syllableCount
|
||||||
|
case characterCount
|
||||||
case avgWordsPerSentence
|
case avgWordsPerSentence
|
||||||
case avgSyllablesPerWord
|
case avgSyllablesPerWord
|
||||||
}
|
}
|
||||||
|
@ -21,6 +22,8 @@ public struct RACommonMetricsCalculator {
|
||||||
|
|
||||||
private var metrics: Set<RACommonMetric>
|
private var metrics: Set<RACommonMetric>
|
||||||
|
|
||||||
|
private static let excludeCharacters: [Character] = ["-", "'"]
|
||||||
|
|
||||||
public init(metrics: Set<RACommonMetric>) {
|
public init(metrics: Set<RACommonMetric>) {
|
||||||
self.metrics = metrics
|
self.metrics = metrics
|
||||||
}
|
}
|
||||||
|
@ -34,11 +37,14 @@ public struct RACommonMetricsCalculator {
|
||||||
let shouldDoWords = metrics.contains(.wordCount)
|
let shouldDoWords = metrics.contains(.wordCount)
|
||||||
|| metrics.contains(.avgWordsPerSentence)
|
|| metrics.contains(.avgWordsPerSentence)
|
||||||
|| metrics.contains(.avgSyllablesPerWord)
|
|| metrics.contains(.avgSyllablesPerWord)
|
||||||
|
|| metrics.contains(.characterCount)
|
||||||
let shouldCountSyllables = metrics.contains(.avgSyllablesPerWord) || metrics.contains(.syllableCount)
|
let shouldCountSyllables = metrics.contains(.avgSyllablesPerWord) || metrics.contains(.syllableCount)
|
||||||
|
let shouldCountCharacters = metrics.contains(.characterCount)
|
||||||
|
|
||||||
var sentenceCount = 0
|
var sentenceCount = 0
|
||||||
var wordCount = 0
|
var wordCount = 0
|
||||||
var syllableCount = 0
|
var syllableCount = 0
|
||||||
|
var characterCount = 0
|
||||||
|
|
||||||
if shouldDoSentences {
|
if shouldDoSentences {
|
||||||
sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
|
sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
|
||||||
|
@ -46,12 +52,15 @@ public struct RACommonMetricsCalculator {
|
||||||
|
|
||||||
if shouldDoWords {
|
if shouldDoWords {
|
||||||
let tokenizer = RATokenizer(text, unit: .word)
|
let tokenizer = RATokenizer(text, unit: .word)
|
||||||
if shouldCountSyllables {
|
|
||||||
wordCount = tokenizer.enumerateTokens { word in
|
wordCount = tokenizer.enumerateTokens { word in
|
||||||
|
if shouldCountSyllables {
|
||||||
syllableCount += countSyllables(word: word)
|
syllableCount += countSyllables(word: word)
|
||||||
}
|
}
|
||||||
} else {
|
if shouldCountCharacters {
|
||||||
wordCount = tokenizer.enumerateTokens { _ in }
|
characterCount += word
|
||||||
|
.filter { !Self.excludeCharacters.contains($0) }
|
||||||
|
.count
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,6 +71,7 @@ public struct RACommonMetricsCalculator {
|
||||||
case .syllableCount: value = Double(syllableCount)
|
case .syllableCount: value = Double(syllableCount)
|
||||||
case .sentenceCount: value = Double(sentenceCount)
|
case .sentenceCount: value = Double(sentenceCount)
|
||||||
case .wordCount: value = Double(wordCount)
|
case .wordCount: value = Double(wordCount)
|
||||||
|
case .characterCount: value = Double(characterCount)
|
||||||
case .avgWordsPerSentence: value = Double(wordCount) / Double(sentenceCount)
|
case .avgWordsPerSentence: value = Double(wordCount) / Double(sentenceCount)
|
||||||
case .avgSyllablesPerWord: value = Double(syllableCount) / Double(wordCount)
|
case .avgSyllablesPerWord: value = Double(syllableCount) / Double(wordCount)
|
||||||
}
|
}
|
||||||
|
|
34
Sources/Readability/Scorers/ColemanLiauIndex.swift
Normal file
34
Sources/Readability/Scorers/ColemanLiauIndex.swift
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
//
|
||||||
|
//  ColemanLiauIndex.swift
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Created by Shibo Lyu on 2022/5/20.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Scorer implementing the Coleman–Liau Index, a readability formula based on
/// character and sentence density rather than syllable counts.
///
/// The score is an approximate US grade level: higher values indicate text
/// that is harder to read.
public struct RAColemanLiauIndexScorer: RAScorer {
  /// The common metrics this scorer reads in `score(_:metrics:)`.
  public static let requiresCommonMetrics: Set<RACommonMetric>? = [
    .wordCount,
    .characterCount,
    .sentenceCount
  ]

  /// Human-readable metadata describing the formula and its source.
  public static let meta = RAScorerMeta(
    name: "Coleman–Liau Index",
    creator: "Meri Coleman & T. L. Liau",
    citation: "Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. Journal of Applied Psychology, 60(2), 283."
  )

  public init() {}

  /// Computes the Coleman–Liau grade level from precomputed common metrics.
  ///
  /// - Parameters:
  ///   - text: The text being scored. Unused here; only `metrics` is read.
  ///   - metrics: Results containing `.wordCount`, `.characterCount` and
  ///     `.sentenceCount` (the metrics listed in `requiresCommonMetrics`).
  /// - Returns: The estimated grade level, or `Double.nan` when the word count
  ///   is zero and the per-100-words rates are therefore undefined.
  /// - Precondition: `metrics` is non-nil and contains every required metric.
  ///   A missing metric is a programmer error and traps, as it did previously
  ///   via force-unwrapping.
  public func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
    guard
      let metrics = metrics,
      let wordCount = metrics[.wordCount],
      let characterCount = metrics[.characterCount],
      let sentenceCount = metrics[.sentenceCount]
    else {
      fatalError("RAColemanLiauIndexScorer requires wordCount, characterCount and sentenceCount metrics.")
    }

    // With no words the rates below are 0/0 or x/0; report "undefined" explicitly.
    guard wordCount > 0 else { return .nan }

    // Characters and sentences per 100 words.
    let l = characterCount * 100 / wordCount
    let s = sentenceCount * 100 / wordCount

    // Estimated cloze score as a percentage.
    let clozePercent = 141.8401 - (0.214590 * l) + (1.079812 * s)

    // The grade-level equation takes the cloze score as a proportion (0...1),
    // not a percentage; this is equivalent to 0.0588 * l - 0.296 * s - 15.8.
    return (-27.4004 * (clozePercent / 100)) + 23.06395
  }
}
|
|
@ -13,11 +13,13 @@ public class RAScoringTask {
|
||||||
public enum Scorer: Hashable, CaseIterable, Comparable {
|
public enum Scorer: Hashable, CaseIterable, Comparable {
|
||||||
case fleschReadingEase
|
case fleschReadingEase
|
||||||
case fleschKincaidGrade
|
case fleschKincaidGrade
|
||||||
|
case colemanLiauIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
public static let availableScorers: [Scorer: RAScorer.Type] = [
|
public static let availableScorers: [Scorer: RAScorer.Type] = [
|
||||||
.fleschReadingEase: RAFleschReadingEaseScorer.self,
|
.fleschReadingEase: RAFleschReadingEaseScorer.self,
|
||||||
.fleschKincaidGrade: RAFleschKincaidGradeScorer.self
|
.fleschKincaidGrade: RAFleschKincaidGradeScorer.self,
|
||||||
|
.colemanLiauIndex: RAColemanLiauIndexScorer.self
|
||||||
]
|
]
|
||||||
|
|
||||||
public var scorersToRun: Set<Scorer>
|
public var scorersToRun: Set<Scorer>
|
||||||
|
|
Loading…
Add table
Reference in a new issue