diff --git a/Sources/Readability/CommonMetrics.swift b/Sources/Readability/CommonMetrics.swift
index 44ccd4c..989bedb 100644
--- a/Sources/Readability/CommonMetrics.swift
+++ b/Sources/Readability/CommonMetrics.swift
@@ -12,6 +12,8 @@ public enum RACommonMetric {
   case sentenceCount
   case wordCount
   case syllableCount
+  /// Characters summed over all word tokens, skipping hyphens and apostrophes.
+  case characterCount
   case avgWordsPerSentence
   case avgSyllablesPerWord
 }
@@ -21,6 +23,9 @@ public struct RACommonMetricsCalculator {
 
   private var metrics: Set<RACommonMetric>
 
+  /// Characters that are not counted towards `.characterCount`.
+  private static let excludeCharacters: [Character] = ["-", "'"]
+
   public init(metrics: Set<RACommonMetric>) {
     self.metrics = metrics
   }
@@ -34,11 +39,14 @@ public struct RACommonMetricsCalculator {
     let shouldDoWords = metrics.contains(.wordCount)
       || metrics.contains(.avgWordsPerSentence)
       || metrics.contains(.avgSyllablesPerWord)
+      || metrics.contains(.characterCount)
     let shouldCountSyllables = metrics.contains(.avgSyllablesPerWord) || metrics.contains(.syllableCount)
+    let shouldCountCharacters = metrics.contains(.characterCount)
 
     var sentenceCount = 0
     var wordCount = 0
     var syllableCount = 0
+    var characterCount = 0
 
     if shouldDoSentences {
       sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
@@ -46,12 +54,15 @@ public struct RACommonMetricsCalculator {
 
     if shouldDoWords {
       let tokenizer = RATokenizer(text, unit: .word)
-      if shouldCountSyllables {
-        wordCount = tokenizer.enumerateTokens { word in
+      wordCount = tokenizer.enumerateTokens { word in
+        if shouldCountSyllables {
           syllableCount += countSyllables(word: word)
         }
-      } else {
-        wordCount = tokenizer.enumerateTokens { _ in }
+        if shouldCountCharacters {
+          characterCount += word
+            .filter { !Self.excludeCharacters.contains($0) }
+            .count
+        }
       }
     }
 
@@ -62,6 +73,7 @@ public struct RACommonMetricsCalculator {
       case .syllableCount: value = Double(syllableCount)
       case .sentenceCount: value = Double(sentenceCount)
       case .wordCount: value = Double(wordCount)
+      case .characterCount: value = Double(characterCount)
       case .avgWordsPerSentence: value = Double(wordCount) / Double(sentenceCount)
       case .avgSyllablesPerWord: value = Double(syllableCount) / Double(wordCount)
       }
diff --git a/Sources/Readability/Scorers/ColemanLiauIndex.swift b/Sources/Readability/Scorers/ColemanLiauIndex.swift
new file mode 100644
index 0000000..d9baca1
--- /dev/null
+++ b/Sources/Readability/Scorers/ColemanLiauIndex.swift
@@ -0,0 +1,53 @@
+//
+//  ColemanLiauIndex.swift
+//
+//
+//  Created by Shibo Lyu on 2022/5/20.
+//
+
+import Foundation
+
+/// Scores text with the Coleman–Liau index (Coleman & Liau, 1975), an estimate
+/// of the U.S. school grade level needed to understand the text.
+public struct RAColemanLiauIndexScorer: RAScorer {
+  /// The common metrics read by `score(_:metrics:)`.
+  public static let requiresCommonMetrics: Set<RACommonMetric>? = [
+    .wordCount,
+    .characterCount,
+    .sentenceCount
+  ]
+
+  public static let meta = RAScorerMeta(
+    name: "Coleman–Liau Index",
+    creator: "Meri Coleman & T. L. Liau",
+    citation: "Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. Journal of Applied Psychology, 60(2), 283."
+  )
+
+  public init() {}
+
+  /// Computes the Coleman–Liau grade level from precomputed common metrics.
+  ///
+  /// - Parameters:
+  ///   - text: The text being scored; unused here because only `metrics` is read.
+  ///   - metrics: Results containing `.wordCount`, `.characterCount` and `.sentenceCount`.
+  /// - Returns: The estimated grade level, or `.nan` when a required metric is
+  ///   missing or the text contains no words.
+  public func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
+    guard
+      let wordCount = metrics?[.wordCount],
+      let characterCount = metrics?[.characterCount],
+      let sentenceCount = metrics?[.sentenceCount],
+      wordCount > 0
+    else { return .nan }
+
+    // L and S in the original paper: letters and sentences per 100 words.
+    let lettersPer100Words = characterCount * 100 / wordCount
+    let sentencesPer100Words = sentenceCount * 100 / wordCount
+
+    // Estimated cloze score, expressed as a percentage.
+    let clozePercent = 141.8401 - (0.214590 * lettersPer100Words) + (1.079812 * sentencesPer100Words)
+    // The grade-level regression takes the cloze score as a proportion (0...1),
+    // so the percentage must be divided by 100 before it is applied.
+    return (-27.4004 * clozePercent / 100) + 23.06395
+  }
+}
diff --git a/Sources/Readability/ScoringTask.swift b/Sources/Readability/ScoringTask.swift
index bad6238..5566e64 100644
--- a/Sources/Readability/ScoringTask.swift
+++ b/Sources/Readability/ScoringTask.swift
@@ -13,11 +13,13 @@ public class RAScoringTask {
   public enum Scorer: Hashable, CaseIterable, Comparable {
     case fleschReadingEase
     case fleschKincaidGrade
+    case colemanLiauIndex
   }
 
   public static let availableScorers: [Scorer: RAScorer.Type] = [
     .fleschReadingEase: RAFleschReadingEaseScorer.self,
-    .fleschKincaidGrade: RAFleschKincaidGradeScorer.self
+    .fleschKincaidGrade: RAFleschKincaidGradeScorer.self,
+    .colemanLiauIndex: RAColemanLiauIndexScorer.self
   ]
 
   public var scorersToRun: Set<Scorer>