Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ let swiftSettings: [SwiftSetting] = [

let package = Package(
name: "swift-evolution-metadata-extractor",
platforms: [ .macOS(.v14) ],
platforms: [ .macOS(.v15) ],
products: [
.library(name: "EvolutionMetadataModel", targets: ["EvolutionMetadataModel"]),
.executable(name: "swift-evolution-metadata-extractor", targets: ["swift-evolution-metadata-extractor"])
Expand Down
52 changes: 27 additions & 25 deletions Sources/EvolutionMetadataExtraction/ExtractionJob.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,16 @@ public struct ExtractionJob: Sendable {
let extractionDate: Date
}

let project: Project
let proposalSpecs: [ProposalSpec]
let previousResults: EvolutionMetadata?
let forcedExtractionIDs: [String]
let jobMetadata: JobMetadata
let output: Output
let snapshot: Snapshot?

private init(output: Output, snapshot: Snapshot?, proposalSpecs: [ProposalSpec], previousResults: EvolutionMetadata?, forcedExtractionIDs: [String], jobMetadata: JobMetadata) {
private init(project: Project, output: Output, snapshot: Snapshot?, proposalSpecs: [ProposalSpec], previousResults: EvolutionMetadata?, forcedExtractionIDs: [String], jobMetadata: JobMetadata) {
self.project = project
self.proposalSpecs = proposalSpecs
self.previousResults = previousResults
self.forcedExtractionIDs = forcedExtractionIDs
Expand All @@ -69,16 +71,16 @@ public struct ExtractionJob: Sendable {

}

/// Builds an `ExtractionJob` for `source` by forwarding to the matching source-specific factory.
///
/// - Parameters:
///   - project: Project the job is created for; defaults to `Project.default`.
///   - source: Selects the factory: `.network`, `.snapshot`, `.files`, or `.pullRequest`.
///   - output: Forwarded unchanged to the selected factory.
///   - ignorePreviousResults: Forwarded unchanged; defaults to `false`.
///   - forcedExtractionIDs: Forwarded unchanged; defaults to an empty array.
///   - extractionDate: Forwarded unchanged; defaults to `Date()`.
/// - Returns: The job produced by the selected factory.
/// - Throws: Any error thrown by the selected factory.
// NOTE(review): paired lines below look like the before/after sides of a diff;
// the variants taking `project:` appear to be the newer ones — confirm.
public static func makeExtractionJob(source: Source, output: Output, ignorePreviousResults: Bool = false, forcedExtractionIDs: [String] = [], extractionDate: Date = Date()) async throws -> ExtractionJob {
public static func makeExtractionJob(project: Project = Project.default, source: Source, output: Output, ignorePreviousResults: Bool = false, forcedExtractionIDs: [String] = [], extractionDate: Date = Date()) async throws -> ExtractionJob {
// The switch is the function's single expression, so the selected case's result is the return value.
switch source {
case .network:
try await makeNetworkExtractionJob(output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
try await makeNetworkExtractionJob(project: project, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
case .snapshot(let snapshotURL):
try await makeSnapshotExtractionJob(snapshotURL: snapshotURL, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
try await makeSnapshotExtractionJob(project: project, snapshotURL: snapshotURL, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
case .files(let fileURLs):
// The files factory is synchronous, hence no `await` on this branch.
try makeFilesExtractionJob(fileURLs: fileURLs, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
try makeFilesExtractionJob(project: project, fileURLs: fileURLs, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
case .pullRequest(let pullRequestID):
try await makePullRequestExtractionJob(pullRequestID: pullRequestID, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
try await makePullRequestExtractionJob(project: project, pullRequestID: pullRequestID, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate)
}
}
}
Expand All @@ -87,93 +89,93 @@ public struct ExtractionJob: Sendable {

extension ExtractionJob {

/// Builds an `ExtractionJob` from the main branch as reported by `GitHubFetcher`.
///
/// Fetches the main branch info, lists the proposal content items at that commit's SHA,
/// and converts the items into proposal specs. Previous results are started with
/// `async let` and awaited only when the job is constructed.
///
/// - Parameters:
///   - project: Project passed to the fetcher calls, the proposal specs, and the job.
///   - output: Job output; a `Snapshot` value is created only for `.snapshot(destURL)`.
///   - ignorePreviousResults: Forwarded to `previousResults(from:ignorePreviousResults:)`.
///   - forcedExtractionIDs: Stored on the resulting job unchanged.
///   - extractionDate: Recorded in the job metadata and used as the snapshot date.
/// - Returns: A job whose metadata carries the fetched commit SHA.
/// - Throws: Errors from the fetcher calls or from loading previous results.
// NOTE(review): paired lines below look like the before/after sides of a diff;
// the variants taking `project` appear to be the newer ones — confirm.
private static func makeNetworkExtractionJob(output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob {
private static func makeNetworkExtractionJob(project: Project, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob {

// Started here, awaited in the return statement, so it can overlap with the fetches below.
async let previousResults = previousResults(from: PreviousResultsFetcher.previousResultsURL, ignorePreviousResults: ignorePreviousResults)
let mainBranchInfo = try await GitHubFetcher.fetchMainBranch()
async let previousResults = previousResults(from: project.previousResultsURL, ignorePreviousResults: ignorePreviousResults)
let mainBranchInfo = try await GitHubFetcher.fetchMainBranch(for: project)
let sha = mainBranchInfo.commit.sha
let proposalContentItems = try await GitHubFetcher.fetchProposalContentItems(for: sha)
let proposalContentItems = try await GitHubFetcher.fetchProposalContentItems(for: project, sha: sha)

// The proposals/ directory may have subdirectories for
// proposals from specific workgroups. For now, proposals
// in those subdirectories are filtered out of this proposal
// specs array.
// `compactMap` drops items whose `proposalSpec` is nil; `$0` is the enumeration index, `$1` the item.
let proposalSpecs = proposalContentItems.enumerated().compactMap {
$1.proposalSpec(sortIndex: $0)
$1.proposalSpec(project: project, sortIndex: $0)
}

let jobMetadata = JobMetadata(commit: sha, extractionDate: extractionDate)

// A Snapshot is only created when the output is `.snapshot`; it is seeded with the
// fetched listing and branch info, and with empty directory contents and proposal specs.
let snapshot: Snapshot?
if case let .snapshot(destURL) = output {
snapshot = Snapshot(sourceURL: nil, destURL: destURL, proposalListing: proposalContentItems, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: mainBranchInfo, snapshotDate: extractionDate)
snapshot = Snapshot(project: project, sourceURL: nil, destURL: destURL, proposalListing: proposalContentItems, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: mainBranchInfo, snapshotDate: extractionDate)
} else {
snapshot = nil
}

return ExtractionJob(output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: try await previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
return ExtractionJob(project: project, output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: try await previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
}

/// Builds an `ExtractionJob` from a local snapshot at `snapshotURL`.
///
/// The snapshot is loaded through `Snapshot.makeSnapshot`, and the job takes its proposal
/// specs and previous results from it. The job metadata uses the snapshot's branch commit
/// SHA (if any) and the snapshot's own `snapshotDate`, not the `extractionDate` argument.
///
/// - Parameters:
///   - project: Project passed to `Snapshot.makeSnapshot` and stored on the job.
///   - snapshotURL: Snapshot location; asserted to have the `evosnapshot` path extension.
///   - output: Job output; its `snapshotURL` is passed as the snapshot destination.
///   - ignorePreviousResults: Forwarded to `Snapshot.makeSnapshot`.
///   - forcedExtractionIDs: Stored on the resulting job unchanged.
///   - extractionDate: Forwarded to `Snapshot.makeSnapshot`.
/// - Returns: A job that always carries the loaded source snapshot.
/// - Throws: Errors from `Snapshot.makeSnapshot`.
// NOTE(review): paired lines below look like the before/after sides of a diff;
// the variants taking `project` appear to be the newer ones — confirm.
private static func makeSnapshotExtractionJob(snapshotURL: URL, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob {
private static func makeSnapshotExtractionJob(project: Project, snapshotURL: URL, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob {

// Argument validation should ensure correct values. Assert to catch problems in usage in tests.
assert(snapshotURL.pathExtension == "evosnapshot", "Snapshot URL must be a directory with 'evosnapshot' extension.")

verbosePrint("Using local snapshot\n'\(snapshotURL.relativePath)'")

let sourceSnapshot = try await Snapshot.makeSnapshot(snapshotURL: snapshotURL, destURL: output.snapshotURL, ignorePreviousResults: ignorePreviousResults, extractionDate: extractionDate)
let sourceSnapshot = try await Snapshot.makeSnapshot(project: project, snapshotURL: snapshotURL, destURL: output.snapshotURL, ignorePreviousResults: ignorePreviousResults, extractionDate: extractionDate)

// `branchInfo` is optional, so the recorded commit may be nil.
let jobMetadata = JobMetadata(commit: sourceSnapshot.branchInfo?.commit.sha, extractionDate: sourceSnapshot.snapshotDate)

// Always use sourceSnapshot, its values are used in tests
return ExtractionJob(output: output, snapshot: sourceSnapshot, proposalSpecs: sourceSnapshot.proposalSpecs, previousResults: sourceSnapshot.previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
return ExtractionJob(project: project, output: output, snapshot: sourceSnapshot, proposalSpecs: sourceSnapshot.proposalSpecs, previousResults: sourceSnapshot.previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
}

/// Builds an `ExtractionJob` directly from a list of local file URLs.
///
/// The URLs are sorted ascending by last path component; each becomes a `ProposalSpec`
/// with an empty SHA and its position in the sorted list as `sortIndex`. The job has no
/// previous results and an empty commit string in its metadata.
///
/// - Parameters:
///   - project: Project stored on each proposal spec and on the job.
///   - fileURLs: Files to extract from.
///   - output: Job output; a `Snapshot` value is created only for `.snapshot(destURL)`.
///   - ignorePreviousResults: Asserted to be `true`.
///   - forcedExtractionIDs: Asserted to be empty; stored on the resulting job.
///   - extractionDate: Recorded in the job metadata and used as the snapshot date.
/// - Returns: The configured job.
// NOTE(review): paired lines below look like the before/after sides of a diff;
// the variants taking `project` appear to be the newer ones — confirm.
private static func makeFilesExtractionJob(fileURLs: [URL], output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) throws -> ExtractionJob {
private static func makeFilesExtractionJob(project: Project, fileURLs: [URL], output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) throws -> ExtractionJob {

// Argument validation should ensure correct values. Assert to catch problems in usage in tests.
assert(ignorePreviousResults == true && forcedExtractionIDs.isEmpty, "Extraction from a file URLs always ignores previous results and performs a full extraction")

// Sort by file name first so that `sortIndex` reflects file-name order rather than argument order.
let proposalSpecs = fileURLs
.sorted(using: SortDescriptor(\URL.lastPathComponent, order: .forward))
.enumerated()
.map { ProposalSpec(url: $1, sha: "", sortIndex: $0) }
.map { ProposalSpec(project: project, url: $1, sha: "", sortIndex: $0) }

let jobMetadata = JobMetadata(commit: "", extractionDate: extractionDate)

// A Snapshot is only created when the output is `.snapshot`; it is created with no
// proposal listing or branch info, and with empty directory contents and proposal specs.
let snapshot: Snapshot?
if case let .snapshot(destURL) = output {
snapshot = Snapshot(sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate)
snapshot = Snapshot(project: project, sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate)
} else {
snapshot = nil
}

return ExtractionJob(output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
return ExtractionJob(project: project, output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
}

/// Builds an `ExtractionJob` from the proposal list of a pull request.
///
/// The proposal content items come from `GitHubFetcher.fetchPullRequestProposalList` and
/// are converted into proposal specs. The job has no previous results and an empty commit
/// string in its metadata.
///
/// - Parameters:
///   - project: Project passed to the fetcher, the proposal specs, and the job.
///   - pullRequestID: Identifier of the pull request passed to the fetcher.
///   - output: Job output; a `Snapshot` value is created only for `.snapshot(destURL)`.
///   - ignorePreviousResults: Asserted to be `true`.
///   - forcedExtractionIDs: Asserted to be empty; stored on the resulting job.
///   - extractionDate: Recorded in the job metadata and used as the snapshot date.
/// - Returns: The configured job.
/// - Throws: Errors from fetching the pull request proposal list.
// NOTE(review): paired lines below look like the before/after sides of a diff;
// the variants taking `project` appear to be the newer ones — confirm.
private static func makePullRequestExtractionJob(pullRequestID: Int, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob {
private static func makePullRequestExtractionJob(project: Project, pullRequestID: Int, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob {

// Argument validation should ensure correct values. Assert to catch problems in usage in tests.
assert(ignorePreviousResults == true && forcedExtractionIDs.isEmpty, "Extraction from a pull request always ignores previous results and performs a full extraction")

let proposalContentItems = try await GitHubFetcher.fetchPullRequestProposalList(for: pullRequestID)
let proposalContentItems = try await GitHubFetcher.fetchPullRequestProposalList(for: project, pullRequestNumber: pullRequestID)

// The proposals/ directory may have subdirectories for proposals from specific workgroups.
// Proposals in those subdirectories are filtered out of this proposal specs array.
// `compactMap` drops items whose `proposalSpec` is nil; `$0` is the enumeration index, `$1` the item.
let proposalSpecs = proposalContentItems.enumerated().compactMap {
$1.proposalSpec(sortIndex: $0)
$1.proposalSpec(project: project, sortIndex: $0)
}

let jobMetadata = JobMetadata(commit: "", extractionDate: extractionDate)

// A Snapshot is only created when the output is `.snapshot`; it is created with no
// proposal listing or branch info, and with empty directory contents and proposal specs.
let snapshot: Snapshot?
if case let .snapshot(destURL) = output {
snapshot = Snapshot(sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate)
snapshot = Snapshot(project: project, sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate)
} else {
snapshot = nil
}

return ExtractionJob(output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
return ExtractionJob(project: project, output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata)
}

static func previousResults(from url: URL, ignorePreviousResults: Bool) async throws -> EvolutionMetadata? {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,15 @@ struct EvolutionMetadataExtractor {
/// The listing of proposals to be processed may come from a GitHub proposal listing or scanning the contents of a directory.
///
struct ProposalSpec: Sendable {
let project: Project
let url: URL
let sha: String
let sortIndex: Int
var id: String { "SE-" + url.lastPathComponent.prefix(4) }
var id: String { "\(project.proposalPrefix)-\(url.lastPathComponent.prefix(4))" }
var filename: String { url.lastPathComponent }

init(url: URL, sha: String, sortIndex: Int) {
init(project: Project, url: URL, sha: String, sortIndex: Int) {
self.project = project
self.url = url
self.sha = sha
self.sortIndex = sortIndex
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@ struct DiscussionExtractor: MarkupWalker, ValueExtractor {

private var discussions: [Proposal.Discussion] = []

mutating func extractValue(from sourceValues: (headerFieldsByLabel: [String : ListItem], proposalID: String)) -> ExtractionResult<[Proposal.Discussion]> {
mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult<[Proposal.Discussion]> {

// VALIDATION ENHANCEMENT: Normalize naming to 'Review' in the source proposals.
if let (_, headerField) = sourceValues.headerFieldsByLabel[["Review", "Reviews", "Decision Notes", "Decision notes"]] {
if let (_, headerField) = source["Review", "Reviews", "Decision Notes", "Decision notes"] {
visit(headerField)

// VALIDATION ENHANCEMENT: Correct proposals with known issues and remove special case logic.
// Currently a fair number of older proposals are missing links to discussions or do not
// format discussions correctly. Those issues should be corrected in the proposals themselves.
// Once all of those issues are resolved, the legacy check can be removed.
if discussions.isEmpty && !Legacy.discussionExtractionFailures.contains(sourceValues.proposalID) {
if discussions.isEmpty && !Legacy.discussionExtractionFailures.contains(source.proposalSpec.id) {
errors.append(.discussionExtractionFailure)
}
} else {
Expand All @@ -35,7 +35,7 @@ struct DiscussionExtractor: MarkupWalker, ValueExtractor {
// field for a variety of reasons. Those issues should be corrected in the proposals themselves.
// Once all of those issues are resolved, the legacy check can be removed.
// Note that some very early proposals may not have any valid discussions to extract.
if !Legacy.missingReviewFields.contains(sourceValues.proposalID) {
if !Legacy.missingReviewFields.contains(source.proposalSpec.id) {
errors.append(.missingReviewField)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ struct HeaderFieldExtractor: MarkupWalker, ValueExtractor {
private var warnings: [Proposal.Issue] = []
private var errors: [Proposal.Issue] = []

mutating func extractValue(from document: Document) -> ExtractionResult<[String: ListItem]> {
guard let headerFields = document.child(through: [(1, UnorderedList.self)]) as? UnorderedList else {
mutating func extractValue(from src: DocumentSource) -> ExtractionResult<[String: ListItem]> {
guard let headerFields = src.document.child(through: [(1, UnorderedList.self)]) as? UnorderedList else {
return ExtractionResult(value: nil, warnings: warnings, errors: errors)
}
visit(headerFields)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ struct ImplementationExtractor: MarkupWalker, ValueExtractor {
}
private var _implementaton: [Proposal.Implementation] = []

mutating func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[Proposal.Implementation]> {
if let (_ , headerField) = headerFieldsByLabel[["Implementation", "Implementations"]] {
mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult<[Proposal.Implementation]> {
if let (_ , headerField) = source["Implementation", "Implementations"] {
visit(headerField)
}
// Implementation field is optional. Take no action / add no warning if it is not found
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ import Markdown
import EvolutionMetadataModel

/// Value extractor for a proposal's authors.
///
/// Delegates to a `PersonExtractor` created with the `.author` role and returns its
/// `personArray(from:)` result unchanged.
// NOTE(review): the paired `headerFieldsByLabel` / `src` lines look like the before/after
// sides of a diff; the `HeaderFieldSource` form appears to be the newer one — confirm.
struct AuthorExtractor: ValueExtractor {
func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[Proposal.Person]> {
func extractValue(from src: HeaderFieldSource) -> ExtractionResult<[Proposal.Person]> {
// `var` because `personArray(from:)` is a mutating method on `PersonExtractor`.
var personExtractor = PersonExtractor(role: .author)
return personExtractor.personArray(from: headerFieldsByLabel)
return personExtractor.personArray(from: src)
}
}

/// Value extractor for a proposal's review managers.
///
/// Delegates to a `PersonExtractor` created with the `.reviewManager` role and returns its
/// `personArray(from:)` result unchanged.
// NOTE(review): the paired `headerFieldsByLabel` / `src` lines look like the before/after
// sides of a diff; the `HeaderFieldSource` form appears to be the newer one — confirm.
struct ReviewManagerExtractor: ValueExtractor {
func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[Proposal.Person]> {
func extractValue(from src: HeaderFieldSource) -> ExtractionResult<[Proposal.Person]> {
// `var` because `personArray(from:)` is a mutating method on `PersonExtractor`.
var personExtractor = PersonExtractor(role: .reviewManager)
return personExtractor.personArray(from: headerFieldsByLabel)
return personExtractor.personArray(from: src)
}
}

Expand All @@ -40,13 +40,13 @@ struct PersonExtractor: MarkupWalker {
self.role = role
}

mutating func personArray(from headerFields: [String : ListItem]) -> ExtractionResult<[Proposal.Person]> {
mutating func personArray(from source: HeaderFieldSource) -> ExtractionResult<[Proposal.Person]> {
let headerLabels = switch role {
case .author: ["Author", "Authors"]
// VALIDATION ENHANCEMENT: Normalize capitalization to 'Review Manager'
case .reviewManager: ["Review manager", "Review Manager", "Review managers", "Review Managers"]
}
if let (_, headerField) = headerFields[headerLabels] {
if let (_, headerField) = source[headerLabels] {
visit(headerField)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ struct PreviousProposalExtractor: MarkupWalker, ValueExtractor {
}
private var _previousProposalIDs: [String] = []

mutating func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[String]> {
mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult<[String]> {

if let prevPropField = headerFieldsByLabel[["Previous Proposal", "Previous Proposals"]] {
if let prevPropField = source["Previous Proposal", "Previous Proposals"] {
visit(prevPropField.value)

// validate that if the header field is here at least one proposal ID was found
Expand Down
Loading