diff --git a/Package.swift b/Package.swift index a17c66b..f330abe 100644 --- a/Package.swift +++ b/Package.swift @@ -17,7 +17,7 @@ let swiftSettings: [SwiftSetting] = [ let package = Package( name: "swift-evolution-metadata-extractor", - platforms: [ .macOS(.v14) ], + platforms: [ .macOS(.v15) ], products: [ .library(name: "EvolutionMetadataModel", targets: ["EvolutionMetadataModel"]), .executable(name: "swift-evolution-metadata-extractor", targets: ["swift-evolution-metadata-extractor"]) diff --git a/Sources/EvolutionMetadataExtraction/ExtractionJob.swift b/Sources/EvolutionMetadataExtraction/ExtractionJob.swift index 4fd49d1..b1098b2 100644 --- a/Sources/EvolutionMetadataExtraction/ExtractionJob.swift +++ b/Sources/EvolutionMetadataExtraction/ExtractionJob.swift @@ -41,6 +41,7 @@ public struct ExtractionJob: Sendable { let extractionDate: Date } + let project: Project let proposalSpecs: [ProposalSpec] let previousResults: EvolutionMetadata? let forcedExtractionIDs: [String] @@ -48,7 +49,8 @@ public struct ExtractionJob: Sendable { let output: Output let snapshot: Snapshot? - private init(output: Output, snapshot: Snapshot?, proposalSpecs: [ProposalSpec], previousResults: EvolutionMetadata?, forcedExtractionIDs: [String], jobMetadata: JobMetadata) { + private init(project: Project, output: Output, snapshot: Snapshot?, proposalSpecs: [ProposalSpec], previousResults: EvolutionMetadata?, forcedExtractionIDs: [String], jobMetadata: JobMetadata) { + self.project = project self.proposalSpecs = proposalSpecs self.previousResults = previousResults self.forcedExtractionIDs = forcedExtractionIDs @@ -69,16 +71,16 @@ public struct ExtractionJob: Sendable { } - public static func makeExtractionJob(source: Source, output: Output, ignorePreviousResults: Bool = false, forcedExtractionIDs: [String] = [], extractionDate: Date = Date()) async throws -> ExtractionJob { + public static func makeExtractionJob(project: Project = Project.default, source: Source, output: Output, ignorePreviousResults: Bool = false, forcedExtractionIDs: [String] = [], extractionDate: Date = Date()) async throws -> ExtractionJob { switch source { case .network: - try await makeNetworkExtractionJob(output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) + try await makeNetworkExtractionJob(project: project, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) case .snapshot(let snapshotURL): - try await makeSnapshotExtractionJob(snapshotURL: snapshotURL, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) + try await makeSnapshotExtractionJob(project: project, snapshotURL: snapshotURL, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) case .files(let fileURLs): - try makeFilesExtractionJob(fileURLs: fileURLs, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) + try makeFilesExtractionJob(project: project, fileURLs: fileURLs, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) case .pullRequest(let pullRequestID): - try await makePullRequestExtractionJob(pullRequestID: pullRequestID, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) + try await makePullRequestExtractionJob(project: project, pullRequestID: pullRequestID, output: output, ignorePreviousResults: ignorePreviousResults, forcedExtractionIDs: forcedExtractionIDs, extractionDate: extractionDate) } } } @@ -87,49 +89,49 @@ public struct ExtractionJob: Sendable { extension ExtractionJob { - private static func makeNetworkExtractionJob(output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob { + private static func makeNetworkExtractionJob(project: Project, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob { - async let previousResults = previousResults(from: PreviousResultsFetcher.previousResultsURL, ignorePreviousResults: ignorePreviousResults) - let mainBranchInfo = try await GitHubFetcher.fetchMainBranch() + async let previousResults = previousResults(from: project.previousResultsURL, ignorePreviousResults: ignorePreviousResults) + let mainBranchInfo = try await GitHubFetcher.fetchMainBranch(for: project) let sha = mainBranchInfo.commit.sha - let proposalContentItems = try await GitHubFetcher.fetchProposalContentItems(for: sha) + let proposalContentItems = try await GitHubFetcher.fetchProposalContentItems(for: project, sha: sha) // The proposals/ directory may have subdirectories for // proposals from specific workgroups. For now, proposals // in those subdirectories are filtered out of this proposal // specs array. let proposalSpecs = proposalContentItems.enumerated().compactMap { - $1.proposalSpec(sortIndex: $0) + $1.proposalSpec(project: project, sortIndex: $0) } let jobMetadata = JobMetadata(commit: sha, extractionDate: extractionDate) let snapshot: Snapshot? if case let .snapshot(destURL) = output { - snapshot = Snapshot(sourceURL: nil, destURL: destURL, proposalListing: proposalContentItems, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: mainBranchInfo, snapshotDate: extractionDate) + snapshot = Snapshot(project: project, sourceURL: nil, destURL: destURL, proposalListing: proposalContentItems, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: mainBranchInfo, snapshotDate: extractionDate) } else { snapshot = nil } - return ExtractionJob(output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: try await previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) + return ExtractionJob(project: project, output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: try await previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) } - private static func makeSnapshotExtractionJob(snapshotURL: URL, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob { + private static func makeSnapshotExtractionJob(project: Project, snapshotURL: URL, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob { // Argument validation should ensure correct values. Assert to catch problems in usage in tests. assert(snapshotURL.pathExtension == "evosnapshot", "Snapshot URL must be a directory with 'evosnapshot' extension.") verbosePrint("Using local snapshot\n'\(snapshotURL.relativePath)'") - let sourceSnapshot = try await Snapshot.makeSnapshot(snapshotURL: snapshotURL, destURL: output.snapshotURL, ignorePreviousResults: ignorePreviousResults, extractionDate: extractionDate) + let sourceSnapshot = try await Snapshot.makeSnapshot(project: project, snapshotURL: snapshotURL, destURL: output.snapshotURL, ignorePreviousResults: ignorePreviousResults, extractionDate: extractionDate) let jobMetadata = JobMetadata(commit: sourceSnapshot.branchInfo?.commit.sha, extractionDate: sourceSnapshot.snapshotDate) // Always use sourceSnapshot, its values are used in tests - return ExtractionJob(output: output, snapshot: sourceSnapshot, proposalSpecs: sourceSnapshot.proposalSpecs, previousResults: sourceSnapshot.previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) + return ExtractionJob(project: project, output: output, snapshot: sourceSnapshot, proposalSpecs: sourceSnapshot.proposalSpecs, previousResults: sourceSnapshot.previousResults, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) } - private static func makeFilesExtractionJob(fileURLs: [URL], output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) throws -> ExtractionJob { + private static func makeFilesExtractionJob(project: Project, fileURLs: [URL], output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) throws -> ExtractionJob { // Argument validation should ensure correct values. Assert to catch problems in usage in tests. assert(ignorePreviousResults == true && forcedExtractionIDs.isEmpty, "Extraction from a file URLs always ignores previous results and performs a full extraction") @@ -137,43 +139,43 @@ extension ExtractionJob { let proposalSpecs = fileURLs .sorted(using: SortDescriptor(\URL.lastPathComponent, order: .forward)) .enumerated() - .map { ProposalSpec(url: $1, sha: "", sortIndex: $0) } + .map { ProposalSpec(project: project, url: $1, sha: "", sortIndex: $0) } let jobMetadata = JobMetadata(commit: "", extractionDate: extractionDate) let snapshot: Snapshot? if case let .snapshot(destURL) = output { - snapshot = Snapshot(sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate) + snapshot = Snapshot(project: project, sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate) } else { snapshot = nil } - return ExtractionJob(output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) + return ExtractionJob(project: project, output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) } - private static func makePullRequestExtractionJob(pullRequestID: Int, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob { + private static func makePullRequestExtractionJob(project: Project, pullRequestID: Int, output: Output, ignorePreviousResults: Bool, forcedExtractionIDs: [String], extractionDate: Date) async throws -> ExtractionJob { // Argument validation should ensure correct values. Assert to catch problems in usage in tests. assert(ignorePreviousResults == true && forcedExtractionIDs.isEmpty, "Extraction from a pull request always ignores previous results and performs a full extraction") - let proposalContentItems = try await GitHubFetcher.fetchPullRequestProposalList(for: pullRequestID) + let proposalContentItems = try await GitHubFetcher.fetchPullRequestProposalList(for: project, pullRequestNumber: pullRequestID) // The proposals/ directory may have subdirectories for proposals from specific workgroups. // Proposals in those subdirectories are filtered out of this proposal specs array. let proposalSpecs = proposalContentItems.enumerated().compactMap { - $1.proposalSpec(sortIndex: $0) + $1.proposalSpec(project: project, sortIndex: $0) } let jobMetadata = JobMetadata(commit: "", extractionDate: extractionDate) let snapshot: Snapshot? if case let .snapshot(destURL) = output { - snapshot = Snapshot(sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate) + snapshot = Snapshot(project: project, sourceURL: nil, destURL: destURL, proposalListing: nil, directoryContents: [], proposalSpecs: [], previousResults: nil, expectedResults: nil, branchInfo: nil, snapshotDate: extractionDate) } else { snapshot = nil } - return ExtractionJob(output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) + return ExtractionJob(project: project, output: output, snapshot: snapshot, proposalSpecs: proposalSpecs, previousResults: nil, forcedExtractionIDs: forcedExtractionIDs, jobMetadata: jobMetadata) } static func previousResults(from url: URL, ignorePreviousResults: Bool) async throws -> EvolutionMetadata? { diff --git a/Sources/EvolutionMetadataExtraction/Extractors/EvolutionMetadataExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/EvolutionMetadataExtractor.swift index 2103566..55dc87f 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/EvolutionMetadataExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/EvolutionMetadataExtractor.swift @@ -166,13 +166,15 @@ struct EvolutionMetadataExtractor { /// The listing of proposals to be processed may come from a GitHub proposal listing or scanning the contents of a directory. /// struct ProposalSpec: Sendable { + let project: Project let url: URL let sha: String let sortIndex: Int - var id: String { "SE-" + url.lastPathComponent.prefix(4) } + var id: String { "\(project.proposalPrefix)-\(url.lastPathComponent.prefix(4))" } var filename: String { url.lastPathComponent } - init(url: URL, sha: String, sortIndex: Int) { + init(project: Project, url: URL, sha: String, sortIndex: Int) { + self.project = project self.url = url self.sha = sha self.sortIndex = sortIndex diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/DiscussionExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/DiscussionExtractor.swift index 7f90494..e168288 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/DiscussionExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/DiscussionExtractor.swift @@ -16,17 +16,17 @@ struct DiscussionExtractor: MarkupWalker, ValueExtractor { private var discussions: [Proposal.Discussion] = [] - mutating func extractValue(from sourceValues: (headerFieldsByLabel: [String : ListItem], proposalID: String)) -> ExtractionResult<[Proposal.Discussion]> { + mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult<[Proposal.Discussion]> { // VALIDATION ENHANCEMENT: Normalize naming to 'Review' in the source proposals. - if let (_, headerField) = sourceValues.headerFieldsByLabel[["Review", "Reviews", "Decision Notes", "Decision notes"]] { + if let (_, headerField) = source["Review", "Reviews", "Decision Notes", "Decision notes"] { visit(headerField) // VALIDATION ENHANCEMENT: Correct proposals with known issues and remove special case logic. // Currently a fair number of older proposals are missing links to discussions or do not // format discussions correctly. Those issues should be corrected in the proposals themselves. // Once all of those issues are resolved, the legacy check can be removed. - if discussions.isEmpty && !Legacy.discussionExtractionFailures.contains(sourceValues.proposalID) { + if discussions.isEmpty && !Legacy.discussionExtractionFailures.contains(source.proposalSpec.id) { errors.append(.discussionExtractionFailure) } } else { @@ -35,7 +35,7 @@ struct DiscussionExtractor: MarkupWalker, ValueExtractor { // field for a variety of reasons. Those issues should be corrected in the proposals themselves. // Once all of those issues are resolved, the legacy check can be removed. // Note that some very early proposals may not have valid discussions be extracted. - if !Legacy.missingReviewFields.contains(sourceValues.proposalID) { + if !Legacy.missingReviewFields.contains(source.proposalSpec.id) { errors.append(.missingReviewField) } } diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/HeaderFieldExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/HeaderFieldExtractor.swift index 3fbc68b..c01f25b 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/HeaderFieldExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/HeaderFieldExtractor.swift @@ -17,8 +17,8 @@ struct HeaderFieldExtractor: MarkupWalker, ValueExtractor { private var warnings: [Proposal.Issue] = [] private var errors: [Proposal.Issue] = [] - mutating func extractValue(from document: Document) -> ExtractionResult<[String: ListItem]> { - guard let headerFields = document.child(through: [(1, UnorderedList.self)]) as? UnorderedList else { + mutating func extractValue(from src: DocumentSource) -> ExtractionResult<[String: ListItem]> { + guard let headerFields = src.document.child(through: [(1, UnorderedList.self)]) as? UnorderedList else { return ExtractionResult(value: nil, warnings: warnings, errors: errors) } visit(headerFields) diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ImplementationExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ImplementationExtractor.swift index acc21c4..a0e79da 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ImplementationExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ImplementationExtractor.swift @@ -20,8 +20,8 @@ struct ImplementationExtractor: MarkupWalker, ValueExtractor { } private var _implementaton: [Proposal.Implementation] = [] - mutating func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[Proposal.Implementation]> { - if let (_ , headerField) = headerFieldsByLabel[["Implementation", "Implementations"]] { + mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult<[Proposal.Implementation]> { + if let (_ , headerField) = source["Implementation", "Implementations"] { visit(headerField) } // Implementation field is optional. Take no action / add no warning if it is not found diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PersonExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PersonExtractor.swift index 32eaf09..92f9c46 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PersonExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PersonExtractor.swift @@ -10,16 +10,16 @@ import Markdown import EvolutionMetadataModel struct AuthorExtractor: ValueExtractor { - func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[Proposal.Person]> { + func extractValue(from src: HeaderFieldSource) -> ExtractionResult<[Proposal.Person]> { var personExtractor = PersonExtractor(role: .author) - return personExtractor.personArray(from: headerFieldsByLabel) + return personExtractor.personArray(from: src) } } struct ReviewManagerExtractor: ValueExtractor { - func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[Proposal.Person]> { + func extractValue(from src: HeaderFieldSource) -> ExtractionResult<[Proposal.Person]> { var personExtractor = PersonExtractor(role: .reviewManager) - return personExtractor.personArray(from: headerFieldsByLabel) + return personExtractor.personArray(from: src) } } @@ -40,13 +40,13 @@ struct PersonExtractor: MarkupWalker { self.role = role } - mutating func personArray(from headerFields: [String : ListItem]) -> ExtractionResult<[Proposal.Person]> { + mutating func personArray(from source: HeaderFieldSource) -> ExtractionResult<[Proposal.Person]> { let headerLabels = switch role { case .author: ["Author", "Authors"] // VALIDATION ENHANCEMENT: Normalize capitalization to 'Review Manager' case .reviewManager: ["Review manager", "Review Manager", "Review managers", "Review Managers"] } - if let (_, headerField) = headerFields[headerLabels] { + if let (_, headerField) = source[headerLabels] { visit(headerField) } diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PreviousProposalExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PreviousProposalExtractor.swift index 1fb544e..b48eeed 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PreviousProposalExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/PreviousProposalExtractor.swift @@ -19,9 +19,9 @@ struct PreviousProposalExtractor: MarkupWalker, ValueExtractor { } private var _previousProposalIDs: [String] = [] - mutating func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[String]> { + mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult<[String]> { - if let prevPropField = headerFieldsByLabel[["Previous Proposal", "Previous Proposals"]] { + if let prevPropField = source["Previous Proposal", "Previous Proposals"] { visit(prevPropField.value) // validate that if the header field is here at least one proposal ID was found diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ProposalLinkExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ProposalLinkExtractor.swift index 5e7e5fe..066cdd4 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ProposalLinkExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/ProposalLinkExtractor.swift @@ -15,8 +15,8 @@ struct ProposalLinkExtractor: MarkupWalker, ValueExtractor { private var warnings: [Proposal.Issue] = [] private var errors: [Proposal.Issue] = [] - mutating func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult { - if let headerField = headerFieldsByLabel["Proposal"] { + mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult { + if let headerField = source["Proposal"] { visit(headerField) } else { errors.append(.missingProposalIDLink) @@ -35,7 +35,7 @@ struct ProposalLinkExtractor: MarkupWalker, ValueExtractor { errors.append(.reservedProposalID) } - if !proposalLink.text.contains(/^SE-\d\d\d\d$/) { + if !proposalLink.text.contains(source.proposalSpec.project.proposalRegex) { self.proposalLink?.destination = "" // Do not include an incorrect destination errors.append(.proposalIDWrongDigitCount) } diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/StatusExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/StatusExtractor.swift index c2afffc..b818c97 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/StatusExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/StatusExtractor.swift @@ -18,12 +18,12 @@ struct StatusExtractor: MarkupWalker, ValueExtractor { private var extractionDate: Date = Date() var status: Proposal.Status? = nil - mutating func extractValue(from sourceValues: (headerFieldsByLabel: [String : ListItem], extractionDate: Date)) -> ExtractionResult { + mutating func extractValue(from sourceValues: (source: HeaderFieldSource, extractionDate: Date)) -> ExtractionResult { extractionDate = sourceValues.extractionDate // If 'Status' field not found, report - if let headerField = sourceValues.headerFieldsByLabel["Status"] { + if let headerField = sourceValues.source["Status"] { visit(headerField) } diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/SummaryExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/SummaryExtractor.swift index 97ef657..67aaa4c 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/SummaryExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/SummaryExtractor.swift @@ -14,11 +14,11 @@ struct SummaryExtractor: ValueExtractor { private var warnings: [Proposal.Issue] = [] private var errors: [Proposal.Issue] = [] - func extractValue(from document: Document) -> ExtractionResult { + func extractValue(from src: DocumentSource) -> ExtractionResult { var summary = "" var foundIntroduction = false - for child in document.children { + for child in src.document.children { // VALIDATION ENHANCEMENT: Potential for stricter validation of section heading and contents // https://github.com/swiftlang/swift-evolution-metadata-extractor/issues/77 diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TitleExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TitleExtractor.swift index c7fc618..2800f24 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TitleExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TitleExtractor.swift @@ -14,11 +14,11 @@ struct TitleExtractor: ValueExtractor { private var warnings: [Proposal.Issue] = [] private var errors: [Proposal.Issue] = [] - mutating func extractValue(from document: Document) -> ExtractionResult { + mutating func extractValue(from src: DocumentSource) -> ExtractionResult { var title: String? - if let titleElement = document.child(at: 0) as? Heading { + if let titleElement = src.document.child(at: 0) as? Heading { // VALIDATION ENHANCEMENT: Add warning or error that the title is badly formatted in the markdown // if titleElement.level != 1 { diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TrackingBugExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TrackingBugExtractor.swift index 48c447b..958a3c8 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TrackingBugExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/TrackingBugExtractor.swift @@ -19,8 +19,8 @@ struct TrackingBugExtractor: MarkupWalker, ValueExtractor { } private var _trackingBugs: [Proposal.TrackingBug] = [] - mutating func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult<[Proposal.TrackingBug]> { - if let (_, headerField) = headerFieldsByLabel[["Bug", "Bugs"]] { + mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult<[Proposal.TrackingBug]> { + if let (_, headerField) = source["Bug", "Bugs"] { visit(headerField) } diff --git a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/UpcomingFeatureFlagExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/UpcomingFeatureFlagExtractor.swift index 4b22eae..1035e0d 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/UpcomingFeatureFlagExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/FieldExtractors/UpcomingFeatureFlagExtractor.swift @@ -24,9 +24,9 @@ struct UpcomingFeatureFlagExtractor: MarkupWalker, ValueExtractor { } } - mutating func extractValue(from headerFieldsByLabel: [String : ListItem]) -> ExtractionResult { + mutating func extractValue(from source: HeaderFieldSource) -> ExtractionResult { - if let uffField = headerFieldsByLabel["Upcoming Feature Flag"] { + if let uffField = source["Upcoming Feature Flag"] { visit(uffField) // validate that flag is extracted if header is present diff --git a/Sources/EvolutionMetadataExtraction/Extractors/ProposalMetadataExtractor.swift b/Sources/EvolutionMetadataExtraction/Extractors/ProposalMetadataExtractor.swift index 9468712..976d740 100644 --- a/Sources/EvolutionMetadataExtraction/Extractors/ProposalMetadataExtractor.swift +++ b/Sources/EvolutionMetadataExtraction/Extractors/ProposalMetadataExtractor.swift @@ -10,6 +10,19 @@ import Foundation import Markdown import EvolutionMetadataModel +struct DocumentSource { + let proposalSpec: ProposalSpec + let document: Document +} + +struct HeaderFieldSource { + let proposalSpec: ProposalSpec + let headerFieldsByLabel: [String : ListItem] + subscript(key: String) -> ListItem? { headerFieldsByLabel[key] } + subscript(key: [String]) -> (key: String, value: ListItem)? { headerFieldsByLabel[key] } + subscript(key: String...) -> (key: String, value: ListItem)? { headerFieldsByLabel[key] } +} + struct ProposalMetadataExtractor { /// Extracts the metadata from a Swift Evolution proposal @@ -35,6 +48,7 @@ struct ProposalMetadataExtractor { } let document = Document(parsing: markdown, options: [.disableSmartOpts]) + let documentSource = DocumentSource(proposalSpec: proposalSpec, document: document) guard !document.isEmpty else { proposal.errors = [.emptyMarkdownFile] @@ -43,40 +57,41 @@ struct ProposalMetadataExtractor { // VALIDATION ENHANCEMENT: Currently no error or warning reported if missing title or summary. // Change to if lets to detect missing values and report. - proposal.title = extractValue(from: document, with: TitleExtractor.self) ?? "" - proposal.summary = extractValue(from: document, with: SummaryExtractor.self) ?? "" + proposal.title = extractValue(from: documentSource, with: TitleExtractor.self) ?? "" + proposal.summary = extractValue(from: documentSource, with: SummaryExtractor.self) ?? "" - if let headerFieldsByLabel = extractValue(from: document, with: HeaderFieldExtractor.self) { - - let proposalLink = extractValue(from: headerFieldsByLabel, with: ProposalLinkExtractor.self) + if let headerFieldsByLabel = extractValue(from: documentSource, with: HeaderFieldExtractor.self) { + let headerFieldsSource = HeaderFieldSource(proposalSpec: proposalSpec, headerFieldsByLabel: headerFieldsByLabel) + + let proposalLink = extractValue(from: headerFieldsSource, with: ProposalLinkExtractor.self) proposal.id = proposalLink?.text ?? "" proposal.link = proposalLink?.destination ?? "" /* VALIDATION ENHANCEMENT: Probably also want to validate that the destination link matches the passed-in filename and the id matches the proposalID field */ - if let authors = extractValue(from: headerFieldsByLabel, with: AuthorExtractor.self), !authors.isEmpty { + if let authors = extractValue(from: headerFieldsSource, with: AuthorExtractor.self), !authors.isEmpty { proposal.authors = authors } else { errors.append(.missingAuthors) } - if let reviewManagers = extractValue(from: headerFieldsByLabel, with: ReviewManagerExtractor.self), !reviewManagers.isEmpty { + if let reviewManagers = extractValue(from: headerFieldsSource, with: ReviewManagerExtractor.self), !reviewManagers.isEmpty { proposal.reviewManagers = reviewManagers } else { warnings.append(.missingReviewManagers) } - proposal.upcomingFeatureFlag = extractValue(from: headerFieldsByLabel, with: UpcomingFeatureFlagExtractor.self) - proposal.previousProposalIDs = extractValue(from: headerFieldsByLabel, with: PreviousProposalExtractor.self) - proposal.trackingBugs = extractValue(from: headerFieldsByLabel, with: TrackingBugExtractor.self) - proposal.implementation = extractValue(from: headerFieldsByLabel, with: ImplementationExtractor.self) + proposal.upcomingFeatureFlag = extractValue(from: headerFieldsSource, with: UpcomingFeatureFlagExtractor.self) + proposal.previousProposalIDs = extractValue(from: headerFieldsSource, with: PreviousProposalExtractor.self) + proposal.trackingBugs = extractValue(from: headerFieldsSource, with: TrackingBugExtractor.self) + proposal.implementation = extractValue(from: headerFieldsSource, with: ImplementationExtractor.self) - if let discussions = extractValue(from: (headerFieldsByLabel, proposalSpec.id), with: DiscussionExtractor.self) { + if let discussions = extractValue(from: headerFieldsSource, with: DiscussionExtractor.self) { proposal.discussions = discussions } else { errors.append(.missingReviewField) } - if let status = extractValue(from: (headerFieldsByLabel, extractionDate), with: StatusExtractor.self) { + if let status = extractValue(from: (headerFieldsSource, extractionDate), with: StatusExtractor.self) { if case .implemented(let version) = status, version == "none" { // VALIDATION ENHANCEMENT: Figure out a better way to special case the missing version strings for these proposals // VALIDATION ENHANCEMENT: Possibly just add version strings to the actual proposals diff --git a/Sources/EvolutionMetadataExtraction/Utilities/Networking.swift b/Sources/EvolutionMetadataExtraction/Utilities/Networking.swift index 0aecce7..c7244a6 100644 --- a/Sources/EvolutionMetadataExtraction/Utilities/Networking.swift +++ b/Sources/EvolutionMetadataExtraction/Utilities/Networking.swift @@ -14,9 +14,6 @@ import FoundationNetworking // Required for Linux #endif struct PreviousResultsFetcher { - - static let previousResultsURL = URL(string: "https://download.swift.org/swift-evolution/v1/evolution.json")! - static func fetchPreviousResultsData(url: URL) async throws -> Data { let request = URLRequest(url: url, cachePolicy: .reloadIgnoringLocalCacheData) verbosePrint("Fetching with URLRequest:\n\(request.verboseDescription)") @@ -59,9 +56,9 @@ struct GitHubContentItem: Codable { /// Returns `nil` if this content item corresponds to a /// subdirectory instead of a direct proposal document. - func proposalSpec(sortIndex: Int) -> ProposalSpec? { + func proposalSpec(project: Project, sortIndex: Int) -> ProposalSpec? { guard let download_url else { return nil } - return ProposalSpec(url: URL(string: download_url)!, sha: sha, sortIndex: sortIndex) + return ProposalSpec(project: project, url: URL(string: download_url)!, sha: sha, sortIndex: sortIndex) } } @@ -83,25 +80,14 @@ struct GitHubPullFileItem: Codable { filename.hasSuffix(".md") } - func proposalSpec(sortIndex: Int) -> ProposalSpec { - ProposalSpec(url: URL(string: raw_url)!, sha: sha, sortIndex: sortIndex) + func proposalSpec(project: Project, sortIndex: Int) -> ProposalSpec { + ProposalSpec(project: project, url: URL(string: raw_url)!, sha: sha, sortIndex: sortIndex) } } struct GitHubFetcher { - struct Endpoint { - private static let endpointBaseURL = URL(string: "https://api.github.com/repos/swiftlang/swift-evolution")! - static let githubMainBranchEndpoint = endpointBaseURL.appending(path:"branches/main") - static let githubIssuesEndpoint = endpointBaseURL.appending(path: "issues?since=2023-08-01T01:00:00Z&state=all") - static let githubProposalsEndpoint = endpointBaseURL.appending(path: "contents/proposals" ) - static func githubPullEndpoint(for request: Int) -> URL { - endpointBaseURL.appending(path: "pulls/\(request)/files") - .appending(queryItems: [URLQueryItem(name: "per_page", value: "100")]) - } - } - static let gitHubTokenHeaderValue: String? = { if let githubToken = ProcessInfo.processInfo.environment["GITHUB_TOKEN"] { "Bearer \(githubToken)" @@ -116,25 +102,25 @@ struct GitHubFetcher { return String(decoding: data, as: UTF8.self) } - static func fetchMainBranch() async throws -> GitHubBranch { + static func fetchMainBranch(for project: Project) async throws -> GitHubBranch { // Always reload since a new commit may have occurred. // Caching for other calls is fine since the branch commit SHA is included in the requested URL - let branchInfo = try await getGitHubAPIValue(for: Endpoint.githubMainBranchEndpoint, type: GitHubBranch.self, cachePolicy: .reloadIgnoringLocalCacheData) + let branchInfo = try await getGitHubAPIValue(for: project.mainBranchEndpoint, type: GitHubBranch.self, cachePolicy: .reloadIgnoringLocalCacheData) return branchInfo } // Returns only content items that are Markdown files in the /proposals directory of the repo. // Filters out subdirectories and files without ".md" suffix. - static func fetchProposalContentItems(for reference: String? = nil) async throws -> [GitHubContentItem] { - var endpoint = Endpoint.githubProposalsEndpoint + static func fetchProposalContentItems(for project: Project, sha reference: String? = nil) async throws -> [GitHubContentItem] { + var endpoint = project.proposalListingEndpoint if let reference { endpoint.append(queryItems: [URLQueryItem(name: "ref", value: reference)]) } return try await getGitHubAPIValue(for: endpoint, type: [GitHubContentItem].self).filter { $0.isMarkdownFile } } - static func fetchPullRequestProposalList(for pullNumber: Int) async throws -> [GitHubPullFileItem] { - let endpointURL = Endpoint.githubPullEndpoint(for: pullNumber) + static func fetchPullRequestProposalList(for project: Project, pullRequestNumber: Int) async throws -> [GitHubPullFileItem] { + let endpointURL = project.githubPullEndpoint(for: pullRequestNumber) let contents = try await getGitHubAPIValue(for: endpointURL, type: [GitHubPullFileItem].self) return contents.filter { $0.isProposalFile } } diff --git a/Sources/EvolutionMetadataExtraction/Utilities/Project.swift b/Sources/EvolutionMetadataExtraction/Utilities/Project.swift new file mode 100644 index 0000000..283bbca --- /dev/null +++ b/Sources/EvolutionMetadataExtraction/Utilities/Project.swift @@ -0,0 +1,88 @@ +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2026 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +import Foundation + +/** + Represents a software project with its own set of evolution proposals. + */ +public final class Project: Sendable { + let name: String + let organization: String + let repository: String + let path: String + let proposalPrefix: String + nonisolated(unsafe) let proposalRegex: Regex + let previousResultsURL: URL + let defaultOutputFilename: String + private let endpointBaseURL: URL + let mainBranchEndpoint: URL + let proposalListingEndpoint: URL + let validationExemptions: [Int:RangeSet] + + private init(name: String, organization: String, repository: String, path: String, proposalPrefix: String, proposalRegex: Regex, previousResultsURL: URL, defaultOutputFilename: String, validationExemptions: [Int:RangeSet]) { + self.name = name + self.organization = organization + self.repository = repository + self.path = path + self.proposalPrefix = proposalPrefix + self.proposalRegex = proposalRegex + self.previousResultsURL = previousResultsURL + self.defaultOutputFilename = defaultOutputFilename + self.validationExemptions = validationExemptions + + self.endpointBaseURL = URL(string: "https://api.github.com/repos/")!.appending(components: organization, repository) + self.mainBranchEndpoint = endpointBaseURL.appending(path:"branches/main") + self.proposalListingEndpoint = endpointBaseURL.appending(path: "contents/" + path) + } + + func githubPullEndpoint(for request: Int) -> URL { + endpointBaseURL.appending(path: "pulls/\(request)/files") + .appending(queryItems: [URLQueryItem(name: "per_page", value: "100")]) + } + + public static var `default`: Project { + swift + } + + public static let swift = Project( + name: "Swift", + organization: "swiftlang", + repository: "swift-evolution", + path: "proposals", + proposalPrefix: "SE", + proposalRegex: /^SE-\d\d\d\d$/, + previousResultsURL: URL(string: "https://download.swift.org/swift-evolution/v1/evolution.json")!, + defaultOutputFilename: "evolution.json", + validationExemptions: [:] + ) + + public static let swiftTesting = Project( + name: "Swift Testing", + organization: "swiftlang", + repository: "swift-evolution", + path: "proposals/testing", + proposalPrefix: "ST", + proposalRegex: /^ST-\d\d\d\d$/, + previousResultsURL: URL(string: "https://download.swift.org/swift-evolution/v1/testing-evolution.json")!, + defaultOutputFilename: "testing-evolution.json", + validationExemptions: [:] + ) + + public static let foundation = Project( + name: "Foundation", + organization: "swiftlang", + repository: "swift-foundation", + path: "Proposals", + proposalPrefix: "SF", + proposalRegex: /^SF-\d\d\d\d$/, + previousResultsURL: URL(string: "https://download.swift.org/swift-evolution/v1/foundation-evolution.json")!, + defaultOutputFilename: "foundation-evolution.json", + validationExemptions: [:] + ) +} diff --git a/Sources/EvolutionMetadataExtraction/Utilities/Snapshot.swift b/Sources/EvolutionMetadataExtraction/Utilities/Snapshot.swift index 3f931f3..8c23ace 100644 --- a/Sources/EvolutionMetadataExtraction/Utilities/Snapshot.swift +++ b/Sources/EvolutionMetadataExtraction/Utilities/Snapshot.swift @@ -10,6 +10,7 @@ import Foundation import EvolutionMetadataModel struct Snapshot { + let project: Project let sourceURL: URL? let destURL: URL? var proposalListing: [GitHubContentItem]? = nil // Ad-hoc snapshots may not have these @@ -23,7 +24,8 @@ struct Snapshot { let temporarySnapshotDirectory: URL? let temporaryProposalsDirectory: URL? - init(sourceURL: URL?, destURL: URL?, proposalListing: [GitHubContentItem]?, directoryContents: [ProposalSpec], proposalSpecs: [ProposalSpec], previousResults: EvolutionMetadata?, expectedResults: EvolutionMetadata?, branchInfo: GitHubBranch?, snapshotDate: Date) { + init(project: Project, sourceURL: URL?, destURL: URL?, proposalListing: [GitHubContentItem]?, directoryContents: [ProposalSpec], proposalSpecs: [ProposalSpec], previousResults: EvolutionMetadata?, expectedResults: EvolutionMetadata?, branchInfo: GitHubBranch?, snapshotDate: Date) { + self.project = project self.sourceURL = sourceURL self.destURL = destURL self.proposalListing = proposalListing @@ -43,7 +45,7 @@ struct Snapshot { } } - static func makeSnapshot(snapshotURL: URL, destURL: URL?, ignorePreviousResults: Bool, extractionDate: Date) async throws -> Snapshot{ + static func makeSnapshot(project: Project = Project.default, snapshotURL: URL, destURL: URL?, ignorePreviousResults: Bool, extractionDate: Date) async throws -> Snapshot{ var proposalListingFound = false var previousResultsFound = false @@ -59,13 +61,13 @@ struct Snapshot { .sorted(by: { $0.lastPathComponent < $1.lastPathComponent }) .filter { $0.pathExtension == "md"} .enumerated() - .map { ProposalSpec(url: $1, sha: "", sortIndex: $0) } // try! SHA1.hexForData(Data(contentsOf: $0))) + .map { ProposalSpec(project: project, url: $1, sha: "", sortIndex: $0) } // try! SHA1.hexForData(Data(contentsOf: $0))) let proposalSpecs: [ProposalSpec] let proposalListing: [GitHubContentItem]? if let contentItems = try FileUtilities.decode([GitHubContentItem].self, from: proposalListingURL) { proposalListing = contentItems - proposalSpecs = contentItems.enumerated().map { ProposalSpec(url: snapshotURL.appending(path: $1.path), sha: $1.sha, sortIndex: $0) } + proposalSpecs = contentItems.enumerated().map { ProposalSpec(project: project, url: snapshotURL.appending(path: $1.path), sha: $1.sha, sortIndex: $0) } proposalListingFound = true if directoryContents.count != proposalSpecs.count { print("WARNING: Number of proposals in proposals directory does not match number of proposals in 'proposal-listing.json") @@ -100,7 +102,7 @@ struct Snapshot { Proposal count: \(proposalSpecs.count) """) - return Snapshot(sourceURL: snapshotURL, destURL: destURL, proposalListing: proposalListing, directoryContents: directoryContents, proposalSpecs: proposalSpecs, previousResults: previousResults, expectedResults: expectedResults, branchInfo: branchInfo, snapshotDate: snapshotDate) + return Snapshot(project: project, sourceURL: snapshotURL, destURL: destURL, proposalListing: proposalListing, directoryContents: directoryContents, proposalSpecs: proposalSpecs, previousResults: previousResults, expectedResults: expectedResults, branchInfo: branchInfo, snapshotDate: snapshotDate) // Expected test file directory structure: