我有下面这段 Power Query 代码,它在读取单个 PDF 并输出我想要的列方面效果很好。如何修改它,使其读取一个包含多个相同格式 PDF 文件的文件夹,并循环处理其中所有的 PDF 文件?该文件夹最多可包含 300 个文件。
提前感谢您的帮助。
let
    // ParseReport: turns the binary content of ONE report PDF into the final
    // table (Wells, Wet, Hours, FTHP_p, FTHP_b, Comments, Test_Date).
    // Keeping the per-file logic in a function lets the same steps be reused
    // for any PDF with the same layout. For example, to process a folder,
    // invoke it on each [Content] value returned by Folder.Files(...) and
    // Table.Combine the resulting tables.
    ParseReport = (pdfContent as binary) as table =>
        let
            Source = Pdf.Tables(pdfContent, [Implementation="1.3"]),

            // --- Page 1: its first row is promoted to column headers ---
            Page1 = Source{[Id="Page001"]}[Data],
            #"Promoted Headers" = Table.PromoteHeaders(Page1, [PromoteAllScalars=true]),
            #"Changed Type" = Table.TransformColumnTypes(#"Promoted Headers",{{"Column1", type text}, {"[image]", type text}, {"TestHeader", type text}, {"Column4", type text}, {"Column5", type text}, {"Column6", type text}, {"Column7", type text}, {"Column8", type text}, {"Column9", type text}, {"Column10", type text}}),
            #"Removed Columns" = Table.RemoveColumns(#"Changed Type",{"Column5", "Column6", "Column8", "Column10"}),
            // Drop rows whose TestHeader value starts with "MyOff".
            #"Filtered Rows" = Table.SelectRows(#"Removed Columns", each not Text.StartsWith([#"TestHeader"], "MyOff")),
            // Rename the six remaining columns to Column1..Column6 so they line up with page 2.
            #"Renamed Columns" = Table.RenameColumns(#"Filtered Rows",{{"[image]", "Column2"}, {"TestHeader", "Column3"}, {"Column7", "Column5"}, {"Column9", "Column6"}}),

            // --- Page 2: no header promotion; columns keep their default ColumnN names ---
            Page2 = Source{[Id="Page002"]}[Data],
            #"Changed Type2" = Table.TransformColumnTypes(Page2,{{"Column1", type text}, {"Column2", type text}, {"Column3", type text}, {"Column4", type text}, {"Column5", type text}, {"Column6", type text}, {"Column7", type text}}),
            #"Removed Columns2" = Table.RemoveColumns(#"Changed Type2",{"Column5"}),
            // Close the gap left by the removed column so page 2 also ends up as Column1..Column6.
            #"Renamed Columns2" = Table.RenameColumns(#"Removed Columns2",{{"Column6", "Column5"}, {"Column7", "Column6"}}),

            // --- Combine both pages and shape the final output ---
            #"Appended Query" = Table.Combine({#"Renamed Columns", #"Renamed Columns2"}),
            // Column3 of the first combined row is copied onto every row as Test_Date.
            // It is a separate step so the lookup is not repeated inside the row
            // function; being lazy, it is only evaluated if at least one row exists.
            TestDate = Record.Field(#"Appended Query"{0}, "Column3"),
            #"Added Custom" = Table.AddColumn(#"Appended Query", "Test_Date", each TestDate),
            // Keep only the rows whose first column starts with "NF".
            #"Filtered Rows1" = Table.SelectRows(#"Added Custom", each Text.StartsWith([Column1], "NF")),
            #"Renamed Columns1" = Table.RenameColumns(#"Filtered Rows1",{{"Column1", "Wells"}, {"Column2", "Wet"}, {"Column3", "Hours"}, {"Column4", "FTHP_p"}, {"Column5", "FTHP_b"}, {"Column6", "Comments"}})
        in
            #"Renamed Columns1",

    // Same single-file result as before: parse the one report at the original path.
    Result = ParseReport(File.Contents("C:\Users\MyReport.pdf"))
in
    Result
与此类似的东西