如何使用 vba 正确重新格式化具有多种变化的维度值？

含咖啡因的迈克

我正在尝试创建一个 Excel 宏，将维度值格式化为我们公司的格式。这样我们就可以轻松地将数据导入我们的系统，而无需手动处理数千个维度。不过，我遇到了一些问题：

供应商发送给我们的维度有很多变化，这让我很难想出某种正则表达式来捕获所有值。
即使我能够想出某种正则表达式来处理值，我也不确定如何用正确的形式替换值，因为我不确定是否可以用正则表达式替换捕获的正则表达式组值。如果是这样，我不知道如何处理这种情况。

我公司对Dimensions的标准格式如下：

每个值最多可以有 3 个参数

Attribute1:Value1:Unit1;Attribute2:Value2:Unit2;Attribute3:Value3:Unit3

示例： 1" L x 2" W x 3" H转换为长度:1:in;宽度:2:in;高度:3:in

可以与它一起使用的可能值是：

长度
宽度
高度
弧
区域
圆周
深度
早晨
厚度

我在过去一年中注意到的一些变化包括：

长度 - L 或 L。
宽度 - W 或 W。
高度 - H, H., 高度
周长 - 圆形
深度 - D、D.、深
直径 - 直径或直径。
厚度 - 厚
in - 英寸、英寸、英寸、"、''（2 个撇号）
英尺 - 英尺

产品尺寸的小样本（注意不一致）：

3 3/4" Width x 2 1/2" Height
L 4 3/4" x W 1 1/2" x H 3"
3 1/2" W x 2 1/8" H x 2 7/8" D
3 5/8" W x 2 1/2" H x 5/8" Depth
3 3/4" W x 1" H
1 1/4" W x 3 1/4" H
2 3/8" Diameter
3" W x 2 1/2" H
2" W x 3" H
2 1/2" W x 2" H
1 3/8" W x 2 1/8" H
3 1/2" W x 3 1/2" H
1 1/2" W x 3" H
2" W x 1 7/8" H x 1 1/2" D
4 3/4" W x 3 1/2" H
4 3/4" W x 4" H x 1 1/4" D
3 1/2" W x 3 1/2" H x 3 1/2" D
3-1/2" W x 2-3/4" H 
3.5" W x 4" H
3" H
3 1/4" W x 2 1/4" H
4 7/16" W x 6 1/4" H
3 1/4" W x 3 1/4" H
5" W x 7" H

到目前为止，我已经提出了 regex (\d+(.| |/|)\d+((/)\d+|)|\d+)，它似乎可以获取所有数字，但我不太确定如何找到属性和单位的所有不同变体。我认为唯一可行的方法是后视和前瞻，但我对这种正则表达式的风格还不够精通，无法弄清楚。

问题 1：正则表达式是完成这项任务的最佳方式还是有更好的方式？

问题 2：最终的问题，我怎样才能完成这个复杂的任务，或者甚至可以使用 vba 远程完成？

欧米茄条纹

您可以创建一个解析器/渲染器，下面的示例展示了如何在基于 RegEx 的EBNF解析器中实现，将代码放入标准 VBA 模块中：

Option Explicit

Private sBuffer As String
Private oTokens As Object
Private oRegEx As Object

Sub TestParserRender()

    Dim sScr As String
    Dim sResult As String

    sScr = ReadTextFile(ThisWorkbook.Path & "\Source.txt", -2)
    sResult = Parse(sScr)
    WriteTextFile sResult, ThisWorkbook.Path & "\Result.txt", -1

End Sub

Function Parse(ByVal sSample As String) As String

    ' Init
    sBuffer = sSample
    Set oTokens = CreateObject("Scripting.Dictionary")
    Set oRegEx = CreateObject("VBScript.RegExp")
    With oRegEx
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        ' Cast variations in attributes and units
        .Pattern = "\bL\.(?=\s|$)|\bL\b"
        sBuffer = .Replace(sBuffer, "Length")
        .Pattern = "\bW\.(?=\s|$)|\bW\b"
        sBuffer = .Replace(sBuffer, "Width")
        .Pattern = "\bH\.(?=\s|$)|\bH\b|\bHeigth\b"
        sBuffer = .Replace(sBuffer, "Height")
        .Pattern = "\bRound\b"
        sBuffer = .Replace(sBuffer, "Circumference")
        .Pattern = "\bD\.(?=\s|$)|\bD\b|\bDeep\b"
        sBuffer = .Replace(sBuffer, "Depth")
        .Pattern = "\bDia\.(?=\s|$)|\bDiameter\b"
        sBuffer = .Replace(sBuffer, "Dia")
        .Pattern = "\bThick\b"
        sBuffer = .Replace(sBuffer, "Thickness")
        .Pattern = "(?:\""|'')(?=\s|$)"
        sBuffer = .Replace(sBuffer, " in")
        .Pattern = "\binch\b|\binches\b|\bin\.(?=\s|$)"
        sBuffer = .Replace(sBuffer, "in")
        .Pattern = "\bfeet\b"
        sBuffer = .Replace(sBuffer, "ft")
        ' Tokenize instances
        .Pattern = "<\d+[savedpun]>"
        Tokenize "e" ' Escape reserved sequence
        .Pattern = "\b(?:\d+((?:[ -]\d+)?(?:\/|\.)\d+)?)(?=\D)"
        Tokenize "n" ' Number
        .Pattern = "\b(?:Length|Width|Height|Arc|Area|Circumference|Depth|Dia|Thickness)\b"
        Tokenize "a" ' Attribute
        .Pattern = "\b(?:in|ft)\b"
        Tokenize "u" ' Units
        .Pattern = "<\d+n>[ \t]*<\d+u>"
        Tokenize "v" ' Number + Unit = Value
        .Pattern = "(<\d+v>)([ \t]*)(<\d+a>)"
        sBuffer = .Replace(sBuffer, "$3$2$1") ' Swap Value + Attribute = Attribute + Value
        .Pattern = "<\d+a>[ \t]*<\d+v>"
        Tokenize "p" ' Attribute + Value = Parameter
        .Pattern = "^[ \t]*<\d+p>(?:[ \t]*X[ \t]*<\d+p>){0,2}[ \t]*$"
        Tokenize "d" ' Parameter X Parameter X Parameter = Dimension
        .MultiLine = False
        .Pattern = "^(?:\r\n)*<\d+d>(?:(?:\r\n)+<\d+d>)*(?:\r\n)*$"
        Tokenize "s" ' Dimension * N times = Structure
        .Pattern = "^<\d+s>$" ' Top level Structure single element
        If .Test(sBuffer) And oTokens.Exists(sBuffer) Then
            Parse = Retrieve(sBuffer) ' Render if success
        Else
            MsgBox "Parsing failed"
            .Pattern = "^([\s\S]+?)(<\d+[savedpun]>)"
            sBuffer = .Replace(sBuffer, "[$1]$2") ' Put failed from begin in brackets
            .Pattern = "(<\d+[savedpun]>)([\s\S]+?)(?=<\d+[savedpun]>|$)"
            sBuffer = .Replace(sBuffer, "$1[$2]") ' Put failed between tokens in brackets
            .Pattern = "\[\r\n\]"
            sBuffer = .Replace(sBuffer, vbCrLf) ' Recover dummy new lines in brackets
            .Global = False
            .Pattern = "<\d+[savedpun]>" ' Retrieve the rest tokens
            Do
                With .Execute(sBuffer)
                    If .Count = 0 Then Exit Do
                    sBuffer = Replace(sBuffer, .Item(0).value, oTokens(.Item(0).value))
                End With
            Loop
            Parse = sBuffer
        End If
    End With
    Set oTokens = Nothing
    Set oRegEx = Nothing

End Function

Private Sub Tokenize(sType)

    Dim aContent() As String
    Dim lCopyIndex As Long
    Dim i As Long
    Dim sKey As String

    With oRegEx.Execute(sBuffer)
        If .Count = 0 Then Exit Sub
        ReDim aContent(0 To .Count - 1)
        lCopyIndex = 1
        For i = 0 To .Count - 1
            With .Item(i)
                sKey = "<" & oTokens.Count & sType & ">"
                oTokens(sKey) = .value
                aContent(i) = Mid(sBuffer, lCopyIndex, .FirstIndex - lCopyIndex + 1) & sKey
                lCopyIndex = .FirstIndex + .Length + 1
            End With
        Next
    End With
    sBuffer = Join(aContent, "") & Mid(sBuffer, lCopyIndex, Len(sBuffer) - lCopyIndex + 1)

End Sub

Private Function Retrieve(sTokenKey As String) As String

    Dim sTokenValue As String
    Dim aTokens() As String
    Dim i As Long
    Dim aContent() As String

    sTokenValue = oTokens(sTokenKey)
    Select Case Left(Right(sTokenKey, 2), 1)
        Case "s", "d"
            aTokens = Split(sTokenValue, "<")
            ReDim aContent(UBound(aTokens) - 1)
            For i = 1 To UBound(aTokens)
                aContent(i - 1) = Retrieve("<" & Split(aTokens(i), ">", 2)(0) & ">")
            Next
            Retrieve = Join(aContent, IIf(Left(Right(sTokenKey, 2), 1) = "s", vbCrLf, ";"))
        Case "p", "v"
            aTokens = Split(sTokenValue, "<")
            Retrieve = _
                Retrieve("<" & Split(aTokens(1), ">", 2)(0) & ">") & _
                ":" & _
                Retrieve("<" & Split(aTokens(2), ">", 2)(0) & ">")
        Case "a", "u", "n"
            Retrieve = sTokenValue
    End Select

End Function

Function ReadTextFile(sPath As String, lFormat As Long) As String
    ' lFormat -2 - System default, -1 - Unicode, 0 - ASCII
    With CreateObject("Scripting.FileSystemObject").OpenTextFile(sPath, 1, False, lFormat)
        ReadTextFile = ""
        If Not .AtEndOfStream Then ReadTextFile = .ReadAll
        .Close
    End With
End Function

Sub WriteTextFile(sContent As String, sPath As String, lFormat As Long)
    With CreateObject("Scripting.FileSystemObject").OpenTextFile(sPath, 2, True, lFormat)
        .Write (sContent)
        .Close
    End With
End Sub

将样本作为 ANSI 或 Unicode 保存到Source.txt与 Excel 文件相同的文件夹中的文本文件中，然后运行TestParserRender(). 输出将保存到文本文件Result.txt。处理从解析开始。属性和单位的变化首先通过 RegEx 替换进行转换。然后匹配到 RegEx 模式的部分被折叠成标记。错误的值 + 属性序列通过替换替换的 RegEx 子匹配得到纠正。在解析结束时，应留下单个顶级 Structure 标记，否则会引发错误。如果解析失败，则无法识别的部分将放入输出中的大括号中。如果成功，则内容检索和渲染的反向过程将继续到最后一个标记。

大纲中的解析算法可以用下面的 EBNF 语法表示（简化，替换未显示）：

structure ::= ( "\n\r" )* dimension ( ( "\n\r" )+ dimension )* ( "\n\r" )*
dimension ::= ( " " | "\t" )* parameter ( ( " " | "\t" )+ "X" ( " " | "\t" )+ parameter )? ( ( " " | "\t" )+ "X" ( " " | "\t" )+ parameter )? ( " " | "\t" )*
parameter ::= attribute ( " " | "\t" )* value
attribute ::= "\b" ( "Length" | "Width" | "Height" | "Arc" | "Area" | "Circumference" | "Depth" | "Dia" | "Thickness" ) "\b"
value ::= number ( " " | "\t" ) unit
number ::= digits ( ( ( ( ' ' | '-' ) digits )? '/' | '.' ) digits )?
digits ::= digit+
digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
unit ::= "\b" ( "in" | "ft" ) "\b"

及相关图：

您提供的示例的输出如下：

Width:3 3/4:in;Height:2 1/2:in
Length:4 3/4:in;Width:1 1/2:in;Height:3:in
Width:3 1/2:in;Height:2 1/8:in;Depth:2 7/8:in
Width:3 5/8:in;Height:2 1/2:in;Depth:5/8:in
Width:3 3/4:in;Height:1:in
Width:1 1/4:in;Height:3 1/4:in
Dia:2 3/8:in
Width:3:in;Height:2 1/2:in
Width:2:in;Height:3:in
Width:2 1/2:in;Height:2:in
Width:1 3/8:in;Height:2 1/8:in
Width:3 1/2:in;Height:3 1/2:in
Width:1 1/2:in;Height:3:in
Width:2:in;Height:1 7/8:in;Depth:1 1/2:in
Width:4 3/4:in;Height:3 1/2:in
Width:4 3/4:in;Height:4:in;Depth:1 1/4:in
Width:3 1/2:in;Height:3 1/2:in;Depth:3 1/2:in
Width:3-1/2:in;Height:2-3/4:in
Width:3.5:in;Height:4:in
Height:3:in
Width:3 1/4:in;Height:2 1/4:in
Width:4 7/16:in;Height:6 1/4:in
Width:3 1/4:in;Height:3 1/4:in
Width:5:in;Height:7:in

顺便说一句，我在VBA JSON parser 中使用了相同的方法。

本文收集自互联网，转载请注明来源。

如有侵权，请联系 [email protected] 删除。

编辑于 2021-06-4

我来说两句

0 条评论

登录后参与评论

上一篇：为什么kibana的可视化图表中的唯一计数不正确？

TOP 榜单

文章

如何使用 vba 正确重新格式化具有多种变化的维度值？

如何使用 vba 正确重新格式化具有多种变化的维度值？

蓝屏死机没有修复解决方案

计算数据帧中每行的NA

UITableView的项目向下滚动后更改颜色，然后快速备份

Node.js中未捕获的异常错误，发生调用

在 Python 2.7 中。如何从文件中读取特定文本并分配给变量

Linux的官方Adobe Flash存储库是否已过时？

验证REST API参数

ggplot：对齐多个分面图-所有大小不同的分面

Mac OS X更新后的GRUB 2问题

通过 Git 在运行 Jenkins 作业时获取 ClassNotFoundException

带有错误“ where”条件的查询如何返回结果？

用日期数据透视表和日期顺序查询

VB.net将2条特定行导出到DataGridView

如何从视图一次更新多行（ASP.NET - Core）

Java Eclipse中的错误13，如何解决？

尝试反复更改屏幕上按钮的位置 - kotlin android studio

离子动态工具栏背景色

应用发明者仅从列表中选择一个随机项一次

当我尝试下载 StanfordNLP en 模型时，出现错误

python中的boto3文件上传

在同一Pushwoosh应用程序上Pushwoosh多个捆绑ID